diff --git a/solr/core/src/java/org/apache/solr/update/processor/ContentHashVersionProcessor.java b/solr/core/src/java/org/apache/solr/update/processor/ContentHashVersionProcessor.java new file mode 100644 index 00000000000..f728e0905d4 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/update/processor/ContentHashVersionProcessor.java @@ -0,0 +1,237 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.update.processor; + +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.util.Base64; +import java.util.Collection; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.function.Predicate; +import org.apache.lucene.util.BytesRef; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.core.SolrCore; +import org.apache.solr.handler.component.RealTimeGetComponent; +import org.apache.solr.handler.component.RealTimeGetComponent.Resolution; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.schema.TextField; +import org.apache.solr.update.AddUpdateCommand; +import org.apache.solr.update.UpdateCommand; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * An implementation of {@link UpdateRequestProcessor} which computes a hash of selected doc values, + * and uses this hash value to reject/accept doc updates. + * + * + * + * Depending on {#discardSameDocuments} value, this processor may reject or accept doc update. This + * implementation can be used for monitoring or rejecting no-op updates (updates that do not change + * Solr document). + * + *

Note: hash is computed using {@link Lookup3Signature}. + * + * @see Lookup3Signature + */ +public class ContentHashVersionProcessor extends UpdateRequestProcessor { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + private final SchemaField hashField; + private final SolrQueryResponse rsp; + private final SolrCore core; + private final Predicate<String> includedFields; // Matcher for included fields in hash + private final Predicate<String> excludedFields; // Matcher for excluded fields from hash + private OldDocProvider oldDocProvider = new DefaultDocProvider(); + private boolean discardSameDocuments; + private int sameCount = 0; + private int differentCount = 0; + private int unknownCount = 0; + + public ContentHashVersionProcessor( + Predicate<String> hashIncludedFields, + Predicate<String> hashExcludedFields, + String hashFieldName, + SolrQueryRequest req, + SolrQueryResponse rsp, + UpdateRequestProcessor next) { + super(next); + this.core = req.getCore(); + this.hashField = new SchemaField(hashFieldName, new TextField()); + this.rsp = rsp; + this.includedFields = hashIncludedFields; + this.excludedFields = hashExcludedFields; + } + + @Override + public void processAdd(AddUpdateCommand cmd) throws IOException { + SolrInputDocument newDoc = cmd.getSolrInputDocument(); + String newHash = computeDocHash(newDoc); + newDoc.setField(hashField.getName(), newHash); + int i = 0; + + if (!validateHash(cmd.getIndexedId(), newHash)) { + return; + } + + while (true) { + logOverlyFailedRetries(i, cmd); + try { + super.processAdd(cmd); + return; + } catch (SolrException e) { + if (e.code() != 409) { + throw e; + } + ++i; + } + } + } + + @Override + public void finish() throws IOException { + try { + super.finish(); + } finally { + rsp.addToLog("numAddsExisting", sameCount + differentCount + unknownCount); + rsp.addToLog("numAddsExistingWithIdentical", sameCount); + rsp.addToLog("numAddsExistingUnknown", unknownCount); + } + } + + private static void
logOverlyFailedRetries(int i, UpdateCommand cmd) { + if ((i & 255) == 255) { + log.warn("Unusual number of optimistic concurrency retries: retries={} cmd={}", i, cmd); + } + } + + void setOldDocProvider(OldDocProvider oldDocProvider) { + this.oldDocProvider = oldDocProvider; + } + + void setDiscardSameDocuments(boolean discardSameDocuments) { + this.discardSameDocuments = discardSameDocuments; + } + + private boolean validateHash(BytesRef indexedDocId, String newHash) throws IOException { + assert null != indexedDocId; + + var docFoundAndOldUserVersions = getOldUserVersionsFromStored(indexedDocId); + if (docFoundAndOldUserVersions.found) { + String oldHash = + docFoundAndOldUserVersions.oldHash; // No hash: might want to keep track of these too + if (oldHash == null) { + unknownCount++; + return true; + } else if (Objects.equals(newHash, oldHash)) { + sameCount++; + return !discardSameDocuments; + } else { + differentCount++; + return true; + } + } + return true; // Doc not found + } + + private DocFoundAndOldUserAndSolrVersions getOldUserVersionsFromStored(BytesRef indexedDocId) + throws IOException { + SolrInputDocument oldDoc = oldDocProvider.getDocument(core, hashField.getName(), indexedDocId); + return null == oldDoc + ? DocFoundAndOldUserAndSolrVersions.NOT_FOUND + : getUserVersionAndSolrVersionFromDocument(oldDoc); + } + + private DocFoundAndOldUserAndSolrVersions getUserVersionAndSolrVersionFromDocument( + SolrInputDocument oldDoc) { + Object o = oldDoc.getFieldValue(hashField.getName()); + if (o != null) { + return new DocFoundAndOldUserAndSolrVersions(o.toString()); + } + return new DocFoundAndOldUserAndSolrVersions(); + } + + public String computeDocHash(SolrInputDocument doc) { + List<String> docIncludedFieldNames = + doc.getFieldNames().stream() + .filter(includedFields) // Keep fields that match 'included fields' matcher...
+ .filter( + excludedFields + .negate()) // ...and exclude fields that match 'excluded fields' matcher + .sorted() // Sort to ensure consistent field order across different doc field orders + .toList(); + + final Signature sig = new Lookup3Signature(); + for (String fieldName : docIncludedFieldNames) { + sig.add(fieldName); + Object o = doc.getFieldValue(fieldName); + if (o instanceof Collection) { + for (Object oo : (Collection<?>) o) { + sig.add(String.valueOf(oo)); + } + } else { + sig.add(String.valueOf(o)); + } + } + + // Signature, depending on implementation, may return 8-byte or 16-byte value + byte[] signature = sig.getSignature(); + return Base64.getEncoder() + .encodeToString(signature); // Makes a base64 hash out of signature value + } + + interface OldDocProvider { + SolrInputDocument getDocument(SolrCore core, String hashField, BytesRef indexedDocId) + throws IOException; + } + + private static class DefaultDocProvider implements OldDocProvider { + @Override + public SolrInputDocument getDocument(SolrCore core, String hashField, BytesRef indexedDocId) + throws IOException { + return RealTimeGetComponent.getInputDocument( + core, indexedDocId, indexedDocId, null, Set.of(hashField), Resolution.PARTIAL); + } + } + + private static class DocFoundAndOldUserAndSolrVersions { + private static final DocFoundAndOldUserAndSolrVersions NOT_FOUND = + new DocFoundAndOldUserAndSolrVersions(); + private final boolean found; + + public String oldHash; + + private DocFoundAndOldUserAndSolrVersions() { + this.found = false; + } + + private DocFoundAndOldUserAndSolrVersions(String oldHash) { + this.found = true; + this.oldHash = oldHash; + } + } +} diff --git a/solr/core/src/java/org/apache/solr/update/processor/ContentHashVersionProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/ContentHashVersionProcessorFactory.java new file mode 100644 index 00000000000..48fa52db928 --- /dev/null +++
b/solr/core/src/java/org/apache/solr/update/processor/ContentHashVersionProcessorFactory.java @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.update.processor; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Locale; +import java.util.function.Predicate; +import java.util.stream.Collectors; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.StrUtils; +import org.apache.solr.core.SolrCore; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.util.plugin.SolrCoreAware; + +/** Factory for {@link ContentHashVersionProcessor} instances. 
*/ +public class ContentHashVersionProcessorFactory extends UpdateRequestProcessorFactory + implements SolrCoreAware, UpdateRequestProcessorFactory.RunAlways { + private static final char SEPARATOR = ','; // Separator for included/excluded fields + static final String CONTENT_HASH_ENABLED_PARAM = "contentHashEnabled"; + private List<String> includeFields = + Collections.singletonList("*"); // Included fields defaults to 'all' + private List<String> excludeFields = new ArrayList<>(); + // No excluded field by default, yet hashFieldName is excluded by default + private String hashFieldName = + "content_hash"; // Field name to store computed hash on create/update operations + private boolean discardSameDocuments = true; + + public ContentHashVersionProcessorFactory() {} + + public void init(NamedList<?> args) { + Object tmp = args.remove("includeFields"); + if (tmp != null) { + if (!(tmp instanceof String)) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, "'includeFields' must be configured as a <str>"); + } + // Include fields support comma separated list of fields (e.g. "field1,field2,field3"). + // Also supports "*" to include all fields + this.includeFields = + StrUtils.splitSmart((String) tmp, SEPARATOR).stream() + .map(String::trim) + .collect(Collectors.toList()); + } + tmp = args.remove("hashFieldName"); + if (tmp != null) { + if (!(tmp instanceof String)) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, "'hashFieldName' must be configured as a <str>"); + } + this.hashFieldName = (String) tmp; + } + + tmp = args.remove("excludeFields"); + if (tmp != null) { + if (!(tmp instanceof String)) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, "'excludeFields' must be configured as a <str>"); + } + if ("*".equals(((String) tmp).trim())) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, "'excludeFields' can't exclude all fields."); + } + // Exclude fields support comma separated list of fields (e.g.
+ // "excluded_field1,excluded_field2"). + // (note: "*" alone is rejected above — excluding all fields is not allowed) + this.excludeFields = + StrUtils.splitSmart((String) tmp, SEPARATOR).stream() + .map(String::trim) + .collect(Collectors.toList()); + } + excludeFields.add(hashFieldName); // Hash field name is excluded from hash computation + + tmp = args.remove("hashCompareStrategy"); + if (tmp != null) { + if (!(tmp instanceof String)) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "'hashCompareStrategy' must be configured as a <str>"); + } + String value = ((String) tmp).toLowerCase(Locale.ROOT); + if ("discard".equalsIgnoreCase(value)) { + discardSameDocuments = true; + } else if ("log".equalsIgnoreCase(value)) { + discardSameDocuments = false; + } else { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "Value '" + + value + + "' is unsupported for 'hashCompareStrategy', only 'discard' and 'log' are supported."); + } + } + + super.init(args); + } + + public UpdateRequestProcessor getInstance( + SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) { + if (!req.getParams().getBool(CONTENT_HASH_ENABLED_PARAM, false)) { + return next; + } + + ContentHashVersionProcessor processor = + new ContentHashVersionProcessor( + buildFieldMatcher(includeFields), + buildFieldMatcher(excludeFields), + hashFieldName, + req, + rsp, + next); + processor.setDiscardSameDocuments(discardSameDocuments); + return processor; + } + + public void inform(SolrCore core) { + if (core.getLatestSchema().getUniqueKeyField() == null) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, "schema must have uniqueKey defined."); + } + } + + public String getHashFieldName() { + return hashFieldName; + } + + public List<String> getIncludeFields() { + return includeFields; + } + + public List<String> getExcludeFields() { + return excludeFields; + } + + public boolean discardSameDocuments() { + return discardSameDocuments; + } + + static Predicate<String> buildFieldMatcher(List<String>
fieldNames) { + return fieldName -> { + for (String currentFieldName : fieldNames) { + if ("*".equals(currentFieldName)) { + return true; + } + if (fieldName.equals(currentFieldName)) { + return true; + } + if (currentFieldName.length() > 1 + && currentFieldName.endsWith("*") + && fieldName.startsWith(currentFieldName.substring(0, currentFieldName.length() - 1))) { + return true; + } + } + return false; + }; + } +} diff --git a/solr/core/src/test/org/apache/solr/update/processor/ContentHashVersionProcessorFactoryTest.java b/solr/core/src/test/org/apache/solr/update/processor/ContentHashVersionProcessorFactoryTest.java new file mode 100644 index 00000000000..2f2e920ac6d --- /dev/null +++ b/solr/core/src/test/org/apache/solr/update/processor/ContentHashVersionProcessorFactoryTest.java @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.update.processor; + +import static org.apache.solr.SolrTestCaseJ4.assumeWorkingMockito; +import static org.apache.solr.update.processor.ContentHashVersionProcessorFactory.CONTENT_HASH_ENABLED_PARAM; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.util.List; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.junit.BeforeClass; +import org.junit.Test; + +public class ContentHashVersionProcessorFactoryTest { + + @BeforeClass + public static void beforeClass() throws Exception { + assumeWorkingMockito(); + } + + @Test + public void shouldHaveSensibleDefaultValues() { + ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory(); + assertEquals(List.of("*"), factory.getIncludeFields()); + assertEquals("content_hash", factory.getHashFieldName()); + assertTrue(factory.discardSameDocuments()); + } + + @Test + public void shouldInitWithHashFieldName() { + ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory(); + NamedList args = new NamedList<>(); + args.add("hashFieldName", "_hash_field_"); + factory.init(args); + + assertEquals("_hash_field_", factory.getHashFieldName()); + } + + @Test + public void shouldInitWithAllField() { + ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory(); + NamedList args = new NamedList<>(); + args.add("includeFields", "*"); + factory.init(args); + + assertEquals(1, 
factory.getIncludeFields().size()); + assertEquals("*", factory.getIncludeFields().getFirst()); + } + + @Test + public void shouldInitWithIncludedFields() { + ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory(); + NamedList args = new NamedList<>(); + args.add("includeFields", " field1,field2 , field3 "); + factory.init(args); + + assertEquals(3, factory.getIncludeFields().size()); + assertEquals(List.of("field1", "field2", "field3"), factory.getIncludeFields()); + } + + @Test + public void shouldInitWithExcludedFields() { + ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory(); + NamedList args = new NamedList<>(); + args.add("excludeFields", " field1,field2 , field3 "); + factory.init(args); + + assertEquals(4, factory.getExcludeFields().size()); + assertEquals(List.of("field1", "field2", "field3", "content_hash"), factory.getExcludeFields()); + } + + @Test + public void shouldSelectRejectSameHashStrategy() { + ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory(); + NamedList args = new NamedList<>(); + args.add("hashCompareStrategy", "discard"); + factory.init(args); + + assertTrue(factory.discardSameDocuments()); + } + + @Test + public void shouldSelectLogStrategy() { + ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory(); + NamedList args = new NamedList<>(); + args.add("hashCompareStrategy", "log"); + factory.init(args); + + assertFalse(factory.discardSameDocuments()); + } + + @Test + public void shouldSelectDiscardStrategy() { + ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory(); + NamedList args = new NamedList<>(); + args.add("hashCompareStrategy", "discard"); + factory.init(args); + + assertTrue(factory.discardSameDocuments()); + } + + @Test(expected = SolrException.class) + public void shouldSelectUnsupportedStrategy() { + ContentHashVersionProcessorFactory factory = new 
ContentHashVersionProcessorFactory(); + NamedList args = new NamedList<>(); + args.add("hashCompareStrategy", "unsupported value"); + factory.init(args); + } + + @Test(expected = SolrException.class) + public void shouldRejectExcludeAllFields() { + ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory(); + NamedList args = new NamedList<>(); + args.add("excludeFields", "*"); + factory.init(args); + } + + @Test + public void shouldDisableContentHashByQueryParameter() { + ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory(); + UpdateRequestProcessor next = mock(UpdateRequestProcessor.class); + SolrQueryRequest updateRequest = createUpdateRequest(false); // Request disables processor + + UpdateRequestProcessor instance = + factory.getInstance(updateRequest, mock(SolrQueryResponse.class), next); + + assertEquals(instance, next); + } + + @Test + public void shouldEnableContentHashByQueryParameter() { + ContentHashVersionProcessorFactory factory = new ContentHashVersionProcessorFactory(); + UpdateRequestProcessor next = mock(UpdateRequestProcessor.class); + SolrQueryRequest updateRequest = createUpdateRequest(true); // Request enables processor + + UpdateRequestProcessor instance = + factory.getInstance(updateRequest, mock(SolrQueryResponse.class), next); + + assertNotEquals(instance, next); + } + + private static SolrQueryRequest createUpdateRequest(boolean enableContentHashParamValue) { + SolrQueryRequest req = mock(SolrQueryRequest.class); + SolrParams solrParams = mock(SolrParams.class); + when(solrParams.getBool(eq(CONTENT_HASH_ENABLED_PARAM), anyBoolean())) + .thenReturn(enableContentHashParamValue); + when(req.getParams()).thenReturn(solrParams); + + return req; + } +} diff --git a/solr/core/src/test/org/apache/solr/update/processor/ContentHashVersionProcessorTest.java b/solr/core/src/test/org/apache/solr/update/processor/ContentHashVersionProcessorTest.java new file mode 100644 index 
00000000000..92ea8282f9e --- /dev/null +++ b/solr/core/src/test/org/apache/solr/update/processor/ContentHashVersionProcessorTest.java @@ -0,0 +1,443 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.update.processor; + +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.UUID; +import org.apache.lucene.util.BytesRef; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.schema.UUIDField; +import org.apache.solr.update.AddUpdateCommand; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +public class ContentHashVersionProcessorTest extends 
UpdateProcessorTestBase { + + public static final String ID_FIELD = "_id"; + public static final String HASH_FIELD_NAME = "_hash_"; + public static final String FIRST_FIELD = "field1"; + public static final String SECOND_FIELD = "field2"; + public static final String THIRD_FIELD = "docField3"; + public static final String FOURTH_FIELD = "field4"; + private SolrQueryRequest req; + + private ContentHashVersionProcessor getContentHashVersionProcessor( + SolrQueryRequest req, + SolrQueryResponse rsp, + UpdateRequestProcessor next, + List includedFields, + List excludedFields) { + ContentHashVersionProcessor processor = + new ContentHashVersionProcessor( + ContentHashVersionProcessorFactory.buildFieldMatcher(includedFields), + ContentHashVersionProcessorFactory.buildFieldMatcher(excludedFields), + HASH_FIELD_NAME, + req, + rsp, + next); + + // Given (previous doc retrieval configuration) + processor.setOldDocProvider( + (core, hashField, indexedDocId) -> { + final SolrInputDocument inputDocument = + doc( + f(ID_FIELD, indexedDocId.utf8ToString()), + f(FIRST_FIELD, "Initial values used to compute initial hash"), + f(SECOND_FIELD, "This a constant value for testing include/exclude fields")); + return doc( + f(ID_FIELD, inputDocument.getFieldValue(ID_FIELD)), + f(FIRST_FIELD, inputDocument.getFieldValue(FIRST_FIELD)), + f(SECOND_FIELD, inputDocument.getFieldValue(SECOND_FIELD)), + f(hashField, processor.computeDocHash(inputDocument))); + }); + return processor; + } + + @BeforeClass + public static void beforeClass() throws Exception { + assumeWorkingMockito(); + } + + @Before + public void setUp() throws Exception { + super.setUp(); + + // Given (processor configuration) + req = mock(SolrQueryRequest.class); + when(req.getParams()).thenReturn(mock(SolrParams.class)); + + // Given (schema configuration) + IndexSchema indexSchema = mock(IndexSchema.class); + when(req.getSchema()).thenReturn(indexSchema); + when(indexSchema.getUniqueKeyField()).thenReturn(new 
SchemaField(ID_FIELD, new UUIDField())); + when(indexSchema.indexableUniqueKey(anyString())) + .then(invocationOnMock -> new BytesRef(invocationOnMock.getArgument(0).toString())); + } + + @Test + public void shouldComputeHashForDoc() { + // Given + ContentHashVersionProcessor processor = + getContentHashVersionProcessor( + req, + mock(SolrQueryResponse.class), + mock(UpdateRequestProcessor.class), + List.of("*"), + List.of(ID_FIELD)); + + // Given (doc for update) + SolrInputDocument inputDocument1 = + doc( + f(ID_FIELD, UUID.randomUUID().toString()), + f(FIRST_FIELD, "Values will serve as input to compute a hash"), + f(SECOND_FIELD, "This a constant value for testing include/exclude fields")); + + // Then + assertEquals("Tak0G5a/DIE=", processor.computeDocHash(inputDocument1)); + + // Given (doc for update - with different order) + SolrInputDocument inputDocument2 = + doc( + f(ID_FIELD, UUID.randomUUID().toString()), + f(SECOND_FIELD, "This a constant value for testing include/exclude fields"), + f(FIRST_FIELD, "Values will serve as input to compute a hash")); + + // Then (hash remain same, since id is excluded from signature fields) + assertEquals("Tak0G5a/DIE=", processor.computeDocHash(inputDocument2)); + } + + @Test + public void shouldUseExcludedFieldsWildcard() { + // Given + ContentHashVersionProcessor processor = + getContentHashVersionProcessor( + req, + mock(SolrQueryResponse.class), + mock(UpdateRequestProcessor.class), + List.of("*"), + List.of("field*")); + + // Given (doc for update) + SolrInputDocument inputDocument = + doc( + f(ID_FIELD, "0000000001"), + f(FIRST_FIELD, UUID.randomUUID().toString()), + f(SECOND_FIELD, UUID.randomUUID().toString()), + f(THIRD_FIELD, "constant to have a constant hash"), + f(FOURTH_FIELD, UUID.randomUUID().toString())); + + // Then (only ID and THIRD_FIELD is used in hash, other fields contain random values) + assertEquals( + "bwE8Zjq0aOs=", processor.computeDocHash(inputDocument)); // Hash if only ID field was used + 
} + + @Test + public void shouldUseIncludedFieldsWildcard() { + // Given + ContentHashVersionProcessor processor = + getContentHashVersionProcessor( + req, + mock(SolrQueryResponse.class), + mock(UpdateRequestProcessor.class), + List.of("field*"), + List.of(THIRD_FIELD)); + + // Given (doc for update) + SolrInputDocument inputDocument = + doc( + f(ID_FIELD, "0000000001"), + f(FIRST_FIELD, "constant to have a constant hash for field1"), + f(SECOND_FIELD, "constant to have a constant hash for field2"), + f(THIRD_FIELD, UUID.randomUUID().toString()), + f(FOURTH_FIELD, "constant to have a constant hash for field4")); + + // Then + assertEquals("PozPs2qZQtw=", processor.computeDocHash(inputDocument)); + } + + @Test + public void shouldUseIncludedFieldsWildcard2() { + // Given (variant of previous shouldUseIncludedFieldsWildcard, without the excludedField config) + ContentHashVersionProcessor processor = + getContentHashVersionProcessor( + req, + mock(SolrQueryResponse.class), + mock(UpdateRequestProcessor.class), + List.of("field*"), + List.of()); + + // Given (doc for update) + SolrInputDocument inputDocument = + doc( + f(ID_FIELD, "0000000001"), + f(FIRST_FIELD, "constant to have a constant hash for field1"), + f(SECOND_FIELD, "constant to have a constant hash for field2"), + f(THIRD_FIELD, UUID.randomUUID().toString()), + f(FOURTH_FIELD, "constant to have a constant hash for field4")); + + // Then + assertEquals("PozPs2qZQtw=", processor.computeDocHash(inputDocument)); + } + + @Test + public void shouldDedupIncludedFields() { + // Given (processor to include field1 and field2 only) + ContentHashVersionProcessor processorWithDuplicatedFieldName = + getContentHashVersionProcessor( + req, + mock(SolrQueryResponse.class), + mock(UpdateRequestProcessor.class), + List.of(FIRST_FIELD, FIRST_FIELD, SECOND_FIELD), + List.of()); + + ContentHashVersionProcessor processorWithWildcard = + getContentHashVersionProcessor( + req, + mock(SolrQueryResponse.class), + 
+            mock(UpdateRequestProcessor.class),
+            List.of(
+                SECOND_FIELD,
+                FIRST_FIELD,
+                "field1*"), // Also change order of config (test reorder of field names)
+            List.of());
+
+    // Given (doc for update). THIRD_FIELD is random on purpose: the constant expected hash
+    // asserted below proves that fields outside the configured list do not influence the hash.
+    SolrInputDocument inputDocument =
+        doc(
+            f(ID_FIELD, "0000000001"),
+            f(FIRST_FIELD, "constant to have a constant hash for field1"),
+            f(SECOND_FIELD, "constant to have a constant hash for field2"),
+            f(THIRD_FIELD, UUID.randomUUID().toString()),
+            f(FOURTH_FIELD, "constant to have a constant hash for field4"));
+
+    // Then: both processors compute the same hash, regardless of configured field order,
+    // duplicated field names, or wildcard patterns.
+    assertEquals("XavrOYGlkXM=", processorWithDuplicatedFieldName.computeDocHash(inputDocument));
+    assertEquals("XavrOYGlkXM=", processorWithWildcard.computeDocHash(inputDocument));
+  }
+
+  /** A brand-new doc (old-doc provider returns null) gets a hash field; no "existing" counters. */
+  @Test
+  public void shouldCreateSignatureForNewDoc() throws IOException {
+    // Given
+    SolrQueryResponse response = mock(SolrQueryResponse.class);
+    ContentHashVersionProcessor processor =
+        getContentHashVersionProcessor(
+            req,
+            response,
+            mock(UpdateRequestProcessor.class),
+            Arrays.asList(FIRST_FIELD, SECOND_FIELD),
+            List.of());
+    processor.setDiscardSameDocuments(false);
+    // Simulate "no previously indexed doc" for every id.
+    processor.setOldDocProvider((core, hashField, indexedDocId) -> null);
+
+    // Given (command)
+    AddUpdateCommand cmd = new AddUpdateCommand(req);
+
+    // Given (doc for update)
+    SolrInputDocument inputDocument =
+        doc(
+            f(ID_FIELD, UUID.randomUUID().toString()),
+            f(FIRST_FIELD, "Values will serve as input to compute a hash"),
+            f(SECOND_FIELD, "This a constant value for testing include/exclude fields"));
+    cmd.solrDoc = inputDocument;
+
+    // When
+    processor.processAdd(cmd);
+    processor.finish();
+
+    // Then
+    assertNotNull(inputDocument.getField(HASH_FIELD_NAME)); // signature field got added
+    assertEquals(
+        processor.computeDocHash(inputDocument),
+        inputDocument.getField(HASH_FIELD_NAME).getValue()); // ... and contains expected value
+
+    // Then (asserts on hash comparison results)
+    verify(response, times(1)).addToLog(eq("numAddsExisting"), eq(0));
+    verify(response, times(1))
+        .addToLog(eq("numAddsExistingWithIdentical"), eq(0)); // And no hash clash with old doc
+  }
+
+  /** finish() logs how many adds targeted existing docs and how many of those were identical. */
+  @Test
+  public void shouldAddToResponseLog() throws IOException {
+    // Given
+    SolrQueryResponse response = mock(SolrQueryResponse.class);
+    ContentHashVersionProcessor processor =
+        getContentHashVersionProcessor(
+            req,
+            response,
+            mock(UpdateRequestProcessor.class),
+            Arrays.asList(FIRST_FIELD, SECOND_FIELD),
+            List.of());
+    processor.setDiscardSameDocuments(false);
+
+    // Given (command to update existing doc)
+    AddUpdateCommand cmdDoesNotChangeValues = new AddUpdateCommand(req);
+
+    // Given (doc for update - matches the existing doc, see getContentHashVersionProcessor())
+    SolrInputDocument initialDocument =
+        doc(
+            f(ID_FIELD, UUID.randomUUID().toString()),
+            f(FIRST_FIELD, "Initial values used to compute initial hash"),
+            f(SECOND_FIELD, "This a constant value for testing include/exclude fields"));
+    cmdDoesNotChangeValues.solrDoc =
+        doc(
+            f(ID_FIELD, initialDocument.getFieldValue(ID_FIELD)),
+            f(FIRST_FIELD, initialDocument.getFieldValue(FIRST_FIELD)),
+            f(SECOND_FIELD, initialDocument.getFieldValue(SECOND_FIELD)),
+            f(HASH_FIELD_NAME, processor.computeDocHash(initialDocument)));
+
+    // Given (command to update existing doc with different content)
+    AddUpdateCommand cmdChangesDocValues = new AddUpdateCommand(req);
+
+    // Given (doc for update - does *not* match the existing doc, see
+    // getContentHashVersionProcessor())
+    cmdChangesDocValues.solrDoc =
+        doc(
+            f(ID_FIELD, UUID.randomUUID().toString()),
+            f(FIRST_FIELD, "This is a doc with values"),
+            f(SECOND_FIELD, "that differs from stored doc, so it's considered new"));
+
+    // When
+    processor.processAdd(cmdDoesNotChangeValues);
+    processor.processAdd(cmdChangesDocValues);
+    processor.finish();
+
+    // Then (read as follows: 2 updates occurred for an existing doc; among these updates,
+    // 1 update tried to replace the doc with the same content)
+    verify(response, times(1)).addToLog(eq("numAddsExisting"), eq(2));
+    verify(response, times(1)).addToLog(eq("numAddsExistingWithIdentical"), eq(1));
+  }
+
+  /** Without calling finish(), the hash field is still set but nothing is logged. */
+  @Test
+  public void shouldNotUpdateSignatureForNewDoc() throws IOException {
+    // Given
+    SolrQueryResponse response = mock(SolrQueryResponse.class);
+    ContentHashVersionProcessor processor =
+        getContentHashVersionProcessor(
+            req, response, mock(UpdateRequestProcessor.class), List.of(SECOND_FIELD), List.of());
+
+    // Given (command)
+    AddUpdateCommand cmd = new AddUpdateCommand(req);
+
+    // Given (doc for update)
+    SolrInputDocument inputDocument =
+        doc(
+            f(ID_FIELD, UUID.randomUUID().toString()),
+            f(FIRST_FIELD, "Values will serve as input to compute a hash"),
+            f(SECOND_FIELD, "This a constant value for testing include/exclude fields"));
+    cmd.solrDoc = inputDocument;
+
+    // When
+    processor.processAdd(cmd);
+
+    // Then
+    assertNotNull(inputDocument.getField(HASH_FIELD_NAME)); // signature field got added
+    assertEquals(
+        processor.computeDocHash(inputDocument),
+        inputDocument.getField(HASH_FIELD_NAME).getValue()); // ... and contains expected value
+    // finish() is intentionally not called above, so no counters may reach the response log.
+    verify(response, never()).addToLog(eq("numAddsExisting"), eq(0));
+    verify(response, never()).addToLog(eq("numAddsExistingWithIdentical"), eq(0));
+  }
+
+  /** Fields on the exclude list still produce a valid signature for a new doc. */
+  @Test
+  public void shouldExcludeFieldsUpdateSignatureForNewDoc() throws IOException {
+    // Given
+    SolrQueryResponse response = mock(SolrQueryResponse.class);
+    ContentHashVersionProcessor processor =
+        getContentHashVersionProcessor(
+            req,
+            response,
+            mock(UpdateRequestProcessor.class),
+            List.of(FIRST_FIELD, SECOND_FIELD),
+            List.of(FIRST_FIELD));
+    processor.setDiscardSameDocuments(false);
+
+    // Given (command)
+    AddUpdateCommand cmd = new AddUpdateCommand(req);
+
+    // Given (doc for update)
+    SolrInputDocument inputDocument =
+        doc(
+            f(ID_FIELD, UUID.randomUUID().toString()),
+            f(FIRST_FIELD, "Values will serve as input to compute a hash"),
+            f(SECOND_FIELD, "This a constant value for testing include/exclude fields"));
+    cmd.solrDoc = inputDocument;
+
+    // When
+    processor.processAdd(cmd);
+
+    // Then
+    assertNotNull(inputDocument.getField(HASH_FIELD_NAME)); // signature field got added
+    assertEquals(
+        processor.computeDocHash(inputDocument),
+        inputDocument.getField(HASH_FIELD_NAME).getValue()); // ... and contains expected value
+    verify(response, never()).addToLog(eq("numAddsExisting"), eq(1));
+    verify(response, never()).addToLog(eq("numAddsExistingWithIdentical"), eq(0));
+  }
+
+  /** With discard mode on, an update identical to the stored doc never reaches the next processor. */
+  @Test
+  public void shouldCommitWithDiscardModeEnabled() throws IOException {
+    // Given
+    UpdateRequestProcessor nextProcessor = mock(UpdateRequestProcessor.class);
+    ContentHashVersionProcessor processor =
+        getContentHashVersionProcessor(
+            req,
+            mock(SolrQueryResponse.class),
+            nextProcessor,
+            List.of(FIRST_FIELD, SECOND_FIELD),
+            List.of(FIRST_FIELD));
+    processor.setDiscardSameDocuments(true);
+
+    // Given (command to update existing doc)
+    AddUpdateCommand cmdDoesNotChangeValues = new AddUpdateCommand(req);
+
+    // Given (doc for update - matches the existing doc, see getContentHashVersionProcessor())
+    SolrInputDocument initialDocument =
+        doc(
+            f(ID_FIELD, UUID.randomUUID().toString()),
+            f(FIRST_FIELD, "Initial values used to compute initial hash"),
+            f(SECOND_FIELD, "This a constant value for testing include/exclude fields"));
+    cmdDoesNotChangeValues.solrDoc =
+        doc(
+            f(ID_FIELD, initialDocument.getFieldValue(ID_FIELD)),
+            f(FIRST_FIELD, initialDocument.getFieldValue(FIRST_FIELD)),
+            f(SECOND_FIELD, initialDocument.getFieldValue(SECOND_FIELD)),
+            f(HASH_FIELD_NAME, processor.computeDocHash(initialDocument)));
+
+    // When
+    processor.processAdd(cmdDoesNotChangeValues);
+
+    // Then: the identical doc is discarded, i.e. never forwarded down the processor chain.
+    verify(nextProcessor, never()).processAdd(eq(cmdDoesNotChangeValues));
+  }
+}