From b11d049e3d4116767df095730b0b7b6cbf744674 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= Date: Wed, 25 Mar 2026 13:06:18 +0100 Subject: [PATCH 1/9] [llm-document-erichment] Add first revision of the feature --- .../model/SolrChatModel.java | 201 ++++++++++++++++ .../model/package-info.java | 19 ++ .../store/ChatModelException.java | 30 +++ .../store/ChatModelStore.java | 67 ++++++ .../store/package-info.java | 19 ++ .../store/rest/ManagedChatModelStore.java | 200 ++++++++++++++++ .../store/rest/package-info.java | 19 ++ .../DocumentEnrichmentUpdateProcessor.java | 97 ++++++++ ...umentEnrichmentUpdateProcessorFactory.java | 156 ++++++++++++ .../update/processor/package-info.java | 19 ++ .../dummy-chat-model-ambiguous.json | 8 + .../dummy-chat-model-unsupported.json | 8 + .../modelChatExamples/dummy-chat-model.json | 7 + .../exception-throwing-chat-model.json | 6 + .../mistralai-chat-model.json | 13 + .../modelChatExamples/openai-model.json | 13 + .../cohere-model.json | 0 .../dummy-model-ambiguous.json | 0 .../dummy-model-unsupported.json | 0 .../dummy-model.json | 0 .../exception-throwing-model.json | 0 .../huggingface-model.json | 0 .../mistralai-model.json | 0 .../openai-model.json | 0 .../conf/schema-language-models.xml | 2 + ...richment-update-request-processor-only.xml | 62 +++++ .../conf/solrconfig-document-enrichment.xml | 83 +++++++ .../languagemodels/TestLanguageModelBase.java | 44 +++- .../model/DummyChatModel.java | 80 +++++++ .../model/ExceptionThrowingChatModel.java | 48 ++++ .../store/rest/TestChatModelManager.java | 184 +++++++++++++++ .../rest/TestChatModelManagerPersistence.java | 101 ++++++++ ...stManagedChatModelStoreInitialization.java | 54 +++++ ...tEnrichmentUpdateProcessorFactoryTest.java | 222 ++++++++++++++++++ ...DocumentEnrichmentUpdateProcessorTest.java | 219 +++++++++++++++++ 35 files changed, 1973 insertions(+), 8 deletions(-) create mode 100644 
solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java create mode 100644 solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/package-info.java create mode 100644 solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/ChatModelException.java create mode 100644 solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/ChatModelStore.java create mode 100644 solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/package-info.java create mode 100644 solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/ManagedChatModelStore.java create mode 100644 solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/package-info.java create mode 100644 solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java create mode 100644 solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java create mode 100644 solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/package-info.java create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-ambiguous.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-unsupported.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/exception-throwing-chat-model.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/mistralai-chat-model.json create mode 100644 
solr/modules/language-models/src/test-files/modelChatExamples/openai-model.json rename solr/modules/language-models/src/test-files/{modelExamples => modelEmbeddingExamples}/cohere-model.json (100%) rename solr/modules/language-models/src/test-files/{modelExamples => modelEmbeddingExamples}/dummy-model-ambiguous.json (100%) rename solr/modules/language-models/src/test-files/{modelExamples => modelEmbeddingExamples}/dummy-model-unsupported.json (100%) rename solr/modules/language-models/src/test-files/{modelExamples => modelEmbeddingExamples}/dummy-model.json (100%) rename solr/modules/language-models/src/test-files/{modelExamples => modelEmbeddingExamples}/exception-throwing-model.json (100%) rename solr/modules/language-models/src/test-files/{modelExamples => modelEmbeddingExamples}/huggingface-model.json (100%) rename solr/modules/language-models/src/test-files/{modelExamples => modelEmbeddingExamples}/mistralai-model.json (100%) rename solr/modules/language-models/src/test-files/{modelExamples => modelEmbeddingExamples}/openai-model.json (100%) create mode 100644 solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment-update-request-processor-only.xml create mode 100644 solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml create mode 100644 solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModel.java create mode 100644 solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/ExceptionThrowingChatModel.java create mode 100644 solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java create mode 100644 solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManagerPersistence.java create mode 100644 
solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestManagedChatModelStoreInitialization.java create mode 100644 solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java create mode 100644 solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java new file mode 100644 index 000000000000..9d06001e5903 --- /dev/null +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java @@ -0,0 +1,201 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.languagemodels.documentenrichment.model; + +import dev.langchain4j.data.message.UserMessage; +import dev.langchain4j.model.chat.ChatModel; + +import java.lang.invoke.MethodHandles; +import java.lang.reflect.Method; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Map; +import java.util.Objects; +import dev.langchain4j.model.chat.request.ChatRequest; +import dev.langchain4j.model.chat.response.ChatResponse; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.solr.common.SolrException; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.languagemodels.documentenrichment.store.ChatModelException; +import org.apache.solr.languagemodels.documentenrichment.store.rest.ManagedChatModelStore; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This object wraps a {@link ChatModel} to produce the content of new fields from another. + * It's meant to be used as a managed resource with the {@link + * ManagedChatModelStore} + */ +public class SolrChatModel implements Accountable { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + private static final long BASE_RAM_BYTES = + RamUsageEstimator.shallowSizeOfInstance(SolrChatModel.class); + // timeout is type Duration + private static final String TIMEOUT_PARAM = "timeout"; + + // the following are Integer type + private static final String MAX_RETRIES_PARAM = "maxRetries"; + private static final String THINKING_BUDGET_TOKENS ="thinkingBudgetTokens"; + private static final String RANDOM_SEED = "randomSeed"; + + private final String name; + private final Map params; + private final ChatModel chatModel; + private final int hashCode; + + public static SolrChatModel getInstance( + SolrResourceLoader solrResourceLoader, + String className, + String name, + Map params) + throws ChatModelException { + try { + /* + * The idea here is to 
build a {@link dev.langchain4j.model.chat.ChatModel} using inversion + * of control. + * Each model has its own list of parameters we don't know beforehand, but each {@link dev.langchain4j.model.chat.ChatModel} class + * has its own builder that uses setters with the same name of the parameter in input. + * */ + ChatModel textToTextModel; + Class modelClass = solrResourceLoader.findClass(className, ChatModel.class); + var builder = modelClass.getMethod("builder").invoke(null); + if (params != null) { + /* + * This block of code is responsible for instantiating a {@link + * dev.langchain4j.model.chat.ChatModel} using the params provided. The specific + * implementation of {@link dev.langchain4j.model.chat.ChatModel} is not known + * beforehand, so we rely on the design choice in langchain4j that each subclass + * implementing {@link + * dev.langchain4j.model.chat.ChatModel} uses setters with the same name as the + * param. + */ + for (String paramName : params.keySet()) { + /* + * When a param is not primitive, we need to instantiate the object explicitly and then call the + * setter method. + * N.B. when adding support to new models, pay attention to all the parameters they + * support, some of them may require to be handled in here as separate switch cases + */ + switch (paramName) { + case TIMEOUT_PARAM -> builder + .getClass() + .getMethod(paramName, Duration.class) + .invoke(builder, Duration.ofSeconds((Long) params.get(paramName))); + + case MAX_RETRIES_PARAM, THINKING_BUDGET_TOKENS, RANDOM_SEED -> builder + .getClass() + .getMethod(paramName, Integer.class) + .invoke(builder, ((Long) params.get(paramName)).intValue()); + + /* + * For primitive params if there's only one setter available, we call it. 
+ * If there's choice we default to the string one + */ + default -> { + ArrayList paramNameMatches = new ArrayList<>(); + for (var method : builder.getClass().getMethods()) { + if (paramName.equals(method.getName()) && method.getParameterCount() == 1) { + paramNameMatches.add(method); + } + } + if (paramNameMatches.size() == 1) { + paramNameMatches.getFirst().invoke(builder, params.get(paramName)); + } else { + try { + builder + .getClass() + .getMethod(paramName, String.class) + .invoke(builder, params.get(paramName).toString()); + } catch (NoSuchMethodException e) { + log.error("Parameter {} not supported by model {}", paramName, className); + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e.getMessage(), e); + } + } + } + } + } + } + textToTextModel = (ChatModel) builder.getClass().getMethod("build").invoke(builder); + return new SolrChatModel(name, textToTextModel, params); + } catch (final Exception e) { + throw new ChatModelException("Model loading failed for " + className, e); + } + } + + public SolrChatModel( + String name, ChatModel chatModel, Map params) { + this.name = name; + this.chatModel = chatModel; + this.params = params; + this.hashCode = calculateHashCode(); + } + + public String chat(String text){ + ChatRequest chatRequest = ChatRequest.builder() + //.responseFormat(responseFormat) // used for structured outputs + .messages(UserMessage.from(text)) + .build(); + ChatResponse chatResponse = chatModel.chat(chatRequest); + return chatResponse.aiMessage().text(); // To change in case of structured output support + } + + @Override + public String toString() { + return getClass().getSimpleName() + "(name=" + getName() + ")"; + } + + @Override + public long ramBytesUsed() { + return BASE_RAM_BYTES + + RamUsageEstimator.sizeOfObject(name) + + RamUsageEstimator.sizeOfObject(chatModel); + } + + @Override + public int hashCode() { + return hashCode; + } + + private int calculateHashCode() { + final int prime = 31; + int result = 1; + result = 
(prime * result) + Objects.hashCode(name); + result = (prime * result) + Objects.hashCode(chatModel); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (!(obj instanceof SolrChatModel)) return false; + final SolrChatModel other = (SolrChatModel) obj; + return Objects.equals(chatModel, other.chatModel) && Objects.equals(name, other.name); + } + + public String getName() { + return name; + } + + public String getChatModelClassName() { + return chatModel.getClass().getName(); + } + + public Map getParams() { + return params; + } +} diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/package-info.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/package-info.java new file mode 100644 index 000000000000..9b1575f35d58 --- /dev/null +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/package-info.java @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** APIs and classes that wrap chat models used for document enrichment. 
*/ +package org.apache.solr.languagemodels.documentenrichment.model; diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/ChatModelException.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/ChatModelException.java new file mode 100644 index 000000000000..a3315faaa234 --- /dev/null +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/ChatModelException.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.languagemodels.documentenrichment.store; + +public class ChatModelException extends RuntimeException { + + private static final long serialVersionUID = 1L; + + public ChatModelException(String message) { + super(message); + } + + public ChatModelException(String message, Exception cause) { + super(message, cause); + } +} diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/ChatModelStore.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/ChatModelStore.java new file mode 100644 index 000000000000..96105919c17d --- /dev/null +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/ChatModelStore.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.languagemodels.documentenrichment.store; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import org.apache.solr.languagemodels.documentenrichment.model.SolrChatModel; + +/** Simple store to manage CRUD operations on the {@link SolrChatModel} */ +public class ChatModelStore { + + private final Map availableModels; + + public ChatModelStore() { + availableModels = Collections.synchronizedMap(new LinkedHashMap<>()); + } + + public SolrChatModel getModel(String name) { + return availableModels.get(name); + } + + public void clear() { + availableModels.clear(); + } + + public List getModels() { + synchronized (availableModels) { + final List availableModelsValues = + new ArrayList<>(availableModels.values()); + return Collections.unmodifiableList(availableModelsValues); + } + } + + @Override + public String toString() { + return "ChatModelStore [availableModels=" + availableModels.keySet() + "]"; + } + + public SolrChatModel delete(String modelName) { + return availableModels.remove(modelName); + } + + public void addModel(SolrChatModel modeldata) throws ChatModelException { + final String name = modeldata.getName(); + if (availableModels.putIfAbsent(modeldata.getName(), modeldata) != null) { + throw new ChatModelException( + "model '" + name + "' already exists. Please use a different name"); + } + } +} diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/package-info.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/package-info.java new file mode 100644 index 000000000000..ec20da4f87ee --- /dev/null +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/package-info.java @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Contains model store related classes. */ +package org.apache.solr.languagemodels.documentenrichment.store; diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/ManagedChatModelStore.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/ManagedChatModelStore.java new file mode 100644 index 000000000000..f8c6414354d8 --- /dev/null +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/ManagedChatModelStore.java @@ -0,0 +1,200 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.languagemodels.documentenrichment.store.rest;
+
+import java.lang.invoke.MethodHandles;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import net.jcip.annotations.ThreadSafe;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.languagemodels.documentenrichment.model.SolrChatModel;
+import org.apache.solr.languagemodels.documentenrichment.store.ChatModelException;
+import org.apache.solr.languagemodels.documentenrichment.store.ChatModelStore;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.rest.BaseSolrResource;
+import org.apache.solr.rest.ManagedResource;
+import org.apache.solr.rest.ManagedResourceObserver;
+import org.apache.solr.rest.ManagedResourceStorage;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/** Managed Resource wrapper for the {@link ChatModelStore} to expose it via REST */
+@ThreadSafe
+public class ManagedChatModelStore extends ManagedResource
+    implements ManagedResource.ChildResourceSupport {
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  /** the model store rest endpoint */
+  public static final String REST_END_POINT = "/schema/chat-model-store";
+
+  /** Managed model store: the name of the attribute containing all the models of a model store */
+  private static final String MODELS_JSON_FIELD = "models";
+
+  /** name of the attribute containing a class */
+  static final String CLASS_KEY = "class";
+
+  /** name of the attribute containing a name */
+  static final String NAME_KEY = "name";
+
+  /** name of the attribute containing parameters */
+  static final String PARAMS_KEY = "params";
+
+  /**
+   * Registers this managed resource class at {@link #REST_END_POINT} in the managed resource
+   * registry of the given resource loader.
+   *
+   * @param solrResourceLoader the loader whose managed resource registry receives the registration
+   * @param managedResourceObserver the observer passed along with the registration
+   */
+  public static void registerManagedChatModelStore(
+      SolrResourceLoader solrResourceLoader, ManagedResourceObserver managedResourceObserver) {
+    solrResourceLoader
+        .getManagedResourceRegistry()
+        .registerManagedResource(
+            REST_END_POINT, ManagedChatModelStore.class, managedResourceObserver);
+  }
+
+  /**
+   * Looks up the managed resource registered at {@link #REST_END_POINT} in the core's REST manager.
+   *
+   * @param core the core whose REST manager is queried
+   * @return the resource found at the endpoint, cast to {@link ManagedChatModelStore}
+   */
+  public static ManagedChatModelStore getManagedModelStore(SolrCore core) {
+    return (ManagedChatModelStore) core.getRestManager().getManagedResource(REST_END_POINT);
+  }
+
+  /**
+   * Returns the available models as a list of Map objects. After an update the managed resource
+   * needs to return its content in this format so that it can be stored as JSON somewhere
+   * (zookeeper, disk...)
+   *
+   * @return the available models as a list of Map objects
+   */
+  private static List<Object> modelsAsManagedResources(List<SolrChatModel> models) {
+    return models.stream().map(ManagedChatModelStore::toModelMap).collect(Collectors.toList());
+  }
+
+  /**
+   * Builds a {@link SolrChatModel} from its map representation.
+   *
+   * @param solrResourceLoader the loader handed to {@link SolrChatModel#getInstance}
+   * @param chatModel map holding the {@link #CLASS_KEY}, {@link #NAME_KEY} and {@link #PARAMS_KEY}
+   *     entries of one model
+   * @return the model created by {@link SolrChatModel#getInstance}
+   */
+  @SuppressWarnings("unchecked")
+  public static SolrChatModel fromModelMap(
+      SolrResourceLoader solrResourceLoader, Map<String, Object> chatModel) {
+    return SolrChatModel.getInstance(
+        solrResourceLoader,
+        (String) chatModel.get(CLASS_KEY), // modelClassName
+        (String) chatModel.get(NAME_KEY), // modelName
+        (Map<String, Object>) chatModel.get(PARAMS_KEY));
+  }
+
+  /** Inverse of {@link #fromModelMap}: name, class and params of the model, in that order. */
+  private static LinkedHashMap<String, Object> toModelMap(SolrChatModel model) {
+    final LinkedHashMap<String, Object> modelMap = new LinkedHashMap<>(5, 1.0f);
+    modelMap.put(NAME_KEY, model.getName());
+    modelMap.put(CLASS_KEY, model.getChatModelClassName());
+    modelMap.put(PARAMS_KEY, model.getParams());
+    return modelMap;
+  }
+
+  private final ChatModelStore store;
+
+  /** Raw data handed over by the storage layer; turned into models by {@link #loadStoredModels}. */
+  private Object managedData;
+
+  public ManagedChatModelStore(
+      String resourceId, SolrResourceLoader loader, ManagedResourceStorage.StorageIO storageIO)
+      throws SolrException {
+    super(resourceId, loader, storageIO);
+    store = new ChatModelStore();
+  }
+
+  @Override
+  protected ManagedResourceStorage createStorage(
+      ManagedResourceStorage.StorageIO storageIO, SolrResourceLoader loader) throws SolrException {
+    return new ManagedResourceStorage.JsonStorage(storageIO, loader, -1);
+  }
+
+  /**
+   * Empties the in-memory store and keeps a reference to the loaded data. The models themselves
+   * are only instantiated later, when {@link #loadStoredModels()} is called.
+   */
+  @Override
+  protected void onManagedDataLoadedFromStorage(NamedList<?> managedInitArgs, Object managedData)
+      throws SolrException {
+    store.clear();
+    this.managedData = managedData;
+  }
+
+  /**
+   * Adds to the store every model described in the data received by {@link
+   * #onManagedDataLoadedFromStorage}. Does nothing when that data is not a {@link List}.
+   */
+  public void loadStoredModels() {
+    log.info("------ managed models ~ loading ------");
+
+    // instanceof is false for null, so no separate null check is needed
+    if (managedData instanceof List) {
+      @SuppressWarnings({"unchecked"})
+      final List<Map<String, Object>> chatModels = (List<Map<String, Object>>) managedData;
+      for (final Map<String, Object> chatModel : chatModels) {
+        addModelFromMap(chatModel);
+      }
+    }
+  }
+
+  /** Instantiates the model described by the map and adds it to the store. */
+  private void addModelFromMap(Map<String, Object> modelMap) {
+    try {
+      addModel(fromModelMap(solrResourceLoader, modelMap));
+    } catch (final ChatModelException e) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
+    }
+  }
+
+  /**
+   * Adds the given model to the underlying store.
+   *
+   * @param model the model to add
+   * @throws SolrException with {@code BAD_REQUEST} when the store rejects the model with a {@link
+   *     ChatModelException}
+   */
+  public void addModel(SolrChatModel model) throws SolrException {
+    try {
+      if (log.isInfoEnabled()) {
+        log.info("adding model {}", model.getName());
+      }
+      store.addModel(model);
+    } catch (final ChatModelException e) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
+    }
+  }
+
+  /**
+   * Adds the model(s) contained in the update, given either as a list of model maps or as a single
+   * model map, and returns the resulting content of the store. Any other value (including {@code
+   * null}, as passed by {@link #doDeleteChild}) adds nothing.
+   */
+  @SuppressWarnings("unchecked")
+  @Override
+  protected Object applyUpdatesToManagedData(Object updates) {
+    if (updates instanceof List) {
+      final List<Map<String, Object>> chatModels = (List<Map<String, Object>>) updates;
+      for (final Map<String, Object> chatModel : chatModels) {
+        addModelFromMap(chatModel);
+      }
+    } else if (updates instanceof Map) {
+      final Map<String, Object> map = (Map<String, Object>) updates;
+      addModelFromMap(map);
+    }
+
+    return modelsAsManagedResources(store.getModels());
+  }
+
+  /** Deletes the model named {@code childId} from the store and persists the remaining models. */
+  @Override
+  public void doDeleteChild(BaseSolrResource endpoint, String childId) {
+    store.delete(childId);
+    // a null update adds nothing: this only re-serialises what is left in the store
+    storeManagedData(applyUpdatesToManagedData(null));
+  }
+
+  /**
+   * Called to retrieve a named part (the given childId) of the resource at the given endpoint.
+   * Note: since we have a unique child managed store we ignore the childId.
+   */
+  @Override
+  public void doGet(BaseSolrResource endpoint, String childId) {
+    final SolrQueryResponse response = endpoint.getSolrResponse();
+    response.add(MODELS_JSON_FIELD, modelsAsManagedResources(store.getModels()));
+  }
+
+  /** Returns whatever the underlying store holds under the given model name. */
+  public SolrChatModel getModel(String modelName) {
+    return store.getModel(modelName);
+  }
+
+  @Override
+  public String toString() {
+    return "ManagedChatModelStore [store=" + store + "]";
+  }
+}
diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/package-info.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/package-info.java
new file mode 100644
index 000000000000..dfb013a8a902
--- /dev/null
+++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/store/rest/package-info.java
@@ -0,0 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Contains the {@link org.apache.solr.rest.ManagedResource} that encapsulate the model stores.
*/ +package org.apache.solr.languagemodels.documentenrichment.store.rest; diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java new file mode 100644 index 000000000000..a50160924e96 --- /dev/null +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.solr.languagemodels.documentenrichment.update.processor;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.SolrInputField;
+import org.apache.solr.languagemodels.documentenrichment.model.SolrChatModel;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.update.AddUpdateCommand;
+import org.apache.solr.update.processor.UpdateRequestProcessor;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Update processor that sends the content of one field, injected into a prompt, to a chat model
+ * and stores the model response in another field of the same document.
+ */
+class DocumentEnrichmentUpdateProcessor extends UpdateRequestProcessor {
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  private final IndexSchema schema;
+  private final String inputField;
+  private final String outputField;
+  private final String prompt;
+  private final SolrChatModel chatModel;
+
+  public DocumentEnrichmentUpdateProcessor(
+      String inputField,
+      String outputField,
+      String prompt,
+      SolrChatModel chatModel,
+      SolrQueryRequest req,
+      UpdateRequestProcessor next) {
+    super(next);
+    this.schema = req.getSchema();
+    // prompt must contain "{input}" where the user wants to inject the input data to populate
+    // outputField
+    this.prompt = prompt;
+    this.inputField = inputField;
+    this.outputField = outputField;
+    this.chatModel = chatModel;
+  }
+
+  /**
+   * Enriches the document when its input field has content, then always hands the command to the
+   * next processor. A {@link RuntimeException} raised while calling the model is logged and the
+   * document is passed on without the output field being set.
+   *
+   * @param cmd the update command in input containing the Document to process
+   * @throws IOException If there is a low-level I/O error
+   */
+  @Override
+  public void processAdd(AddUpdateCommand cmd) throws IOException {
+    SolrInputDocument doc = cmd.getSolrInputDocument();
+    SolrInputField inputFieldContent = doc.get(inputField);
+    if (!isNullOrEmpty(inputFieldContent)) {
+      try {
+        // as for now, only a plain text as prompt is sent to the model (no support for
+        // tools/skills/agents)
+        String toInject = inputFieldContent.getValue().toString();
+        String injectedPrompt = prompt.replace("{input}", toInject);
+        String response = chatModel.chat(injectedPrompt);
+        /* TODO: check if the outputField is multivalued and adapt the code/llm call to deal with
+        lists also, together with structured output support
+        */
+        doc.setField(outputField, response);
+      } catch (RuntimeException chatModelFailure) {
+        logEnrichmentFailure(doc, chatModelFailure);
+      }
+    }
+    super.processAdd(cmd);
+  }
+
+  /**
+   * Logs a failed enrichment together with the document's unique key. The schema may have no
+   * unique key field, so both the field name and its value are resolved null-safely: the logging
+   * path must never throw, otherwise a best-effort failure would abort the whole add.
+   */
+  private void logEnrichmentFailure(SolrInputDocument doc, RuntimeException chatModelFailure) {
+    if (log.isErrorEnabled()) {
+      SchemaField uniqueKeyField = schema.getUniqueKeyField();
+      String uniqueKeyFieldName = uniqueKeyField == null ? null : uniqueKeyField.getName();
+      Object uniqueKeyValue =
+          uniqueKeyFieldName == null ? null : doc.getFieldValue(uniqueKeyFieldName);
+      log.error(
+          "Could not process: {} for the document with {}: {}",
+          inputField,
+          uniqueKeyFieldName,
+          uniqueKeyValue,
+          chatModelFailure);
+    }
+  }
+
+  /**
+   * @param inputFieldContent the field to inspect, possibly {@code null}
+   * @return {@code true} when the field is missing, has a {@code null} value, or its value has an
+   *     empty string representation
+   */
+  protected boolean isNullOrEmpty(SolrInputField inputFieldContent) {
+    return (inputFieldContent == null
+        || inputFieldContent.getValue() == null
+        || inputFieldContent.getValue().toString().isEmpty());
+  }
+}
diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java
new file mode 100644
index 000000000000..b40904f55aca
--- /dev/null
+++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.languagemodels.documentenrichment.update.processor; + +import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.RequiredSolrParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrCore; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.languagemodels.documentenrichment.model.SolrChatModel; +import org.apache.solr.languagemodels.documentenrichment.store.rest.ManagedChatModelStore; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.rest.ManagedResource; +import org.apache.solr.rest.ManagedResourceObserver; +import org.apache.solr.schema.StrField; +import org.apache.solr.schema.TextField; +import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.update.processor.UpdateRequestProcessor; +import org.apache.solr.update.processor.UpdateRequestProcessorFactory; +import org.apache.solr.util.plugin.SolrCoreAware; + +/** + * Insert in an existing field the output of the model coming from a textual field value. + * + *

The parameters supported are: + * + *

+ * <processor class="solr.llm.documentenrichment.update.processor.DocumentEnrichmentUpdateProcessorFactory">
+ *   <str name="inputField">textualField</str>
+ *   <str name="outputField">anotherTextualField</str>
+ *   <str name="prompt">Summarize this content: {input}</str>
+ *   <str name="model">ChatModel</str>
+ * </processor>
+ * 
+ *
+ *
+ * *
+ */
+public class DocumentEnrichmentUpdateProcessorFactory extends UpdateRequestProcessorFactory
+    implements SolrCoreAware, ManagedResourceObserver {
+  private static final String INPUT_FIELD_PARAM = "inputField";
+  private static final String OUTPUT_FIELD_PARAM = "outputField";
+  private static final String PROMPT = "prompt";
+  private static final String MODEL_NAME = "model";
+  private ManagedChatModelStore modelStore = null;
+
+  // TODO: change with a list of input fields (check how it's done in other UpdateProcessor that
+  // supports this behaviour)
+  private String inputField;
+  private String outputField;
+  private String prompt;
+  private String modelName;
+  private SolrParams params;
+
+  /**
+   * Reads the processor configuration. {@code inputField}, {@code outputField}, {@code prompt}
+   * and {@code model} are all required, and the prompt must contain the {@code {input}}
+   * placeholder.
+   *
+   * @param args the configuration of the processor
+   * @throws SolrException when the prompt does not contain {@code {input}}
+   */
+  @Override
+  public void init(final NamedList<?> args) {
+    params = args.toSolrParams();
+    RequiredSolrParams required = params.required();
+    inputField = required.get(INPUT_FIELD_PARAM);
+    outputField = required.get(OUTPUT_FIELD_PARAM);
+    prompt = required.get(PROMPT);
+    if (!prompt.contains("{input}")) {
+      throw new SolrException(
+          SolrException.ErrorCode.SERVER_ERROR, "prompt must contain {input} placeholder");
+    }
+    modelName = required.get(MODEL_NAME);
+  }
+
+  /** Registers the chat model store as a managed resource, with this factory as its observer. */
+  @Override
+  public void inform(SolrCore core) {
+    final SolrResourceLoader solrResourceLoader = core.getResourceLoader();
+    ManagedChatModelStore.registerManagedChatModelStore(solrResourceLoader, this);
+  }
+
+  /**
+   * Remembers the chat model store once it has been initialized and asks it to load its stored
+   * models.
+   */
+  @Override
+  public void onManagedResourceInitialized(NamedList<?> args, ManagedResource res)
+      throws SolrException {
+    if (res instanceof ManagedChatModelStore) {
+      modelStore = (ManagedChatModelStore) res;
+    }
+    if (modelStore != null) {
+      modelStore.loadStoredModels();
+    }
+  }
+
+  /**
+   * Creates the processor for one request, after validating the configuration against the latest
+   * schema of the core and resolving the configured model.
+   *
+   * @throws SolrException when the input field is not defined in the schema, the output field is
+   *     not textual, or the configured model is not in the chat model store
+   */
+  @Override
+  public UpdateRequestProcessor getInstance(
+      SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {
+    IndexSchema latestSchema = req.getCore().getLatestSchema();
+
+    if (!latestSchema.isDynamicField(inputField) && !latestSchema.hasExplicitField(inputField)) {
+      throw new SolrException(
+          SolrException.ErrorCode.SERVER_ERROR, "undefined field: \"" + inputField + "\"");
+    }
+
+    final SchemaField outputFieldSchema = latestSchema.getField(outputField);
+    assertIsTextualField(outputFieldSchema);
+
+    // looked up from the core of this request; named differently from the modelStore field so
+    // that the field is not shadowed
+    ManagedChatModelStore requestModelStore =
+        ManagedChatModelStore.getManagedModelStore(req.getCore());
+    SolrChatModel chatModel = requestModelStore.getModel(modelName);
+    if (chatModel == null) {
+      throw new SolrException(
+          SolrException.ErrorCode.SERVER_ERROR,
+          "The model configured in the Update Request Processor '"
+              + modelName
+              + "' can't be found in the store: "
+              + ManagedChatModelStore.REST_END_POINT);
+    }
+
+    return new DocumentEnrichmentUpdateProcessor(
+        inputField, outputField, prompt, chatModel, req, next);
+  }
+
+  /**
+   * Checks that the type of the given field is a {@link StrField} or a {@link TextField}. This is
+   * used on the outputField. Now the support is limited. Can be changed with structured outputs.
+   *
+   * @throws SolrException when the field has any other type
+   */
+  protected void assertIsTextualField(SchemaField schemaField) {
+    FieldType fieldType = schemaField.getType();
+    if (!(fieldType instanceof StrField) && !(fieldType instanceof TextField)) {
+      throw new SolrException(
+          SolrException.ErrorCode.SERVER_ERROR,
+          "only textual fields are compatible with Document Enrichment: "
+              + schemaField.getName());
+    }
+  }
+
+  public String getInputField() {
+    return inputField;
+  }
+
+  public String getOutputField() {
+    return outputField;
+  }
+
+  public String getPrompt() {
+    return prompt;
+  }
+
+  public String getModelName() {
+    return modelName;
+  }
+}
diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/package-info.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/package-info.java
new file mode 100644
index 000000000000..1aaedcf004fd
--- /dev/null
+++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/package-info.java
@@ -0,0 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor
license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Contains update request processor related classes. */ +package org.apache.solr.languagemodels.documentenrichment.update.processor; diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-ambiguous.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-ambiguous.json new file mode 100644 index 000000000000..1d737c9ae9d2 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-ambiguous.json @@ -0,0 +1,8 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-chat-1", + "params": { + "response": "enriched content", + "ambiguous": 10 + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-unsupported.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-unsupported.json new file mode 100644 index 000000000000..5f3404982b90 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-unsupported.json @@ -0,0 +1,8 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-chat-1", + "params": { + "response": 
"enriched content", + "unsupported": 10 + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model.json new file mode 100644 index 000000000000..f331535d5e9f --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-chat-1", + "params": { + "response": "enriched content" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/exception-throwing-chat-model.json b/solr/modules/language-models/src/test-files/modelChatExamples/exception-throwing-chat-model.json new file mode 100644 index 000000000000..29bcce318ada --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/exception-throwing-chat-model.json @@ -0,0 +1,6 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.ExceptionThrowingChatModel", + "name": "exception-throwing-chat-model", + "params": { + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/mistralai-chat-model.json b/solr/modules/language-models/src/test-files/modelChatExamples/mistralai-chat-model.json new file mode 100644 index 000000000000..b8a130191ceb --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/mistralai-chat-model.json @@ -0,0 +1,13 @@ +{ + "class": "dev.langchain4j.model.mistralai.MistralAiChatModel", + "name": "mistralai-chat-1", + "params": { + "baseUrl": "https://api.mistral.ai/v1", + "apiKey": "apiKey-mistralAI", + "modelName": "mistral-small-latest", + "timeout": 60, + "logRequests": true, + "logResponses": true, + "maxRetries": 5 + } +} \ No newline at end of file diff --git 
a/solr/modules/language-models/src/test-files/modelChatExamples/openai-model.json b/solr/modules/language-models/src/test-files/modelChatExamples/openai-model.json new file mode 100644 index 000000000000..74ffde65e3b6 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/openai-model.json @@ -0,0 +1,13 @@ +{ + "class": "dev.langchain4j.model.openai.OpenAiChatModel", + "name": "openai-1", + "params": { + "baseUrl": "https://api.openai.com/v1", + "apiKey": "apiKey-openAI", + "modelName": "gpt-5.4-nano", + "timeout": 60, + "logRequests": true, + "logResponses": true, + "maxRetries": 5 + } +} diff --git a/solr/modules/language-models/src/test-files/modelExamples/cohere-model.json b/solr/modules/language-models/src/test-files/modelEmbeddingExamples/cohere-model.json similarity index 100% rename from solr/modules/language-models/src/test-files/modelExamples/cohere-model.json rename to solr/modules/language-models/src/test-files/modelEmbeddingExamples/cohere-model.json diff --git a/solr/modules/language-models/src/test-files/modelExamples/dummy-model-ambiguous.json b/solr/modules/language-models/src/test-files/modelEmbeddingExamples/dummy-model-ambiguous.json similarity index 100% rename from solr/modules/language-models/src/test-files/modelExamples/dummy-model-ambiguous.json rename to solr/modules/language-models/src/test-files/modelEmbeddingExamples/dummy-model-ambiguous.json diff --git a/solr/modules/language-models/src/test-files/modelExamples/dummy-model-unsupported.json b/solr/modules/language-models/src/test-files/modelEmbeddingExamples/dummy-model-unsupported.json similarity index 100% rename from solr/modules/language-models/src/test-files/modelExamples/dummy-model-unsupported.json rename to solr/modules/language-models/src/test-files/modelEmbeddingExamples/dummy-model-unsupported.json diff --git a/solr/modules/language-models/src/test-files/modelExamples/dummy-model.json 
b/solr/modules/language-models/src/test-files/modelEmbeddingExamples/dummy-model.json similarity index 100% rename from solr/modules/language-models/src/test-files/modelExamples/dummy-model.json rename to solr/modules/language-models/src/test-files/modelEmbeddingExamples/dummy-model.json diff --git a/solr/modules/language-models/src/test-files/modelExamples/exception-throwing-model.json b/solr/modules/language-models/src/test-files/modelEmbeddingExamples/exception-throwing-model.json similarity index 100% rename from solr/modules/language-models/src/test-files/modelExamples/exception-throwing-model.json rename to solr/modules/language-models/src/test-files/modelEmbeddingExamples/exception-throwing-model.json diff --git a/solr/modules/language-models/src/test-files/modelExamples/huggingface-model.json b/solr/modules/language-models/src/test-files/modelEmbeddingExamples/huggingface-model.json similarity index 100% rename from solr/modules/language-models/src/test-files/modelExamples/huggingface-model.json rename to solr/modules/language-models/src/test-files/modelEmbeddingExamples/huggingface-model.json diff --git a/solr/modules/language-models/src/test-files/modelExamples/mistralai-model.json b/solr/modules/language-models/src/test-files/modelEmbeddingExamples/mistralai-model.json similarity index 100% rename from solr/modules/language-models/src/test-files/modelExamples/mistralai-model.json rename to solr/modules/language-models/src/test-files/modelEmbeddingExamples/mistralai-model.json diff --git a/solr/modules/language-models/src/test-files/modelExamples/openai-model.json b/solr/modules/language-models/src/test-files/modelEmbeddingExamples/openai-model.json similarity index 100% rename from solr/modules/language-models/src/test-files/modelExamples/openai-model.json rename to solr/modules/language-models/src/test-files/modelEmbeddingExamples/openai-model.json diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml 
b/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml index ef93fbc057dd..5334762cc388 100644 --- a/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml +++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml @@ -36,11 +36,13 @@ + + diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment-update-request-processor-only.xml b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment-update-request-processor-only.xml new file mode 100644 index 000000000000..522fbfe09267 --- /dev/null +++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment-update-request-processor-only.xml @@ -0,0 +1,62 @@ + + + + + ${tests.luceneMatchVersion:LATEST} + ${solr.data.dir:} + + + + + + + + + + + + + + + 15000 + false + + + 1000 + + + ${solr.data.dir:} + + + + + + explicit + json + true + id + + + + + + string_field + enriched_field + Summarize this content: {input} + dummy-chat-1 + + + + + diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml new file mode 100644 index 000000000000..02015f6296ab --- /dev/null +++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml @@ -0,0 +1,83 @@ + + + + + ${tests.luceneMatchVersion:LATEST} + ${solr.data.dir:} + + + + + + + + + + + + + + + 15000 + false + + + 1000 + + + ${solr.data.dir:} + + + + + + explicit + json + true + id + + + + + + string_field + enriched_field + Summarize this content: {input} + dummy-chat-1 + + + + + + + string_field + enriched_field + Summarize this content: {input} + exception-throwing-chat-model + + + + + + + + string_field + enriched_field + Summarize this 
content: {input} + dummy-chat-1 + + + + + diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/TestLanguageModelBase.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/TestLanguageModelBase.java index aaf3143e3513..d7a4ac9b8c96 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/TestLanguageModelBase.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/TestLanguageModelBase.java @@ -26,6 +26,7 @@ import java.util.List; import org.apache.commons.io.file.PathUtils; import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.languagemodels.documentenrichment.store.rest.ManagedChatModelStore; import org.apache.solr.languagemodels.textvectorisation.store.rest.ManagedTextToVectorModelStore; import org.apache.solr.util.RestTestBase; import org.slf4j.Logger; @@ -38,11 +39,13 @@ public class TestLanguageModelBase extends RestTestBase { protected static Path tmpSolrHome; protected static Path tmpConfDir; - public static final String MODEL_FILE_NAME = "_schema_text-to-vector-model-store.json"; + public static final String EMBEDDING_MODEL_FILE_NAME = "_schema_text-to-vector-model-store.json"; + public static final String CHAT_MODEL_FILE_NAME = "_schema_chat-model-store.json"; protected static final String COLLECTION = "collection1"; protected static final String CONF_DIR = COLLECTION + "/conf"; protected static Path embeddingModelStoreFile = null; + protected static Path chatModelStoreFile = null; protected static String IDField = "id"; protected static String vectorField = "vector"; @@ -61,17 +64,26 @@ protected static void initFolders(boolean isPersistent) throws Exception { tmpSolrHome = createTempDir(); tmpConfDir = tmpSolrHome.resolve(CONF_DIR); PathUtils.copyDirectory(TEST_PATH(), tmpSolrHome.toAbsolutePath()); - final Path modelStore = tmpConfDir.resolve(MODEL_FILE_NAME); + final Path embeddingStore = tmpConfDir.resolve(EMBEDDING_MODEL_FILE_NAME); 
+ final Path chatStore = tmpConfDir.resolve(CHAT_MODEL_FILE_NAME); if (isPersistent) { - embeddingModelStoreFile = modelStore; + embeddingModelStoreFile = embeddingStore; + chatModelStoreFile = chatStore; } - if (Files.exists(modelStore)) { + if (Files.exists(embeddingStore)) { if (log.isInfoEnabled()) { - log.info("remove model store config file in {}", modelStore.toAbsolutePath()); + log.info("remove model store config file in {}", embeddingStore.toAbsolutePath()); } - Files.delete(modelStore); + Files.delete(embeddingStore); + } + + if (Files.exists(chatStore)) { + if (log.isInfoEnabled()) { + log.info("remove chat model store config file in {}", chatStore.toAbsolutePath()); + } + Files.delete(chatStore); } System.setProperty("managed.schema.mutable", "true"); @@ -90,7 +102,7 @@ protected static void afterTest() throws Exception { } public static void loadModel(String fileName, String status) throws Exception { - final URL url = TestLanguageModelBase.class.getResource("/modelExamples/" + fileName); + final URL url = TestLanguageModelBase.class.getResource("/modelEmbeddingExamples/" + fileName); final String multipleModels = Files.readString(Path.of(url.toURI()), StandardCharsets.UTF_8); assertJPut( @@ -100,13 +112,29 @@ public static void loadModel(String fileName, String status) throws Exception { } public static void loadModel(String fileName) throws Exception { - final URL url = TestLanguageModelBase.class.getResource("/modelExamples/" + fileName); + final URL url = TestLanguageModelBase.class.getResource("/modelEmbeddingExamples/" + fileName); final String multipleModels = Files.readString(Path.of(url.toURI()), StandardCharsets.UTF_8); assertJPut( ManagedTextToVectorModelStore.REST_END_POINT, multipleModels, "/responseHeader/status==0"); } + public static void loadChatModel(String fileName, String status) throws Exception { + final URL url = TestLanguageModelBase.class.getResource("/modelChatExamples/" + fileName); + final String model = 
Files.readString(Path.of(url.toURI()), StandardCharsets.UTF_8); + + assertJPut( + ManagedChatModelStore.REST_END_POINT, model, "/responseHeader/status==" + status); + } + + public static void loadChatModel(String fileName) throws Exception { + final URL url = TestLanguageModelBase.class.getResource("/modelChatExamples/" + fileName); + final String model = Files.readString(Path.of(url.toURI()), StandardCharsets.UTF_8); + + assertJPut( + ManagedChatModelStore.REST_END_POINT, model, "/responseHeader/status==0"); + } + protected static void prepareIndex() throws Exception { List docsToIndex = prepareDocs(); for (SolrInputDocument doc : docsToIndex) { diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModel.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModel.java new file mode 100644 index 000000000000..753150cb6f02 --- /dev/null +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModel.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.languagemodels.documentenrichment.model; + +import dev.langchain4j.data.message.AiMessage; +import dev.langchain4j.model.chat.ChatModel; +import dev.langchain4j.model.chat.request.ChatRequest; +import dev.langchain4j.model.chat.response.ChatResponse; + +/** + * A deterministic {@link ChatModel} for testing. It returns a fixed response string regardless of + * the input, allowing tests to assert exact enriched-field values without real API calls. + * + *

The builder also exposes {@code unsupported} and {@code ambiguous} setter methods to exercise + * the reflection-based parameter handling in {@link + * org.apache.solr.languagemodels.documentenrichment.model.SolrChatModel#getInstance}. + */ +public class DummyChatModel implements ChatModel { + + private final String response; + + public DummyChatModel(String response) { + this.response = response; + } + + @Override + public ChatResponse chat(ChatRequest chatRequest) { + return ChatResponse.builder().aiMessage(AiMessage.from(response)).build(); + } + + public static DummyChatModelBuilder builder() { + return new DummyChatModelBuilder(); + } + + public static class DummyChatModelBuilder { + private String response = "dummy response"; + private int intValue; + + public DummyChatModelBuilder() {} + + public DummyChatModelBuilder response(String response) { + this.response = response; + return this; + } + + /** Intentionally has no String overload so the reflection code raises a BAD_REQUEST error. */ + public DummyChatModelBuilder unsupported(Integer input) { + return this; + } + + /** Two overloads make this param "ambiguous": the reflection code should default to String. 
*/ + public DummyChatModelBuilder ambiguous(int input) { + this.intValue = input; + return this; + } + + public DummyChatModelBuilder ambiguous(String input) { + this.intValue = Integer.valueOf(input); + return this; + } + + public DummyChatModel build() { + return new DummyChatModel(this.response); + } + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/ExceptionThrowingChatModel.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/ExceptionThrowingChatModel.java new file mode 100644 index 000000000000..e5eda8d493f1 --- /dev/null +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/ExceptionThrowingChatModel.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.languagemodels.documentenrichment.model; + +import dev.langchain4j.model.chat.ChatModel; +import dev.langchain4j.model.chat.request.ChatRequest; +import dev.langchain4j.model.chat.response.ChatResponse; + +/** + * A {@link ChatModel} that always throws a {@link RuntimeException}. 
Used to verify that {@link + * org.apache.solr.languagemodels.documentenrichment.update.processor.DocumentEnrichmentUpdateProcessor} + * handles chat-model failures gracefully (logs the error and continues indexing without the + * enriched field). + */ +public class ExceptionThrowingChatModel implements ChatModel { + + @Override + public ChatResponse chat(ChatRequest chatRequest) { + throw new RuntimeException("Failed to enrich"); + } + + public static ExceptionThrowingChatModelBuilder builder() { + return new ExceptionThrowingChatModelBuilder(); + } + + public static class ExceptionThrowingChatModelBuilder { + + public ExceptionThrowingChatModelBuilder() {} + + public ExceptionThrowingChatModel build() { + return new ExceptionThrowingChatModel(); + } + } +} diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java new file mode 100644 index 000000000000..60e97e5a6f19 --- /dev/null +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java @@ -0,0 +1,184 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.languagemodels.documentenrichment.store.rest; + +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.languagemodels.TestLanguageModelBase; +import org.apache.solr.languagemodels.documentenrichment.update.processor.DocumentEnrichmentUpdateProcessorFactory; +import org.apache.solr.rest.ManagedResource; +import org.apache.solr.rest.ManagedResourceStorage; +import org.apache.solr.rest.RestManager; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +public class TestChatModelManager extends TestLanguageModelBase { + + @BeforeClass + public static void init() throws Exception { + setupTest("solrconfig-document-enrichment.xml", "schema-language-models.xml", false, false); + } + + @AfterClass + public static void cleanup() throws Exception { + afterTest(); + } + + @Test + public void test() throws Exception { + final SolrResourceLoader loader = new SolrResourceLoader(tmpSolrHome); + + final RestManager.Registry registry = loader.getManagedResourceRegistry(); + assertNotNull( + "Expected a non-null RestManager.Registry from the SolrResourceLoader!", registry); + + final String resourceId = "/schema/mstore1"; + registry.registerManagedResource( + resourceId, ManagedChatModelStore.class, new DocumentEnrichmentUpdateProcessorFactory()); + + final NamedList initArgs = new NamedList<>(); + + final RestManager restManager = new RestManager(); + restManager.init(loader, initArgs, new ManagedResourceStorage.InMemoryStorageIO()); + + final ManagedResource res = restManager.getManagedResource(resourceId); + assertTrue(res instanceof ManagedChatModelStore); + assertEquals(resourceId, res.getResourceId()); + } + + @Test + public void testRestManagerEndpoints() throws Exception { + assertJQ("/schema/managed", "/responseHeader/status==0"); + + final
String openAiClassName = "dev.langchain4j.model.openai.OpenAiChatModel"; + + // fails — no params provided + String model = "{ \"name\":\"testChatModel1\", \"class\":\"" + openAiClassName + "\"}"; + assertJPut(ManagedChatModelStore.REST_END_POINT, model, "/responseHeader/status==400"); + + // success + model = + "{ name:\"testChatModel2\", class:\"" + + openAiClassName + + "\"," + + "params:{" + + "baseUrl:\"https://api.openai.com/v1\"," + + "apiKey:\"testApiKey2\"," + + "modelName:\"gpt-4o-mini\"," + + "logRequests:true," + + "logResponses:false" + + "}}"; + assertJPut(ManagedChatModelStore.REST_END_POINT, model, "/responseHeader/status==0"); + + // success — multiple models in one PUT + final String multipleModels = + "[{ name:\"testChatModel3\", class:\"" + + openAiClassName + + "\"," + + "params:{baseUrl:\"https://api.openai.com/v1\"," + + "apiKey:\"testApiKey3\"," + + "modelName:\"gpt-4o-mini\"," + + "logRequests:true," + + "logResponses:false" + + "}}\n" + + ",{ name:\"testChatModel4\", class:\"" + + openAiClassName + + "\"," + + "params:{baseUrl:\"https://api.openai.com/v1\"," + + "apiKey:\"testApiKey4\"," + + "modelName:\"gpt-4o-mini\"," + + "logRequests:true," + + "logResponses:false" + + "}}]"; + assertJPut(ManagedChatModelStore.REST_END_POINT, multipleModels, "/responseHeader/status==0"); + + final String qryResult = JQ(ManagedChatModelStore.REST_END_POINT); + assertTrue( + qryResult.contains("\"name\":\"testChatModel2\"") + && qryResult.contains("\"name\":\"testChatModel3\"") + && qryResult.contains("\"name\":\"testChatModel4\"")); + + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/name=='testChatModel2'"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[1]/name=='testChatModel3'"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[2]/name=='testChatModel4'"); + restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/testChatModel2"); + restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + 
"/testChatModel3"); + restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/testChatModel4"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models==[]"); + } + + @Test + public void loadChatModel_openAi_shouldLoadModelConfig() throws Exception { + loadChatModel("openai-model.json"); + + final String modelName = "openai-1"; + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ( + ManagedChatModelStore.REST_END_POINT, + "/models/[0]/params/baseUrl=='https://api.openai.com/v1'"); + assertJQ( + ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-openAI'"); + assertJQ( + ManagedChatModelStore.REST_END_POINT, + "/models/[0]/params/modelName=='gpt-5.4-nano'"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/maxRetries==5"); + + restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/" + modelName); + } + + @Test + public void loadChatModel_mistralAi_shouldLoadModelConfig() throws Exception { + loadChatModel("mistralai-chat-model.json"); + + final String modelName = "mistralai-chat-1"; + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ( + ManagedChatModelStore.REST_END_POINT, + "/models/[0]/params/baseUrl=='https://api.mistral.ai/v1'"); + assertJQ( + ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-mistralAI'"); + assertJQ( + ManagedChatModelStore.REST_END_POINT, + "/models/[0]/params/modelName=='mistral-small-latest'"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); +
assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/maxRetries==5"); + + restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/" + modelName); + } + + @Test + public void loadChatModel_dummyUnsupportedParam_shouldRaiseError() throws Exception { + loadChatModel("dummy-chat-model-unsupported.json", "400"); + } + + @Test + public void loadChatModel_dummyAmbiguousParam_shouldDefaultToString() throws Exception { + loadChatModel("dummy-chat-model-ambiguous.json"); + + final String modelName = "dummy-chat-1"; + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/ambiguous==10"); + + restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/" + modelName); + } +} diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManagerPersistence.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManagerPersistence.java new file mode 100644 index 000000000000..654c98556ab4 --- /dev/null +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManagerPersistence.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.languagemodels.documentenrichment.store.rest; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import org.apache.solr.common.util.Utils; +import org.apache.solr.languagemodels.TestLanguageModelBase; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +public class TestChatModelManagerPersistence extends TestLanguageModelBase { + + @Before + public void init() throws Exception { + setupTest("solrconfig-document-enrichment.xml", "schema-language-models.xml", false, true); + } + + @After + public void cleanup() throws Exception { + afterTest(); + } + + @Test + public void testModelAreStoredCompact() throws Exception { + loadChatModel("openai-model.json"); + + final String JSONOnDisk = Files.readString(chatModelStoreFile, StandardCharsets.UTF_8); + Object objectFromDisk = Utils.fromJSONString(JSONOnDisk); + assertEquals(new String(Utils.toJSON(objectFromDisk, -1), UTF_8), JSONOnDisk); + } + + @Test + public void testModelStorePersistence() throws Exception { + // check store is empty at start + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/==[]"); + + // load a model + loadChatModel("openai-model.json"); + + final String modelName = "openai-1"; + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ( + ManagedChatModelStore.REST_END_POINT, + "/models/[0]/params/baseUrl=='https://api.openai.com/v1'"); + assertJQ( + ManagedChatModelStore.REST_END_POINT, 
"/models/[0]/params/apiKey=='apiKey-openAI'"); + assertJQ( + ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/modelName=='gpt-5.4-nano'"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/maxRetries==5"); + + // check persistence after reload + restTestHarness.reload(); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ( + ManagedChatModelStore.REST_END_POINT, + "/models/[0]/params/baseUrl=='https://api.openai.com/v1'"); + assertJQ( + ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-openAI'"); + assertJQ( + ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/modelName=='gpt-5.4-nano'"); + + // check persistence after restart + getJetty().stop(); + getJetty().start(); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ( + ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/modelName=='gpt-5.4-nano'"); + + // delete model and verify persistence of the empty state + restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/" + modelName); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/==[]"); + + restTestHarness.reload(); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/==[]"); + + getJetty().stop(); + getJetty().start(); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/==[]"); + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestManagedChatModelStoreInitialization.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestManagedChatModelStoreInitialization.java 
new file mode 100644 index 000000000000..0106558401a8 --- /dev/null +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestManagedChatModelStoreInitialization.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.languagemodels.documentenrichment.store.rest; + +import org.apache.solr.languagemodels.TestLanguageModelBase; +import org.junit.After; +import org.junit.Test; + +public class TestManagedChatModelStoreInitialization extends TestLanguageModelBase { + + @After + public void cleanUp() throws Exception { + afterTest(); + } + + @Test + public void managedChatModelStore_whenUpdateRequestComponentConfigured_shouldBeInitialized() + throws Exception { + setupTest( + "solrconfig-document-enrichment-update-request-processor-only.xml", + "schema-language-models.xml", + false, + false); + + assertJQ(ManagedChatModelStore.REST_END_POINT, "/responseHeader/status==0"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models==[]"); + } + + @Test + public void managedChatModelStore_whenNoComponents_shouldNotBeInitialized() throws Exception { + setupTest( + "solrconfig-language-models-no-components.xml", "schema-language-models.xml", false, false); + assertJQ( + ManagedChatModelStore.REST_END_POINT, + "/responseHeader/status==400", + "/error/msg=='No REST managed resource registered for path " + + ManagedChatModelStore.REST_END_POINT + + "'"); + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java new file mode 100644 index 000000000000..91d773dbeb61 --- /dev/null +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java @@ -0,0 +1,222 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.languagemodels.documentenrichment.update.processor; + +import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrCore; +import org.apache.solr.languagemodels.TestLanguageModelBase; +import org.apache.solr.languagemodels.documentenrichment.model.SolrChatModel; +import org.apache.solr.languagemodels.documentenrichment.store.rest.ManagedChatModelStore; +import org.apache.solr.request.SolrQueryRequestBase; +import org.apache.solr.update.processor.UpdateRequestProcessor; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +public class DocumentEnrichmentUpdateProcessorFactoryTest extends TestLanguageModelBase { + + @BeforeClass + public static void init() throws Exception { + setupTest("solrconfig-document-enrichment.xml", "schema-language-models.xml", false, false); + } + + @AfterClass + public static void cleanup() throws Exception { + afterTest(); + } + + SolrCore collection1; + + @Before + public void setup() { + collection1 = solrTestRule.getCoreContainer().getCore("collection1"); + } + + @After + public void after() { + collection1.close(); + 
} + + @Test + public void init_fullArgs_shouldInitAllParams() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("outputField", "enriched_field"); + args.add("prompt", "Summarize: {input}"); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + factory.init(args); + + assertEquals("string_field", factory.getInputField()); + assertEquals("enriched_field", factory.getOutputField()); + assertEquals("Summarize: {input}", factory.getPrompt()); + assertEquals("model1", factory.getModelName()); + } + + @Test + public void init_nullInputField_shouldThrowExceptionWithDetailedMessage() { + NamedList args = new NamedList<>(); + args.add("outputField", "enriched_field"); + args.add("prompt", "Summarize: {input}"); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + + SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); + assertEquals("Missing required parameter: inputField", e.getMessage()); + } + + @Test + public void init_nullOutputField_shouldThrowExceptionWithDetailedMessage() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("prompt", "Summarize: {input}"); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + + SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); + assertEquals("Missing required parameter: outputField", e.getMessage()); + } + + @Test + public void init_nullPrompt_shouldThrowExceptionWithDetailedMessage() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("outputField", "enriched_field"); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + + 
SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); + assertEquals("Missing required parameter: prompt", e.getMessage()); + } + + @Test + public void init_missingPlaceholderPrompt_shouldThrowExceptionWithDetailedMessage() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("outputField", "enriched_field"); + args.add("prompt", "Summarize:"); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + + SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); + assertEquals("prompt must contain {input} placeholder", e.getMessage()); + } + + @Test + public void init_nullModel_shouldThrowExceptionWithDetailedMessage() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("outputField", "enriched_field"); + args.add("prompt", "Summarize: {input}"); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + + SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); + assertEquals("Missing required parameter: model", e.getMessage()); + } + + /* Following tests depend on a real solr schema and depend on BeforeClass-AfterClass methods */ + + @Test + public void init_notExistentOutputField_shouldThrowExceptionWithDetailedMessage() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("outputField", "notExistentOutput"); + args.add("prompt", "Summarize: {input}"); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + ModifiableSolrParams params = new ModifiableSolrParams(); + SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {}; + factory.init(args); + + SolrException e = assertThrows(SolrException.class, () -> factory.getInstance(req, null, null)); + 
assertEquals("undefined field: \"notExistentOutput\"", e.getMessage()); + } + + @Test + public void init_notTextualOutputField_shouldThrowExceptionWithDetailedMessage() { + // vector is a DenseVectorField — not a textual field + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("outputField", "vector"); + args.add("prompt", "Summarize: {input}"); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + ModifiableSolrParams params = new ModifiableSolrParams(); + SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {}; + factory.init(args); + + SolrException e = assertThrows(SolrException.class, () -> factory.getInstance(req, null, null)); + assertEquals( + "only textual fields are compatible with Document Enrichment: vector", e.getMessage()); + } + + @Test + public void init_notExistentInputField_shouldThrowExceptionWithDetailedMessage() { + NamedList args = new NamedList<>(); + args.add("inputField", "notExistentInput"); + args.add("outputField", "enriched_field"); + args.add("prompt", "Summarize: {input}"); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + ModifiableSolrParams params = new ModifiableSolrParams(); + SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {}; + factory.init(args); + + SolrException e = assertThrows(SolrException.class, () -> factory.getInstance(req, null, null)); + assertEquals("undefined field: \"notExistentInput\"", e.getMessage()); + } + + @Test + public void init_dynamicInputField_shouldNotThrowException() { + UpdateRequestProcessor instance = + createUpdateProcessor("text_s", "enriched_field", collection1, "model1"); + assertNotNull(instance); + } + + private UpdateRequestProcessor createUpdateProcessor( + String inputFieldName, String outputFieldName, SolrCore core, String modelName) { + 
NamedList args = new NamedList<>(); + + ManagedChatModelStore.getManagedModelStore(core) + .addModel(new SolrChatModel(modelName, null, null)); + args.add("inputField", inputFieldName); + args.add("outputField", outputFieldName); + args.add("prompt", "Summarize: {input}"); + args.add("model", modelName); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + ModifiableSolrParams params = new ModifiableSolrParams(); + factory.init(args); + + SolrQueryRequestBase req = new SolrQueryRequestBase(core, params) {}; + + return factory.getInstance(req, null, null); + } +} diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java new file mode 100644 index 000000000000..76d691cdebad --- /dev/null +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java @@ -0,0 +1,219 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.languagemodels.documentenrichment.update.processor; + +import java.io.IOException; +import java.util.Map; +import org.apache.solr.client.solrj.RemoteSolrException; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.request.SolrQuery; +import org.apache.solr.client.solrj.request.UpdateRequest; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.languagemodels.TestLanguageModelBase; +import org.apache.solr.languagemodels.documentenrichment.store.rest.ManagedChatModelStore; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +public class DocumentEnrichmentUpdateProcessorTest extends TestLanguageModelBase { + + @BeforeClass + public static void init() throws Exception { + setupTest("solrconfig-document-enrichment.xml", "schema-language-models.xml", false, false); + } + + @AfterClass + public static void cleanup() throws Exception { + afterTest(); + } + + @After + public void afterEachTest() throws Exception { + restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/dummy-chat-1"); + restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/exception-throwing-chat-model"); + } + + @Test + public void processAdd_inputField_shouldEnrichInputField() throws Exception { + loadChatModel("dummy-chat-model.json"); + + addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); + addWithChain(sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), "documentEnrichment"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery(); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/enriched_field=='enriched content'", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/enriched_field=='enriched content'"); + +
restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/dummy-1"); // clean up + } + + /* + This test looks for the 'dummy-chat-1' model, but such model is not loaded — + the model store is empty, so the update fails. + */ + @Test + public void processAdd_modelNotFound_shouldThrowException() { + RuntimeException thrown = + assertThrows( + "model not found should throw an exception", + RemoteSolrException.class, + () -> + addWithChain( + sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), + "documentEnrichment")); + assertTrue( + thrown + .getMessage() + .contains( + "The model configured in the Update Request Processor 'dummy-chat-1' can't be found in the store: /schema/chat-model-store")); + } + + @Test + public void processAdd_emptyInputField_shouldLogAndIndexWithNoEnrichedField() throws Exception { + loadChatModel("dummy-chat-model.json"); + addWithChain(sdoc("id", "99", "string_field", ""), "documentEnrichment"); + addWithChain(sdoc("id", "98", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery(); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "!/response/docs/[0]/enriched_field==", // no enriched field for doc 99 + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/enriched_field=='enriched content'"); + } + + @Test + public void processAdd_nullInputField_shouldLogAndIndexWithNoEnrichedField() throws Exception { + loadChatModel("dummy-chat-model.json"); + addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); + assertU(adoc("id", "98")); // no string_field + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery(); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/enriched_field=='enriched content'", + "/response/docs/[1]/id=='98'", + 
"!/response/docs/[1]/enriched_field=="); // no enriched field for doc 98 + } + + @Test + public void processAdd_failingEnrichment_shouldLogAndIndexWithNoEnrichedField() throws Exception { + loadChatModel("exception-throwing-chat-model.json"); + addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "failingDocumentEnrichment"); + addWithChain(sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), "failingDocumentEnrichment"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery(); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "!/response/docs/[0]/enriched_field==", // no enriched field for doc 99 + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/enriched_field=="); // no enriched field for doc 98 + } + + @Test + public void processAtomicUpdate_shouldTriggerEnrichmentAndFetchTheStoredContent() + throws Exception { + // Verifies that when using a processor chain configured for partial updates + // (i.e., DistributedUpdateProcessorFactory before DocumentEnrichmentUpdateProcessorFactory), + // the system correctly retrieves the stored value of string_field and generates the + // enriched content for the document. 
+ loadChatModel("dummy-chat-model.json"); + assertU(adoc("id", "99", "string_field", "Vegeta is the saiyan prince.")); + assertU(adoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth.")); + assertU(commit()); + + SolrInputDocument atomicDoc = new SolrInputDocument(); + atomicDoc.setField("id", "99"); + atomicDoc.setField("enriched", Map.of("set", "true")); + addWithChain(atomicDoc, "documentEnrichmentForPartialUpdates"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery(); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/enriched_field=='enriched content'", + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/enriched_field==" // no enriched field for document 98 + ); + } + + @Test + public void processAtomicUpdate_shouldReplaceExistingEnrichedFieldNotAppend() throws Exception { + // Verifies that when a document already contains an enriched_field and string_field is + // modified via atomic update, the enriched content is recomputed and replaces the previous + // value rather than being appended. 
+ loadChatModel("dummy-chat-model.json"); + addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); + addWithChain(sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), "documentEnrichment"); + assertU(commit()); + + SolrInputDocument atomicDoc = new SolrInputDocument(); + atomicDoc.setField("id", "99"); + atomicDoc.setField("string_field", Map.of("set", "Vegeta is the saiyan prince from the Dragon Ball series.")); + addWithChain(atomicDoc, "documentEnrichmentForPartialUpdates"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery(); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/enriched_field=='enriched content'", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/enriched_field=='enriched content'"); + } + + private SolrQuery getEnrichmentQuery() { + final SolrQuery query = new SolrQuery(); + query.setQuery("*:*"); + query.add("fl", "id,enriched_field"); + query.add("sort", "id desc"); + return query; + } + + void addWithChain(SolrInputDocument document, String updateChain) + throws SolrServerException, IOException { + UpdateRequest req = new UpdateRequest(); + req.add(document); + req.setParam("update.chain", updateChain); + solrTestRule.getSolrClient("collection1").request(req); + } +} From dfb27abb1d7c34a6401d3cf23121bddca0174e8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= Date: Thu, 26 Mar 2026 10:01:24 +0100 Subject: [PATCH 2/9] [llm-document-enrichment] First working version --- .../model/DummyChatModelTest.java | 48 +++++++++++++++++++ .../store/rest/TestChatModelManager.java | 6 +-- 2 files changed, 51 insertions(+), 3 deletions(-) create mode 100644 solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModelTest.java diff --git 
a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModelTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModelTest.java new file mode 100644 index 000000000000..6449b7b2f55c --- /dev/null +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModelTest.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.languagemodels.documentenrichment.model; + +import dev.langchain4j.data.message.UserMessage; +import dev.langchain4j.model.chat.request.ChatRequest; +import org.apache.solr.SolrTestCase; +import org.junit.Test; + +public class DummyChatModelTest extends SolrTestCase { + + @Test + public void constructAndChat() throws Exception { + assertEquals( + "hello world", + new DummyChatModel("hello world") + .chat(ChatRequest.builder().messages(UserMessage.from("any input")).build()) + .aiMessage() + .text()); + assertEquals( + "fixed response", + new DummyChatModel("fixed response") + .chat(ChatRequest.builder().messages(UserMessage.from("another input")).build()) + .aiMessage() + .text()); + assertEquals( + "dummy response", + DummyChatModel.builder() + .build() + .chat(ChatRequest.builder().messages(UserMessage.from("default")).build()) + .aiMessage() + .text()); + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java index 60e97e5a6f19..dc1b67e0debb 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java @@ -68,11 +68,11 @@ public void testRestManagerEndpoints() throws Exception { final String openAiClassName = "dev.langchain4j.model.openai.OpenAiChatModel"; // fails — no params provided - String model = "{ \"name\":\"testChatModel1\", \"class\":\"" + openAiClassName + "\"}"; - assertJPut(ManagedChatModelStore.REST_END_POINT, model, "/responseHeader/status==400"); +// String model = "{ \"name\":\"testChatModel1\", \"class\":\"" + openAiClassName + "\"}"; +// 
assertJPut(ManagedChatModelStore.REST_END_POINT, model, "/responseHeader/status==400"); // success - model = + String model = "{ name:\"testChatModel2\", class:\"" + openAiClassName + "\"," From 827548a6549343e8bfe53d11715a162c0e3e05ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= Date: Thu, 26 Mar 2026 12:32:55 +0100 Subject: [PATCH 3/9] [llm-document-enrichment] Add promptFile feature to DocumentEnrichmentUpdateProcessorFactory --- ...umentEnrichmentUpdateProcessorFactory.java | 63 +++++++++++++++++-- .../conf/prompt-no-placeholder.txt | 1 + .../solr/collection1/conf/prompt.txt | 1 + ...tEnrichmentUpdateProcessorFactoryTest.java | 51 ++++++++++++++- 4 files changed, 109 insertions(+), 7 deletions(-) create mode 100644 solr/modules/language-models/src/test-files/solr/collection1/conf/prompt-no-placeholder.txt create mode 100644 solr/modules/language-models/src/test-files/solr/collection1/conf/prompt.txt diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java index b40904f55aca..8e26971da485 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java @@ -17,6 +17,9 @@ package org.apache.solr.languagemodels.documentenrichment.update.processor; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.RequiredSolrParams; import org.apache.solr.common.params.SolrParams; @@ -47,24 +50,38 @@ * <processor 
class="solr.llm.documentenrichment.update.processor.DocumentEnrichmentUpdateProcessorFactory"> * <str name="inputField">textualField</str> * <str name="outputField">anotherTextualField</str> + * <str name="prompt">Summarize: {input}</str> * <str name="model">ChatModel</str> * </processor> * * + *

<p>Alternatively, the prompt can be loaded from a text file using {@code promptFile}: * - * * + * <pre>

+ * &lt;processor class="solr.llm.documentenrichment.update.processor.DocumentEnrichmentUpdateProcessorFactory"&gt;
+ *   &lt;str name="inputField"&gt;textualField&lt;/str&gt;
+ *   &lt;str name="outputField"&gt;anotherTextualField&lt;/str&gt;
+ *   &lt;str name="promptFile"&gt;prompt.txt&lt;/str&gt;
+ *   &lt;str name="model"&gt;ChatModel&lt;/str&gt;
+ * &lt;/processor&gt;
+ * </pre>
+ * + *

Exactly one of {@code prompt} or {@code promptFile} must be provided. The prompt (from either + * source) must contain the {@code {input}} placeholder. */ public class DocumentEnrichmentUpdateProcessorFactory extends UpdateRequestProcessorFactory implements SolrCoreAware, ManagedResourceObserver { private static final String INPUT_FIELD_PARAM = "inputField"; private static final String OUTPUT_FIELD_PARAM = "outputField"; private static final String PROMPT = "prompt"; + private static final String PROMPT_FILE = "promptFile"; private static final String MODEL_NAME = "model"; private ManagedChatModelStore modelStore = null; private String inputField; // TODO: change with a list of input fields (check how it's done in other UpdateProcessor that supports this behaviour) private String outputField; private String prompt; + private String promptFile; private String modelName; private SolrParams params; @@ -74,19 +91,51 @@ public void init(final NamedList args) { RequiredSolrParams required = params.required(); inputField = required.get(INPUT_FIELD_PARAM); outputField = required.get(OUTPUT_FIELD_PARAM); - prompt = required.get(PROMPT); - if (!prompt.contains("{input}")) { + modelName = required.get(MODEL_NAME); + + String inlinePrompt = params.get(PROMPT); + String promptFilePath = params.get(PROMPT_FILE); + + if (inlinePrompt == null && promptFilePath == null) { throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, - "prompt must contain {input} placeholder"); + "Either 'prompt' or 'promptFile' must be provided"); } - modelName = required.get(MODEL_NAME); + if (inlinePrompt != null && promptFilePath != null) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "Only one of 'prompt' or 'promptFile' can be provided, not both"); + } + if (inlinePrompt != null) { + if (!inlinePrompt.contains("{input}")) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "prompt must contain {input} placeholder"); + } + this.prompt = inlinePrompt; 
+ } + this.promptFile = promptFilePath; } @Override public void inform(SolrCore core) { final SolrResourceLoader solrResourceLoader = core.getResourceLoader(); ManagedChatModelStore.registerManagedChatModelStore(solrResourceLoader, this); + if (promptFile != null) { + try (InputStream is = solrResourceLoader.openResource(promptFile)) { + prompt = new String(is.readAllBytes(), StandardCharsets.UTF_8).trim(); + } catch (IOException e) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "Cannot read prompt file: " + promptFile, + e); + } + if (!prompt.contains("{input}")) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "prompt must contain {input} placeholder"); + } + } } @Override @@ -153,4 +202,8 @@ public String getPrompt() { public String getModelName() { return modelName; } + + public String getPromptFile() { + return promptFile; + } } diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt-no-placeholder.txt b/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt-no-placeholder.txt new file mode 100644 index 000000000000..c43c5399dc07 --- /dev/null +++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt-no-placeholder.txt @@ -0,0 +1 @@ +Summarize this content without the placeholder. 
\ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt.txt b/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt.txt new file mode 100644 index 000000000000..a9e89d5bd9dc --- /dev/null +++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt.txt @@ -0,0 +1 @@ +Summarize this content: {input} \ No newline at end of file diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java index 91d773dbeb61..15fb9c37ad16 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java @@ -99,7 +99,7 @@ public void init_nullOutputField_shouldThrowExceptionWithDetailedMessage() { } @Test - public void init_nullPrompt_shouldThrowExceptionWithDetailedMessage() { + public void init_neitherPromptNorPromptFile_shouldThrowExceptionWithDetailedMessage() { NamedList args = new NamedList<>(); args.add("inputField", "string_field"); args.add("outputField", "enriched_field"); @@ -108,7 +108,54 @@ public void init_nullPrompt_shouldThrowExceptionWithDetailedMessage() { DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); - assertEquals("Missing required parameter: prompt", e.getMessage()); + assertEquals("Either 'prompt' or 'promptFile' must be provided", e.getMessage()); + } + + @Test + public void 
init_bothPromptAndPromptFile_shouldThrowExceptionWithDetailedMessage() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("outputField", "enriched_field"); + args.add("prompt", "Summarize: {input}"); + args.add("promptFile", "prompt.txt"); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + + SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); + assertEquals("Only one of 'prompt' or 'promptFile' can be provided, not both", e.getMessage()); + } + + @Test + public void init_promptFile_shouldLoadPromptFromFile() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("outputField", "enriched_field"); + args.add("promptFile", "prompt.txt"); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + factory.init(args); + factory.inform(collection1); + + assertEquals("prompt.txt", factory.getPromptFile()); + assertNotNull(factory.getPrompt()); + assertTrue(factory.getPrompt().contains("{input}")); + } + + @Test + public void init_promptFileWithMissingPlaceholder_shouldThrowExceptionWithDetailedMessage() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("outputField", "enriched_field"); + args.add("promptFile", "prompt-no-placeholder.txt"); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + factory.init(args); + + SolrException e = assertThrows(SolrException.class, () -> factory.inform(collection1)); + assertEquals("prompt must contain {input} placeholder", e.getMessage()); } @Test From cc943170231017069f4d850ae9a09f64a5e9daad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= Date: Fri, 27 Mar 2026 15:58:57 +0100 Subject: [PATCH 4/9] [llm-document-enrichment] Add 
multiple inputField support + tests --- .../DocumentEnrichmentUpdateProcessor.java | 63 ++++--- ...umentEnrichmentUpdateProcessorFactory.java | 111 ++++++++---- .../collection1/conf/prompt-multi-field.txt | 1 + .../solr/collection1/conf/prompt.txt | 2 +- .../conf/schema-language-models.xml | 1 + ...richment-update-request-processor-only.xml | 2 +- .../conf/solrconfig-document-enrichment.xml | 30 +++- ...tEnrichmentUpdateProcessorFactoryTest.java | 161 ++++++++++++++---- ...DocumentEnrichmentUpdateProcessorTest.java | 113 ++++++++++++ 9 files changed, 389 insertions(+), 95 deletions(-) create mode 100644 solr/modules/language-models/src/test-files/solr/collection1/conf/prompt-multi-field.txt diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java index a50160924e96..5abd5629d8bb 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.lang.invoke.MethodHandles; +import java.util.List; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputField; import org.apache.solr.languagemodels.documentenrichment.model.SolrChatModel; @@ -34,13 +35,13 @@ class DocumentEnrichmentUpdateProcessor extends UpdateRequestProcessor { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private IndexSchema schema; - private final String inputField; + private final List inputFields; private final String outputField; private final String prompt; private SolrChatModel 
chatModel; public DocumentEnrichmentUpdateProcessor( - String inputField, + List inputFields, String outputField, String prompt, SolrChatModel chatModel, @@ -48,10 +49,9 @@ public DocumentEnrichmentUpdateProcessor( UpdateRequestProcessor next) { super(next); this.schema = req.getSchema(); - // prompt must contain "{input}" where the user wants to inject the input data to populate outputField - this.prompt = prompt; - this.inputField = inputField; + this.inputFields = inputFields; this.outputField = outputField; + this.prompt = prompt; this.chatModel = chatModel; } @@ -62,28 +62,35 @@ public DocumentEnrichmentUpdateProcessor( @Override public void processAdd(AddUpdateCommand cmd) throws IOException { SolrInputDocument doc = cmd.getSolrInputDocument(); - SolrInputField inputFieldContent = doc.get(inputField); - if (!isNullOrEmpty(inputFieldContent)) { - try { - // as for now, only a plain text as prompt is sent to the model (no support for tools/skills/agents) - String toInject = inputFieldContent.getValue().toString(); - String injectedPrompt = prompt.replace("{input}", toInject); - String response = chatModel.chat(injectedPrompt); - /* TODO: check if the outputField is multivalued and adapt the code/llm call to deal with lists also, together - with structured output support - */ - doc.setField(outputField, response); - } catch (RuntimeException chatModelFailure) { - if (log.isErrorEnabled()) { - SchemaField uniqueKeyField = schema.getUniqueKeyField(); - String uniqueKeyFieldName = uniqueKeyField.getName(); - log.error( - "Could not process: {} for the document with {}: {}", - inputField, - uniqueKeyFieldName, - doc.getFieldValue(uniqueKeyFieldName), - chatModelFailure); - } + + // Collect all field values; skip enrichment if any declared field is null or empty + String injectedPrompt = prompt; + for (String fieldName : inputFields) { + SolrInputField field = doc.get(fieldName); + if (isNullOrEmpty(field)) { + super.processAdd(cmd); + return; + } + injectedPrompt = 
injectedPrompt.replace("{" + fieldName + "}", field.getValue().toString()); + } + + try { + // as for now, only a plain text as prompt is sent to the model (no support for tools/skills/agents) + String response = chatModel.chat(injectedPrompt); + /* TODO: check if the outputField is multivalued and adapt the code/llm call to deal with lists also, together + with structured output support + */ + doc.setField(outputField, response); + } catch (RuntimeException chatModelFailure) { + if (log.isErrorEnabled()) { + SchemaField uniqueKeyField = schema.getUniqueKeyField(); + String uniqueKeyFieldName = uniqueKeyField.getName(); + log.error( + "Could not process fields {} for the document with {}: {}", + inputFields, + uniqueKeyFieldName, + doc.getFieldValue(uniqueKeyFieldName), + chatModelFailure); } } super.processAdd(cmd); @@ -94,4 +101,4 @@ protected boolean isNullOrEmpty(SolrInputField inputFieldContent) { || inputFieldContent.getValue() == null || inputFieldContent.getValue().toString().isEmpty()); } -} +} \ No newline at end of file diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java index 8e26971da485..50e710838b63 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java @@ -20,6 +20,13 @@ import java.io.IOException; import java.io.InputStream; import java.nio.charset.StandardCharsets; +import java.util.Collection; +import java.util.HashSet; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Set; +import 
java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.RequiredSolrParams; import org.apache.solr.common.params.SolrParams; @@ -32,25 +39,28 @@ import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.rest.ManagedResource; import org.apache.solr.rest.ManagedResourceObserver; -import org.apache.solr.schema.StrField; -import org.apache.solr.schema.TextField; import org.apache.solr.schema.FieldType; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; +import org.apache.solr.schema.StrField; +import org.apache.solr.schema.TextField; import org.apache.solr.update.processor.UpdateRequestProcessor; import org.apache.solr.update.processor.UpdateRequestProcessorFactory; import org.apache.solr.util.plugin.SolrCoreAware; /** - * Insert in an existing field the output of the model coming from a textual field value. + * Insert in an existing field the output of the model coming from one or more textual field values. * - *

<p>The parameters supported are: + *

<p>One or more {@code inputField} parameters specify the Solr fields to use as input. Each field + * name must appear as a {@code {fieldName}} placeholder in the prompt. Exactly one of {@code + * prompt} or {@code promptFile} must be provided. * * <pre>

  * &lt;processor class="solr.llm.documentenrichment.update.processor.DocumentEnrichmentUpdateProcessorFactory"&gt;
- *   &lt;str name="inputField"&gt;textualField&lt;/str&gt;
- *   &lt;str name="outputField"&gt;anotherTextualField&lt;/str&gt;
- *   &lt;str name="prompt"&gt;Summarize: {input}&lt;/str&gt;
+ *   &lt;str name="inputField"&gt;title_field&lt;/str&gt;
+ *   &lt;str name="inputField"&gt;body_field&lt;/str&gt;
+ *   &lt;str name="outputField"&gt;enriched_field&lt;/str&gt;
+ *   &lt;str name="prompt"&gt;Title: {title_field}. Body: {body_field}.&lt;/str&gt;
  *   &lt;str name="model"&gt;ChatModel&lt;/str&gt;
  * &lt;/processor&gt;
  * </pre>
@@ -59,15 +69,22 @@ * * <pre>
  * &lt;processor class="solr.llm.documentenrichment.update.processor.DocumentEnrichmentUpdateProcessorFactory"&gt;
- *   &lt;str name="inputField"&gt;textualField&lt;/str&gt;
- *   &lt;str name="outputField"&gt;anotherTextualField&lt;/str&gt;
+ *   &lt;str name="inputField"&gt;title_field&lt;/str&gt;
+ *   &lt;str name="outputField"&gt;enriched_field&lt;/str&gt;
  *   &lt;str name="promptFile"&gt;prompt.txt&lt;/str&gt;
  *   &lt;str name="model"&gt;ChatModel&lt;/str&gt;
  * &lt;/processor&gt;
  * </pre>
* - *

<p>Exactly one of {@code prompt} or {@code promptFile} must be provided. The prompt (from either - * source) must contain the {@code {input}} placeholder. + *

<p>Validation rules: + * + * <ul>

    + *
  <li>At least one {@code inputField} must be declared. + *
  <li>Exactly one of {@code prompt} or {@code promptFile} must be provided. + *
  <li>Every declared {@code inputField} must have a corresponding {@code {fieldName}} placeholder + * in the prompt. + *
  <li>Every {@code {placeholder}} in the prompt must correspond to a declared {@code inputField}. + * </ul>
*/ public class DocumentEnrichmentUpdateProcessorFactory extends UpdateRequestProcessorFactory implements SolrCoreAware, ManagedResourceObserver { @@ -76,9 +93,11 @@ public class DocumentEnrichmentUpdateProcessorFactory extends UpdateRequestProce private static final String PROMPT = "prompt"; private static final String PROMPT_FILE = "promptFile"; private static final String MODEL_NAME = "model"; + private static final Pattern PLACEHOLDER_PATTERN = Pattern.compile("\\{([^}]+)\\}"); + private ManagedChatModelStore modelStore = null; - private String inputField; // TODO: change with a list of input fields (check how it's done in other UpdateProcessor that supports this behaviour) + private List inputFields; private String outputField; private String prompt; private String promptFile; @@ -87,9 +106,18 @@ public class DocumentEnrichmentUpdateProcessorFactory extends UpdateRequestProce @Override public void init(final NamedList args) { + // removeConfigArgs handles both multiple <str name="inputField"> and <arr name="inputField"> + // and must be called before toSolrParams() since it mutates args in place + Collection fieldNames = args.removeConfigArgs(INPUT_FIELD_PARAM); + if (fieldNames.isEmpty()) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "At least one 'inputField' must be provided"); + } + inputFields = List.copyOf(fieldNames); + params = args.toSolrParams(); RequiredSolrParams required = params.required(); - inputField = required.get(INPUT_FIELD_PARAM); outputField = required.get(OUTPUT_FIELD_PARAM); modelName = required.get(MODEL_NAME); @@ -107,11 +135,7 @@ public void init(final NamedList args) { "Only one of 'prompt' or 'promptFile' can be provided, not both"); } if (inlinePrompt != null) { - if (!inlinePrompt.contains("{input}")) { - throw new SolrException( - SolrException.ErrorCode.SERVER_ERROR, - "prompt must contain {input} placeholder"); - } + validatePromptPlaceholders(inlinePrompt, inputFields); this.prompt = inlinePrompt; } this.promptFile = promptFilePath; @@ -130,11
+154,7 @@ public void inform(SolrCore core) { "Cannot read prompt file: " + promptFile, e); } - if (!prompt.contains("{input}")) { - throw new SolrException( - SolrException.ErrorCode.SERVER_ERROR, - "prompt must contain {input} placeholder"); - } + validatePromptPlaceholders(prompt, inputFields); } } @@ -154,16 +174,17 @@ public UpdateRequestProcessor getInstance( SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) { IndexSchema latestSchema = req.getCore().getLatestSchema(); - if (!latestSchema.isDynamicField(inputField) && !latestSchema.hasExplicitField(inputField)) { - throw new SolrException( - SolrException.ErrorCode.SERVER_ERROR, "undefined field: \"" + inputField + "\""); + for (String fieldName : inputFields) { + if (!latestSchema.isDynamicField(fieldName) && !latestSchema.hasExplicitField(fieldName)) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, "undefined field: \"" + fieldName + "\""); + } } final SchemaField outputFieldSchema = latestSchema.getField(outputField); assertIsTextualField(outputFieldSchema); - ManagedChatModelStore modelStore = - ManagedChatModelStore.getManagedModelStore(req.getCore()); + ManagedChatModelStore modelStore = ManagedChatModelStore.getManagedModelStore(req.getCore()); SolrChatModel chatModel = modelStore.getModel(modelName); if (chatModel == null) { throw new SolrException( @@ -174,8 +195,10 @@ public UpdateRequestProcessor getInstance( + ManagedChatModelStore.REST_END_POINT); } - return new DocumentEnrichmentUpdateProcessor(inputField, outputField, prompt, chatModel, req, next); + return new DocumentEnrichmentUpdateProcessor( + inputFields, outputField, prompt, chatModel, req, next); } + // This is used on the outputField. Now the support is limited. Can be changed with structured outputs. 
protected void assertIsTextualField(SchemaField schemaField) { FieldType fieldType = schemaField.getType(); @@ -187,8 +210,32 @@ protected void assertIsTextualField(SchemaField schemaField) { } } - public String getInputField() { - return inputField; + private static void validatePromptPlaceholders(String prompt, List fieldNames) { + Set promptPlaceholders = new LinkedHashSet<>(); + Matcher m = PLACEHOLDER_PATTERN.matcher(prompt); + while (m.find()) { + promptPlaceholders.add(m.group(1)); + } + + Set missingInPrompt = new LinkedHashSet<>(fieldNames); + missingInPrompt.removeAll(promptPlaceholders); + if (!missingInPrompt.isEmpty()) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "prompt is missing placeholders for inputField(s): " + missingInPrompt); + } + + Set unknownInPrompt = new LinkedHashSet<>(promptPlaceholders); + unknownInPrompt.removeAll(new HashSet<>(fieldNames)); + if (!unknownInPrompt.isEmpty()) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "prompt contains placeholders not declared as inputField(s): " + unknownInPrompt); + } + } + + public List getInputFields() { + return inputFields; } public String getOutputField() { @@ -206,4 +253,4 @@ public String getModelName() { public String getPromptFile() { return promptFile; } -} +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt-multi-field.txt b/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt-multi-field.txt new file mode 100644 index 000000000000..65c2f125e36c --- /dev/null +++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt-multi-field.txt @@ -0,0 +1 @@ +Title: {string_field}. Body: {body_field}. 
\ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt.txt b/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt.txt index a9e89d5bd9dc..502449a5cf5d 100644 --- a/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt.txt +++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/prompt.txt @@ -1 +1 @@ -Summarize this content: {input} \ No newline at end of file +Summarize this content: {string_field} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml b/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml index 5334762cc388..f7ad738784f6 100644 --- a/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml +++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml @@ -36,6 +36,7 @@ + diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment-update-request-processor-only.xml b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment-update-request-processor-only.xml index 522fbfe09267..7aa85a8b362a 100644 --- a/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment-update-request-processor-only.xml +++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment-update-request-processor-only.xml @@ -53,7 +53,7 @@ string_field enriched_field - Summarize this content: {input} + Summarize this content: {string_field} dummy-chat-1 diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml index 02015f6296ab..25f07fea4272 100644 --- 
a/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml +++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml @@ -53,7 +53,7 @@ string_field enriched_field - Summarize this content: {input} + Summarize this content: {string_field} dummy-chat-1 @@ -63,7 +63,7 @@ string_field enriched_field - Summarize this content: {input} + Summarize this content: {string_field} exception-throwing-chat-model @@ -74,10 +74,32 @@ string_field enriched_field - Summarize this content: {input} + Summarize this content: {string_field} dummy-chat-1 - + + + string_field + body_field + enriched_field + Title: {string_field}. Body: {body_field}. + dummy-chat-1 + + + + + + + string_field + body_field + enriched_field + Title: {string_field}. Body: {body_field}. + exception-throwing-chat-model + + + + + \ No newline at end of file diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java index 15fb9c37ad16..2f2a91686859 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java @@ -16,6 +16,7 @@ */ package org.apache.solr.languagemodels.documentenrichment.update.processor; +import java.util.List; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.util.NamedList; @@ -60,36 +61,51 @@ public void init_fullArgs_shouldInitAllParams() { NamedList args = new NamedList<>(); 
args.add("inputField", "string_field"); args.add("outputField", "enriched_field"); - args.add("prompt", "Summarize: {input}"); + args.add("prompt", "Summarize: {string_field}"); args.add("model", "model1"); DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); factory.init(args); - assertEquals("string_field", factory.getInputField()); + assertEquals(List.of("string_field"), factory.getInputFields()); assertEquals("enriched_field", factory.getOutputField()); - assertEquals("Summarize: {input}", factory.getPrompt()); + assertEquals("Summarize: {string_field}", factory.getPrompt()); assertEquals("model1", factory.getModelName()); } @Test - public void init_nullInputField_shouldThrowExceptionWithDetailedMessage() { + public void init_multipleInputFields_shouldInitAllFields() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("inputField", "body_field"); + args.add("outputField", "enriched_field"); + args.add("prompt", "Title: {string_field}. 
Body: {body_field}."); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + factory.init(args); + + assertEquals(List.of("string_field", "body_field"), factory.getInputFields()); + } + + @Test + public void init_noInputField_shouldThrowExceptionWithDetailedMessage() { NamedList args = new NamedList<>(); args.add("outputField", "enriched_field"); - args.add("prompt", "Summarize: {input}"); + args.add("prompt", "Summarize: {string_field}"); args.add("model", "model1"); DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); - assertEquals("Missing required parameter: inputField", e.getMessage()); + assertEquals("At least one 'inputField' must be provided", e.getMessage()); } @Test public void init_nullOutputField_shouldThrowExceptionWithDetailedMessage() { NamedList args = new NamedList<>(); args.add("inputField", "string_field"); - args.add("prompt", "Summarize: {input}"); + args.add("prompt", "Summarize: {string_field}"); args.add("model", "model1"); DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); @@ -116,7 +132,7 @@ public void init_bothPromptAndPromptFile_shouldThrowExceptionWithDetailedMessage NamedList args = new NamedList<>(); args.add("inputField", "string_field"); args.add("outputField", "enriched_field"); - args.add("prompt", "Summarize: {input}"); + args.add("prompt", "Summarize: {string_field}"); args.add("promptFile", "prompt.txt"); args.add("model", "model1"); @@ -127,49 +143,48 @@ public void init_bothPromptAndPromptFile_shouldThrowExceptionWithDetailedMessage } @Test - public void init_promptFile_shouldLoadPromptFromFile() { + public void init_promptMissingPlaceholderForDeclaredField_shouldThrowExceptionWithDetailedMessage() { NamedList args = new NamedList<>(); args.add("inputField", 
"string_field"); args.add("outputField", "enriched_field"); - args.add("promptFile", "prompt.txt"); + args.add("prompt", "Summarize:"); args.add("model", "model1"); DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); - factory.init(args); - factory.inform(collection1); - assertEquals("prompt.txt", factory.getPromptFile()); - assertNotNull(factory.getPrompt()); - assertTrue(factory.getPrompt().contains("{input}")); + SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); + assertEquals("prompt is missing placeholders for inputField(s): [string_field]", e.getMessage()); } @Test - public void init_promptFileWithMissingPlaceholder_shouldThrowExceptionWithDetailedMessage() { + public void init_promptMissingOnePlaceholderOfMultipleFields_shouldThrowExceptionWithDetailedMessage() { NamedList args = new NamedList<>(); args.add("inputField", "string_field"); + args.add("inputField", "body_field"); args.add("outputField", "enriched_field"); - args.add("promptFile", "prompt-no-placeholder.txt"); + args.add("prompt", "Title: {string_field}."); args.add("model", "model1"); DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); - factory.init(args); - SolrException e = assertThrows(SolrException.class, () -> factory.inform(collection1)); - assertEquals("prompt must contain {input} placeholder", e.getMessage()); + SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); + assertEquals("prompt is missing placeholders for inputField(s): [body_field]", e.getMessage()); } @Test - public void init_missingPlaceholderPrompt_shouldThrowExceptionWithDetailedMessage() { + public void init_promptHasExtraPlaceholderNotDeclaredAsInputField_shouldThrowExceptionWithDetailedMessage() { NamedList args = new NamedList<>(); args.add("inputField", "string_field"); args.add("outputField", "enriched_field"); - args.add("prompt", "Summarize:"); + args.add("prompt", 
"Title: {string_field}. Extra: {unknown_field}."); args.add("model", "model1"); DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); SolrException e = assertThrows(SolrException.class, () -> factory.init(args)); - assertEquals("prompt must contain {input} placeholder", e.getMessage()); + assertEquals( + "prompt contains placeholders not declared as inputField(s): [unknown_field]", + e.getMessage()); } @Test @@ -177,7 +192,7 @@ public void init_nullModel_shouldThrowExceptionWithDetailedMessage() { NamedList args = new NamedList<>(); args.add("inputField", "string_field"); args.add("outputField", "enriched_field"); - args.add("prompt", "Summarize: {input}"); + args.add("prompt", "Summarize: {string_field}"); DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); @@ -185,6 +200,57 @@ public void init_nullModel_shouldThrowExceptionWithDetailedMessage() { assertEquals("Missing required parameter: model", e.getMessage()); } + @Test + public void init_promptFile_shouldLoadPromptFromFile() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("outputField", "enriched_field"); + args.add("promptFile", "prompt.txt"); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + factory.init(args); + factory.inform(collection1); + + assertEquals("prompt.txt", factory.getPromptFile()); + assertNotNull(factory.getPrompt()); + assertTrue(factory.getPrompt().contains("{string_field}")); + } + + @Test + public void init_promptFileMultiField_shouldLoadAndValidateBothPlaceholders() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("inputField", "body_field"); + args.add("outputField", "enriched_field"); + args.add("promptFile", "prompt-multi-field.txt"); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = 
new DocumentEnrichmentUpdateProcessorFactory(); + factory.init(args); + factory.inform(collection1); + + assertNotNull(factory.getPrompt()); + assertTrue(factory.getPrompt().contains("{string_field}")); + assertTrue(factory.getPrompt().contains("{body_field}")); + } + + @Test + public void init_promptFileWithMissingPlaceholder_shouldThrowExceptionInInform() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("outputField", "enriched_field"); + args.add("promptFile", "prompt-no-placeholder.txt"); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + factory.init(args); + + SolrException e = assertThrows(SolrException.class, () -> factory.inform(collection1)); + assertEquals( + "prompt is missing placeholders for inputField(s): [string_field]", e.getMessage()); + } + /* Following tests depend on a real solr schema and depend on BeforeClass-AfterClass methods */ @Test @@ -192,7 +258,7 @@ public void init_notExistentOutputField_shouldThrowExceptionWithDetailedMessage( NamedList args = new NamedList<>(); args.add("inputField", "string_field"); args.add("outputField", "notExistentOutput"); - args.add("prompt", "Summarize: {input}"); + args.add("prompt", "Summarize: {string_field}"); args.add("model", "model1"); DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); @@ -210,7 +276,7 @@ public void init_notTextualOutputField_shouldThrowExceptionWithDetailedMessage() NamedList args = new NamedList<>(); args.add("inputField", "string_field"); args.add("outputField", "vector"); - args.add("prompt", "Summarize: {input}"); + args.add("prompt", "Summarize: {string_field}"); args.add("model", "model1"); DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); @@ -228,7 +294,25 @@ public void init_notExistentInputField_shouldThrowExceptionWithDetailedMessage() NamedList args 
= new NamedList<>(); args.add("inputField", "notExistentInput"); args.add("outputField", "enriched_field"); - args.add("prompt", "Summarize: {input}"); + args.add("prompt", "Summarize: {notExistentInput}"); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + ModifiableSolrParams params = new ModifiableSolrParams(); + SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {}; + factory.init(args); + + SolrException e = assertThrows(SolrException.class, () -> factory.getInstance(req, null, null)); + assertEquals("undefined field: \"notExistentInput\"", e.getMessage()); + } + + @Test + public void init_multipleInputFields_oneNotExistent_shouldThrowExceptionWithDetailedMessage() { + NamedList args = new NamedList<>(); + args.add("inputField", "string_field"); + args.add("inputField", "notExistentInput"); + args.add("outputField", "enriched_field"); + args.add("prompt", "Title: {string_field}. Body: {notExistentInput}."); args.add("model", "model1"); DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); @@ -243,10 +327,29 @@ public void init_notExistentInputField_shouldThrowExceptionWithDetailedMessage() @Test public void init_dynamicInputField_shouldNotThrowException() { UpdateRequestProcessor instance = - createUpdateProcessor("text_s", "enriched_field", collection1, "model1"); + createUpdateProcessor("text_s", "enriched_field", collection1, "model2"); assertNotNull(instance); } + @Test + public void init_multipleDynamicInputFields_shouldNotThrowException() { + NamedList args = new NamedList<>(); + ManagedChatModelStore.getManagedModelStore(collection1) + .addModel(new SolrChatModel("model1", null, null)); + args.add("inputField", "text_s"); + args.add("inputField", "body_field"); + args.add("outputField", "enriched_field"); + args.add("prompt", "Title: {text_s}. 
Body: {body_field}."); + args.add("model", "model1"); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + ModifiableSolrParams params = new ModifiableSolrParams(); + factory.init(args); + + SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {}; + assertNotNull(factory.getInstance(req, null, null)); + } + private UpdateRequestProcessor createUpdateProcessor( String inputFieldName, String outputFieldName, SolrCore core, String modelName) { NamedList args = new NamedList<>(); @@ -255,7 +358,7 @@ private UpdateRequestProcessor createUpdateProcessor( .addModel(new SolrChatModel(modelName, null, null)); args.add("inputField", inputFieldName); args.add("outputField", outputFieldName); - args.add("prompt", "Summarize: {input}"); + args.add("prompt", "Summarize: {" + inputFieldName + "}"); args.add("model", modelName); DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java index 76d691cdebad..d715fb6e8e93 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java @@ -201,6 +201,119 @@ public void processAtomicUpdate_shouldReplaceExistingEnrichedFieldNotAppend() th "/response/docs/[1]/enriched_field=='enriched content'"); } + // --- multi-field tests --- + + @Test + public void processAdd_multipleInputFields_allPresent_shouldEnrichDocument() throws Exception { + loadChatModel("dummy-chat-model.json"); 
+ + addWithChain( + sdoc("id", "99", "string_field", "Vegeta is the saiyan prince.", "body_field", "He is very proud."), + "documentEnrichmentMultiField"); + addWithChain( + sdoc("id", "98", "string_field", "Kakaroth is a saiyan.", "body_field", "He grew up on Earth."), + "documentEnrichmentMultiField"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery(); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/enriched_field=='enriched content'", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/enriched_field=='enriched content'"); + } + + @Test + public void processAdd_multipleInputFields_firstFieldNull_shouldSkipEnrichment() throws Exception { + loadChatModel("dummy-chat-model.json"); + + addWithChain( + sdoc("id", "99", "body_field", "He is very proud."), // string_field absent + "documentEnrichmentMultiField"); + addWithChain( + sdoc("id", "98", "body_field", "He is very jealous."), // string_field absent + "documentEnrichmentMultiField"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery(); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "!/response/docs/[0]/enriched_field==", + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/enriched_field=="); + } + + @Test + public void processAdd_multipleInputFields_secondFieldEmpty_shouldSkipEnrichment() throws Exception { + loadChatModel("dummy-chat-model.json"); + + addWithChain( + sdoc("id", "99", "string_field", "Vegeta is the saiyan prince.", "body_field", ""), + "documentEnrichmentMultiField"); + addWithChain( + sdoc("id", "98", "string_field", "Goku is the best saiyan.", "body_field", ""), + "documentEnrichmentMultiField"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery(); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + 
"!/response/docs/[0]/enriched_field==", + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/enriched_field=="); + } + + @Test + public void processAdd_multipleInputFields_bothFieldsAbsent_shouldSkipEnrichment() throws Exception { + loadChatModel("dummy-chat-model.json"); + + addWithChain(sdoc("id", "99"), "documentEnrichmentMultiField"); + addWithChain(sdoc("id", "98"), "documentEnrichmentMultiField"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery(); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "!/response/docs/[0]/enriched_field==", + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/enriched_field=="); + } + + @Test + public void processAdd_multipleInputFields_failingModel_shouldLogAndSkipEnrichment() throws Exception { + loadChatModel("exception-throwing-chat-model.json"); + + addWithChain( + sdoc("id", "99", "string_field", "Vegeta is the saiyan prince.", "body_field", "He is very proud."), + "failingDocumentEnrichmentMultiField"); + addWithChain( + sdoc("id", "98", "string_field", "Kakaroth is a saiyan.", "body_field", "He grew up on Earth."), + "failingDocumentEnrichmentMultiField"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery(); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "!/response/docs/[0]/enriched_field==", + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/enriched_field=="); + } + private SolrQuery getEnrichmentQuery() { final SolrQuery query = new SolrQuery(); query.setQuery("*:*"); From c723362b1c83f2f50b43b71761970dc60e9f746b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= Date: Mon, 30 Mar 2026 16:52:10 +0200 Subject: [PATCH 5/9] [llm-document-enrichment] Add support for: - multivalued outputField - outputField different from Str/Text, with numeric, boolean and date --- .../model/SolrChatModel.java | 35 +-
.../DocumentEnrichmentUpdateProcessor.java | 25 +- ...umentEnrichmentUpdateProcessorFactory.java | 95 ++++- .../dummy-chat-model-multivalued-boolean.json | 7 + .../dummy-chat-model-multivalued-date.json | 7 + .../dummy-chat-model-multivalued-double.json | 7 + .../dummy-chat-model-multivalued-float.json | 7 + .../dummy-chat-model-multivalued-int.json | 7 + .../dummy-chat-model-multivalued-long.json | 7 + .../dummy-chat-model-multivalued-string.json | 7 + .../dummy-chat-model-single-boolean.json | 7 + .../dummy-chat-model-single-date.json | 7 + .../dummy-chat-model-single-double.json | 7 + .../dummy-chat-model-single-float.json | 7 + .../dummy-chat-model-single-int.json | 7 + .../dummy-chat-model-single-long.json | 7 + .../modelChatExamples/dummy-chat-model.json | 2 +- .../conf/schema-language-models.xml | 23 +- .../conf/solrconfig-document-enrichment.xml | 130 ++++++ ...tEnrichmentUpdateProcessorFactoryTest.java | 48 ++- ...DocumentEnrichmentUpdateProcessorTest.java | 370 ++++++++++++++++-- 21 files changed, 762 insertions(+), 57 deletions(-) create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-boolean.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-date.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-double.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-float.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-int.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-long.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-string.json create mode 100644 
solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-boolean.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-date.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-double.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-float.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-int.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-long.json diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java index 9d06001e5903..1cc8edb0e742 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java @@ -18,18 +18,18 @@ import dev.langchain4j.data.message.UserMessage; import dev.langchain4j.model.chat.ChatModel; - +import dev.langchain4j.model.chat.request.ChatRequest; +import dev.langchain4j.model.chat.request.ResponseFormat; import java.lang.invoke.MethodHandles; import java.lang.reflect.Method; import java.time.Duration; import java.util.ArrayList; import java.util.Map; import java.util.Objects; -import dev.langchain4j.model.chat.request.ChatRequest; -import dev.langchain4j.model.chat.response.ChatResponse; import org.apache.lucene.util.Accountable; import org.apache.lucene.util.RamUsageEstimator; import org.apache.solr.common.SolrException; +import org.apache.solr.common.util.Utils; import org.apache.solr.core.SolrResourceLoader; import 
org.apache.solr.languagemodels.documentenrichment.store.ChatModelException; import org.apache.solr.languagemodels.documentenrichment.store.rest.ManagedChatModelStore; @@ -145,13 +145,28 @@ public SolrChatModel( this.hashCode = calculateHashCode(); } - public String chat(String text){ - ChatRequest chatRequest = ChatRequest.builder() - //.responseFormat(responseFormat) // used for structured outputs - .messages(UserMessage.from(text)) - .build(); - ChatResponse chatResponse = chatModel.chat(chatRequest); - return chatResponse.aiMessage().text(); // To change in case of structured output support + /** + * Sends a structured chat request and returns the parsed value from the {@code {"value": ...}} + * JSON object that the model is instructed to produce via {@code responseFormat}. + * + * @return the extracted value: a {@link String}, {@link Number}, {@link Boolean}, or {@link + * java.util.List} depending on the Solr output field type + */ + @SuppressWarnings("unchecked") + public Object chat(String text, ResponseFormat responseFormat) { + ChatRequest chatRequest = + ChatRequest.builder() + .responseFormat(responseFormat) + .messages(UserMessage.from(text)) + .build(); + String rawJson = chatModel.chat(chatRequest).aiMessage().text(); + Object parsed = Utils.fromJSONString(rawJson); + if (!(parsed instanceof Map map) || !map.containsKey("value")) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "LLM response is missing the 'value' key: " + rawJson); + } + return ((Map) map).get("value"); } @Override diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java index 5abd5629d8bb..57ca29e1a7dd 100644 --- 
a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java @@ -17,6 +17,7 @@ package org.apache.solr.languagemodels.documentenrichment.update.processor; +import dev.langchain4j.model.chat.request.ResponseFormat; import java.io.IOException; import java.lang.invoke.MethodHandles; import java.util.List; @@ -38,13 +39,17 @@ class DocumentEnrichmentUpdateProcessor extends UpdateRequestProcessor { private final List inputFields; private final String outputField; private final String prompt; - private SolrChatModel chatModel; + private final SolrChatModel chatModel; + private final boolean multiValued; + private final ResponseFormat responseFormat; public DocumentEnrichmentUpdateProcessor( List inputFields, String outputField, String prompt, SolrChatModel chatModel, + boolean multiValued, + ResponseFormat responseFormat, SolrQueryRequest req, UpdateRequestProcessor next) { super(next); @@ -53,6 +58,8 @@ public DocumentEnrichmentUpdateProcessor( this.outputField = outputField; this.prompt = prompt; this.chatModel = chatModel; + this.multiValued = multiValued; + this.responseFormat = responseFormat; } /** @@ -76,11 +83,15 @@ public void processAdd(AddUpdateCommand cmd) throws IOException { try { // as for now, only a plain text as prompt is sent to the model (no support for tools/skills/agents) - String response = chatModel.chat(injectedPrompt); - /* TODO: check if the outputField is multivalued and adapt the code/llm call to deal with lists also, together - with structured output support - */ - doc.setField(outputField, response); + // chatModel.chat returns the parsed value from the structured JSON response + Object value = chatModel.chat(injectedPrompt, responseFormat); + if (multiValued && value instanceof List list) { + for (Object item 
: list) { + doc.addField(outputField, item); + } + } else { + doc.setField(outputField, value); + } } catch (RuntimeException chatModelFailure) { if (log.isErrorEnabled()) { SchemaField uniqueKeyField = schema.getUniqueKeyField(); @@ -101,4 +112,4 @@ protected boolean isNullOrEmpty(SolrInputField inputFieldContent) { || inputFieldContent.getValue() == null || inputFieldContent.getValue().toString().isEmpty()); } -} \ No newline at end of file +} diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java index 50e710838b63..508b46fb7be7 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java @@ -17,6 +17,16 @@ package org.apache.solr.languagemodels.documentenrichment.update.processor; +import dev.langchain4j.model.chat.request.ResponseFormat; +import dev.langchain4j.model.chat.request.ResponseFormatType; +import dev.langchain4j.model.chat.request.json.JsonArraySchema; +import dev.langchain4j.model.chat.request.json.JsonBooleanSchema; +import dev.langchain4j.model.chat.request.json.JsonIntegerSchema; +import dev.langchain4j.model.chat.request.json.JsonNumberSchema; +import dev.langchain4j.model.chat.request.json.JsonObjectSchema; +import dev.langchain4j.model.chat.request.json.JsonSchema; +import dev.langchain4j.model.chat.request.json.JsonSchemaElement; +import dev.langchain4j.model.chat.request.json.JsonStringSchema; import java.io.IOException; import java.io.InputStream; import java.nio.charset.StandardCharsets; @@ -39,8 +49,15 @@ import 
org.apache.solr.response.SolrQueryResponse; import org.apache.solr.rest.ManagedResource; import org.apache.solr.rest.ManagedResourceObserver; +import org.apache.solr.schema.BoolField; +import org.apache.solr.schema.DatePointField; +import org.apache.solr.schema.DenseVectorField; +import org.apache.solr.schema.DoublePointField; import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.FloatPointField; import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.IntPointField; +import org.apache.solr.schema.LongPointField; import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.StrField; import org.apache.solr.schema.TextField; @@ -182,7 +199,10 @@ public UpdateRequestProcessor getInstance( } final SchemaField outputFieldSchema = latestSchema.getField(outputField); - assertIsTextualField(outputFieldSchema); + assertIsSupportedField(outputFieldSchema); + + ResponseFormat responseFormat = buildResponseFormat(outputFieldSchema); + boolean multiValued = outputFieldSchema.multiValued(); ManagedChatModelStore modelStore = ManagedChatModelStore.getManagedModelStore(req.getCore()); SolrChatModel chatModel = modelStore.getModel(modelName); @@ -196,17 +216,74 @@ public UpdateRequestProcessor getInstance( } return new DocumentEnrichmentUpdateProcessor( - inputFields, outputField, prompt, chatModel, req, next); + inputFields, outputField, prompt, chatModel, multiValued, responseFormat, req, next); + } + + /** + * Validates that the output field type is supported. Supported types are: textual (Str, Text), + * numeric (Int, Long, Float, Double), boolean and date. Vector and binary fields are not + * supported. 
+ */ + protected void assertIsSupportedField(SchemaField schemaField) { + try { + toJsonSchemaElement(schemaField.getType()); + } catch (SolrException e) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "field type is not supported by Document Enrichment: " + schemaField.getName()); + } } - // This is used on the outputField. Now the support is limited. Can be changed with structured outputs. - protected void assertIsTextualField(SchemaField schemaField) { - FieldType fieldType = schemaField.getType(); - if (!(fieldType instanceof StrField) && !(fieldType instanceof TextField)) { + /** + * Builds a {@link ResponseFormat} that instructs the model to return a JSON object {@code + * {"value": ...}} whose value type matches the Solr field type. For multivalued fields the value + * is wrapped in a JSON array. + */ + static ResponseFormat buildResponseFormat(SchemaField schemaField) { + JsonSchemaElement valueElement = toJsonSchemaElement(schemaField.getType()); + JsonSchemaElement valueSchema = + schemaField.multiValued() + ? 
JsonArraySchema.builder().items(valueElement).build() // could be only supported by Gemini + // (source: https://github.com/langchain4j/langchain4j/blob/main/docs/docs/tutorials/structured-outputs.md) + // If not supported, we cannot support multivalued fields as outputField + : valueElement; + return ResponseFormat.builder() + .type(ResponseFormatType.JSON) + .jsonSchema( + JsonSchema.builder() + .name("output") + .rootElement( + JsonObjectSchema.builder() + .addProperty("value", valueSchema) + .required("value") + .build()) + .build()) + .build(); + } + + private static JsonSchemaElement toJsonSchemaElement(FieldType fieldType) { + // DenseVectorField extends FloatPointField, so it must be rejected before the numeric checks + if (fieldType instanceof DenseVectorField) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, + "field type is not supported by Document Enrichment: " + + fieldType.getClass().getSimpleName()); + } + if (fieldType instanceof StrField + || fieldType instanceof TextField + || fieldType instanceof DatePointField) { + return new JsonStringSchema(); + } else if (fieldType instanceof IntPointField || fieldType instanceof LongPointField) { + return new JsonIntegerSchema(); + } else if (fieldType instanceof FloatPointField || fieldType instanceof DoublePointField) { + return new JsonNumberSchema(); + } else if (fieldType instanceof BoolField) { + return new JsonBooleanSchema(); + } else { throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, - "only textual fields are compatible with Document Enrichment: " - + schemaField.getName()); + "field type is not supported by Document Enrichment: " + + fieldType.getClass().getSimpleName()); } } @@ -253,4 +330,4 @@ public String getModelName() { public String getPromptFile() { return promptFile; } -} \ No newline at end of file +} diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-boolean.json 
b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-boolean.json new file mode 100644 index 000000000000..7ba22888cb2b --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-boolean.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-boolean-multi", + "params": { + "response": "{\"value\": [true, false]}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-date.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-date.json new file mode 100644 index 000000000000..f159e3334614 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-date.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-date-multi", + "params": { + "response": "{\"value\": [\"2024-01-15T00:00:00Z\", \"2025-06-30T00:00:00Z\"]}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-double.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-double.json new file mode 100644 index 000000000000..8b01495e474e --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-double.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-double-multi", + "params": { + "response": "{\"value\": [3.14, 2.71]}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-float.json 
b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-float.json new file mode 100644 index 000000000000..0415048c1315 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-float.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-float-multi", + "params": { + "response": "{\"value\": [1.5, 2.5]}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-int.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-int.json new file mode 100644 index 000000000000..ff15d3f0b584 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-int.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-int-multi", + "params": { + "response": "{\"value\": [1, 2]}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-long.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-long.json new file mode 100644 index 000000000000..03c06eb0f5d3 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-long.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-long-multi", + "params": { + "response": "{\"value\": [10, 20, 30]}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-string.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-string.json new file mode 100644 index 
000000000000..b482ef654211 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-string.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-chat-multivalued-1", + "params": { + "response": "{\"value\": [\"tag1\", \"tag2\"]}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-boolean.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-boolean.json new file mode 100644 index 000000000000..caca167287a6 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-boolean.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-boolean", + "params": { + "response": "{\"value\": true}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-date.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-date.json new file mode 100644 index 000000000000..b98eb53cf506 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-date.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-date", + "params": { + "response": "{\"value\": \"2024-01-15T00:00:00Z\"}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-double.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-double.json new file mode 100644 index 000000000000..5301937628f7 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-double.json @@ -0,0 +1,7 @@ 
+{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-double", + "params": { + "response": "{\"value\": 2.5}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-float.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-float.json new file mode 100644 index 000000000000..8f0c63512a35 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-float.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-float", + "params": { + "response": "{\"value\": 1.5}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-int.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-int.json new file mode 100644 index 000000000000..664d846e1260 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-int.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-int", + "params": { + "response": "{\"value\": 7}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-long.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-long.json new file mode 100644 index 000000000000..6d58cab102fa --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-single-long.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-long", + "params": { + "response": "{\"value\": 42}" + } +} \ No newline at end of file diff --git 
a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model.json index f331535d5e9f..169cbc710450 100644 --- a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model.json +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model.json @@ -2,6 +2,6 @@ "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", "name": "dummy-chat-1", "params": { - "response": "enriched content" + "response": "{\"value\": \"enriched content\"}" } } \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml b/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml index f7ad738784f6..a7d329e1a88f 100644 --- a/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml +++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/schema-language-models.xml @@ -25,7 +25,11 @@ - + + + + + @@ -38,6 +42,23 @@ + + + + + + + + + + + + + + + + + diff --git a/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml index 25f07fea4272..f9b82c153d9e 100644 --- a/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml +++ b/solr/modules/language-models/src/test-files/solr/collection1/conf/solrconfig-document-enrichment.xml @@ -91,6 +91,16 @@ + + + string_field + enriched_field_multi + Extract tags from: {string_field} + dummy-chat-multivalued-1 + + + + string_field @@ -102,4 +112,124 @@ + + + string_field + output_long + Extract a number from: {string_field} + dummy-long + + + + + + + string_field + output_int + Extract a number from: {string_field} + dummy-int + + + + + + + 
string_field + output_float + Extract a number from: {string_field} + dummy-float + + + + + + + string_field + output_double + Extract a number from: {string_field} + dummy-double + + + + + + + string_field + output_boolean + Is this true or false: {string_field} + dummy-boolean + + + + + + + string_field + output_date + Extract a date from: {string_field} + dummy-date + + + + + + + string_field + output_long_multi + Extract numbers from: {string_field} + dummy-long-multi + + + + + + + string_field + output_int_multi + Extract numbers from: {string_field} + dummy-int-multi + + + + + + + string_field + output_float_multi + Extract numbers from: {string_field} + dummy-float-multi + + + + + + + string_field + output_double_multi + Extract numbers from: {string_field} + dummy-double-multi + + + + + + + string_field + output_boolean_multi + Extract boolean values from: {string_field} + dummy-boolean-multi + + + + + + + string_field + output_date_multi + Extract dates from: {string_field} + dummy-date-multi + + + + \ No newline at end of file diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java index 2f2a91686859..b2ba1cf0a401 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java @@ -286,7 +286,7 @@ public void init_notTextualOutputField_shouldThrowExceptionWithDetailedMessage() SolrException e = assertThrows(SolrException.class, () -> factory.getInstance(req, null, null)); assertEquals( - "only textual fields are compatible with 
Document Enrichment: vector", e.getMessage()); + "field type is not supported by Document Enrichment: vector", e.getMessage()); } @Test @@ -324,6 +324,52 @@ public void init_multipleInputFields_oneNotExistent_shouldThrowExceptionWithDeta assertEquals("undefined field: \"notExistentInput\"", e.getMessage()); } + @Test + public void init_multivaluedStringOutputField_shouldNotThrowException() { + UpdateRequestProcessor instance = + createUpdateProcessor("string_field", "enriched_field_multi", collection1, "model-mv"); + assertNotNull(instance); + } + + @Test + public void init_multivaluedStringOutputField_buildResponseFormat_shouldProduceArraySchema() { + NamedList args = new NamedList<>(); + ManagedChatModelStore.getManagedModelStore(collection1) + .addModel(new SolrChatModel("model-rf", null, null)); + args.add("inputField", "string_field"); + args.add("outputField", "enriched_field_multi"); + args.add("prompt", "Summarize: {string_field}"); + args.add("model", "model-rf"); + + DocumentEnrichmentUpdateProcessorFactory factory = new DocumentEnrichmentUpdateProcessorFactory(); + factory.init(args); + ModifiableSolrParams params = new ModifiableSolrParams(); + SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {}; + assertNotNull(factory.getInstance(req, null, null)); + + // verify the ResponseFormat is constructed correctly for the multivalued field + var schema = collection1.getLatestSchema(); + var schemaField = schema.getField("enriched_field_multi"); + assertTrue(schemaField.multiValued()); + var responseFormat = DocumentEnrichmentUpdateProcessorFactory.buildResponseFormat(schemaField); + assertNotNull(responseFormat); + assertEquals( + dev.langchain4j.model.chat.request.ResponseFormatType.JSON, responseFormat.type()); + assertNotNull(responseFormat.jsonSchema()); + } + + @Test + public void init_singleValuedStringOutputField_buildResponseFormat_shouldProduceStringSchema() { + var schema = collection1.getLatestSchema(); + var schemaField = 
schema.getField("enriched_field"); + assertFalse(schemaField.multiValued()); + var responseFormat = DocumentEnrichmentUpdateProcessorFactory.buildResponseFormat(schemaField); + assertNotNull(responseFormat); + assertEquals( + dev.langchain4j.model.chat.request.ResponseFormatType.JSON, responseFormat.type()); + assertNotNull(responseFormat.jsonSchema()); + } + @Test public void init_dynamicInputField_shouldNotThrowException() { UpdateRequestProcessor instance = diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java index d715fb6e8e93..5349de833df3 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java @@ -42,21 +42,30 @@ public static void cleanup() throws Exception { afterTest(); } + private String loadedModelId; + @After public void afterEachTest() throws Exception { - restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/dummy-chat-1"); - restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/exception-throwing-chat-model"); + if (loadedModelId != null) { + restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/" + loadedModelId); + loadedModelId = null; + } + } + + private void loadTestChatModel(String fileName, String modelId) throws Exception { + loadChatModel(fileName); + loadedModelId = modelId; } @Test public void processAdd_inputField_shouldEnrichInputField() throws Exception { - loadChatModel("dummy-chat-model.json"); + loadTestChatModel("dummy-chat-model.json", "dummy-chat-1"); addWithChain(sdoc("id", "99", 
"string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); addWithChain(sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), "documentEnrichment"); assertU(commit()); - final SolrQuery query = getEnrichmentQuery(); + final SolrQuery query = getEnrichmentQuery("enriched_field"); assertJQ( "/query" + query.toQueryString(), @@ -65,8 +74,6 @@ public void processAdd_inputField_shouldEnrichInputField() throws Exception { "/response/docs/[0]/enriched_field=='enriched content'", "/response/docs/[1]/id=='98'", "/response/docs/[1]/enriched_field=='enriched content'"); - - restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/dummy-1"); // clean up } /* @@ -92,12 +99,12 @@ public void processAdd_modelNotFound_shouldThrowException() { @Test public void processAdd_emptyInputField_shouldLogAndIndexWithNoEnrichedField() throws Exception { - loadChatModel("dummy-chat-model.json"); + loadTestChatModel("dummy-chat-model.json", "dummy-chat-1"); addWithChain(sdoc("id", "99", "string_field", ""), "documentEnrichment"); addWithChain(sdoc("id", "98", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); assertU(commit()); - final SolrQuery query = getEnrichmentQuery(); + final SolrQuery query = getEnrichmentQuery("enriched_field"); assertJQ( "/query" + query.toQueryString(), @@ -110,12 +117,12 @@ public void processAdd_emptyInputField_shouldLogAndIndexWithNoEnrichedField() th @Test public void processAdd_nullInputField_shouldLogAndIndexWithNoEnrichedField() throws Exception { - loadChatModel("dummy-chat-model.json"); + loadTestChatModel("dummy-chat-model.json", "dummy-chat-1"); addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); assertU(adoc("id", "98")); // no string_field assertU(commit()); - final SolrQuery query = getEnrichmentQuery(); + final SolrQuery query = getEnrichmentQuery("enriched_field"); assertJQ( "/query" + query.toQueryString(), @@ -128,12 +135,12 @@ 
public void processAdd_nullInputField_shouldLogAndIndexWithNoEnrichedField() thr @Test public void processAdd_failingEnrichment_shouldLogAndIndexWithNoEnrichedField() throws Exception { - loadChatModel("exception-throwing-chat-model.json"); + loadTestChatModel("exception-throwing-chat-model.json", "exception-throwing-chat-model"); addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "failingDocumentEnrichment"); addWithChain(sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), "failingDocumentEnrichment"); assertU(commit()); - final SolrQuery query = getEnrichmentQuery(); + final SolrQuery query = getEnrichmentQuery("enriched_field"); assertJQ( "/query" + query.toQueryString(), @@ -151,7 +158,7 @@ public void processAtomicUpdate_shouldTriggerEnrichmentAndFetchTheStoredContent( // (i.e., DistributedUpdateProcessorFactory before DocumentEnrichmentUpdateProcessorFactory), // the system correctly retrieves the stored value of string_field and generates the // enriched content for the document. 
- loadChatModel("dummy-chat-model.json"); + loadTestChatModel("dummy-chat-model.json", "dummy-chat-1"); assertU(adoc("id", "99", "string_field", "Vegeta is the saiyan prince.")); assertU(adoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth.")); assertU(commit()); @@ -162,7 +169,7 @@ public void processAtomicUpdate_shouldTriggerEnrichmentAndFetchTheStoredContent( addWithChain(atomicDoc, "documentEnrichmentForPartialUpdates"); assertU(commit()); - final SolrQuery query = getEnrichmentQuery(); + final SolrQuery query = getEnrichmentQuery("enriched_field"); assertJQ( "/query" + query.toQueryString(), @@ -179,7 +186,7 @@ public void processAtomicUpdate_shouldReplaceExistingEnrichedFieldNotAppend() th // Verifies that when a document already contains an enriched_field and string_field is // modified via atomic update, the enriched content is recomputed and replaces the previous // value rather than being appended. - loadChatModel("dummy-chat-model.json"); + loadTestChatModel("dummy-chat-model.json", "dummy-chat-1"); addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); addWithChain(sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), "documentEnrichment"); assertU(commit()); @@ -190,7 +197,7 @@ public void processAtomicUpdate_shouldReplaceExistingEnrichedFieldNotAppend() th addWithChain(atomicDoc, "documentEnrichmentForPartialUpdates"); assertU(commit()); - final SolrQuery query = getEnrichmentQuery(); + final SolrQuery query = getEnrichmentQuery("enriched_field"); assertJQ( "/query" + query.toQueryString(), @@ -205,7 +212,7 @@ public void processAtomicUpdate_shouldReplaceExistingEnrichedFieldNotAppend() th @Test public void processAdd_multipleInputFields_allPresent_shouldEnrichDocument() throws Exception { - loadChatModel("dummy-chat-model.json"); + loadTestChatModel("dummy-chat-model.json", "dummy-chat-1"); addWithChain( sdoc("id", "99", "string_field", "Vegeta is 
the saiyan prince.", "body_field", "He is very proud."), @@ -215,7 +222,7 @@ public void processAdd_multipleInputFields_allPresent_shouldEnrichDocument() thr "documentEnrichmentMultiField"); assertU(commit()); - final SolrQuery query = getEnrichmentQuery(); + final SolrQuery query = getEnrichmentQuery("enriched_field"); assertJQ( "/query" + query.toQueryString(), @@ -228,7 +235,7 @@ public void processAdd_multipleInputFields_allPresent_shouldEnrichDocument() thr @Test public void processAdd_multipleInputFields_firstFieldNull_shouldSkipEnrichment() throws Exception { - loadChatModel("dummy-chat-model.json"); + loadTestChatModel("dummy-chat-model.json", "dummy-chat-1"); addWithChain( sdoc("id", "99", "body_field", "He is very proud."), // string_field absent @@ -238,7 +245,7 @@ public void processAdd_multipleInputFields_firstFieldNull_shouldSkipEnrichment() "documentEnrichmentMultiField"); assertU(commit()); - final SolrQuery query = getEnrichmentQuery(); + final SolrQuery query = getEnrichmentQuery("enriched_field"); assertJQ( "/query" + query.toQueryString(), @@ -251,7 +258,7 @@ public void processAdd_multipleInputFields_firstFieldNull_shouldSkipEnrichment() @Test public void processAdd_multipleInputFields_secondFieldEmpty_shouldSkipEnrichment() throws Exception { - loadChatModel("dummy-chat-model.json"); + loadTestChatModel("dummy-chat-model.json", "dummy-chat-1"); addWithChain( sdoc("id", "99", "string_field", "Vegeta is the saiyan prince.", "body_field", ""), @@ -261,7 +268,7 @@ public void processAdd_multipleInputFields_secondFieldEmpty_shouldSkipEnrichment "documentEnrichmentMultiField"); assertU(commit()); - final SolrQuery query = getEnrichmentQuery(); + final SolrQuery query = getEnrichmentQuery("enriched_field"); assertJQ( "/query" + query.toQueryString(), @@ -274,13 +281,13 @@ public void processAdd_multipleInputFields_secondFieldEmpty_shouldSkipEnrichment @Test public void processAdd_multipleInputFields_bothFieldsAbsent_shouldSkipEnrichment() throws 
Exception { - loadChatModel("dummy-chat-model.json"); + loadTestChatModel("dummy-chat-model.json", "dummy-chat-1"); addWithChain(sdoc("id", "99"), "documentEnrichmentMultiField"); addWithChain(sdoc("id", "98"), "documentEnrichmentMultiField"); assertU(commit()); - final SolrQuery query = getEnrichmentQuery(); + final SolrQuery query = getEnrichmentQuery("enriched_field"); assertJQ( "/query" + query.toQueryString(), @@ -293,7 +300,7 @@ public void processAdd_multipleInputFields_bothFieldsAbsent_shouldSkipEnrichment @Test public void processAdd_multipleInputFields_failingModel_shouldLogAndSkipEnrichment() throws Exception { - loadChatModel("exception-throwing-chat-model.json"); + loadTestChatModel("exception-throwing-chat-model.json", "exception-throwing-chat-model"); addWithChain( sdoc("id", "99", "string_field", "Vegeta is the saiyan prince.", "body_field", "He is very proud."), @@ -303,7 +310,7 @@ public void processAdd_multipleInputFields_failingModel_shouldLogAndSkipEnrichme "failingDocumentEnrichmentMultiField"); assertU(commit()); - final SolrQuery query = getEnrichmentQuery(); + final SolrQuery query = getEnrichmentQuery("enriched_field"); assertJQ( "/query" + query.toQueryString(), @@ -314,10 +321,317 @@ public void processAdd_multipleInputFields_failingModel_shouldLogAndSkipEnrichme "!/response/docs/[1]/enriched_field=="); } - private SolrQuery getEnrichmentQuery() { + @Test + public void processAdd_multivaluedStringOutputField_shouldPopulateAllValues() throws Exception { + loadTestChatModel("dummy-chat-model-multivalued-string.json", "dummy-chat-multivalued-1"); + + addWithChain( + sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), + "documentEnrichmentMultivaluedString"); + addWithChain( + sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), + "documentEnrichmentMultivaluedString"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("enriched_field_multi"); + + assertJQ( + "/query" + 
query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/enriched_field_multi/[0]=='tag1'", + "/response/docs/[0]/enriched_field_multi/[1]=='tag2'", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/enriched_field_multi/[0]=='tag1'", + "/response/docs/[1]/enriched_field_multi/[1]=='tag2'"); + } + + @Test + public void processAdd_multivaluedStringOutputField_emptyInput_shouldSkipEnrichment() + throws Exception { + loadTestChatModel("dummy-chat-model-multivalued-string.json", "dummy-chat-multivalued-1"); + + addWithChain(sdoc("id", "99", "string_field", ""), "documentEnrichmentMultivaluedString"); + addWithChain(sdoc("id", "98", "string_field", ""), "documentEnrichmentMultivaluedString"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("enriched_field_multi"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "!/response/docs/[0]/enriched_field_multi==", + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/enriched_field_multi=="); + } + + // --- typed single-valued output field tests --- + + @Test + public void processAdd_singleLongOutputField_shouldPopulateValue() throws Exception { + loadTestChatModel("dummy-chat-model-single-long.json", "dummy-long"); + + addWithChain(sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleLong"); + addWithChain(sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleLong"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("output_long"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/output_long==42", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/output_long==42"); + } + + @Test + public void processAdd_singleIntOutputField_shouldPopulateValue() throws Exception { + loadTestChatModel("dummy-chat-model-single-int.json", "dummy-int"); + + 
addWithChain(sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleInt"); + addWithChain(sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleInt"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("output_int"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/output_int==7", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/output_int==7"); + } + + @Test + public void processAdd_singleFloatOutputField_shouldPopulateValue() throws Exception { + loadTestChatModel("dummy-chat-model-single-float.json", "dummy-float"); + + addWithChain(sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleFloat"); + addWithChain(sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleFloat"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("output_float"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/output_float==1.5", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/output_float==1.5"); + } + + @Test + public void processAdd_singleDoubleOutputField_shouldPopulateValue() throws Exception { + loadTestChatModel("dummy-chat-model-single-double.json", "dummy-double"); + + addWithChain( + sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleDouble"); + addWithChain( + sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleDouble"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("output_double"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/output_double==2.5", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/output_double==2.5"); + } + + @Test + public void processAdd_singleBooleanOutputField_shouldPopulateValue() throws Exception { 
+ loadTestChatModel("dummy-chat-model-single-boolean.json", "dummy-boolean"); + + addWithChain( + sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleBoolean"); + addWithChain( + sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleBoolean"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("output_boolean"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/output_boolean==true", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/output_boolean==true"); + } + + @Test + public void processAdd_singleDateOutputField_shouldPopulateValue() throws Exception { + loadTestChatModel("dummy-chat-model-single-date.json", "dummy-date"); + + addWithChain(sdoc("id", "99", "string_field", "some content"), "documentEnrichmentSingleDate"); + addWithChain(sdoc("id", "98", "string_field", "other content"), "documentEnrichmentSingleDate"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("output_date"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/output_date=='2024-01-15T00:00:00Z'", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/output_date=='2024-01-15T00:00:00Z'"); + } + + // --- typed multivalued output field tests --- + + @Test + public void processAdd_multivaluedLongOutputField_shouldPopulateAllValues() throws Exception { + loadTestChatModel("dummy-chat-model-multivalued-long.json", "dummy-long-multi"); + + addWithChain( + sdoc("id", "99", "string_field", "some content"), "documentEnrichmentMultivaluedLong"); + addWithChain( + sdoc("id", "98", "string_field", "other content"), "documentEnrichmentMultivaluedLong"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("output_long_multi"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + 
"/response/docs/[0]/output_long_multi/[0]==10", + "/response/docs/[0]/output_long_multi/[1]==20", + "/response/docs/[0]/output_long_multi/[2]==30", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/output_long_multi/[0]==10", + "/response/docs/[1]/output_long_multi/[1]==20", + "/response/docs/[1]/output_long_multi/[2]==30"); + } + + @Test + public void processAdd_multivaluedIntOutputField_shouldPopulateAllValues() throws Exception { + loadTestChatModel("dummy-chat-model-multivalued-int.json", "dummy-int-multi"); + + addWithChain( + sdoc("id", "99", "string_field", "some content"), "documentEnrichmentMultivaluedInt"); + addWithChain( + sdoc("id", "98", "string_field", "other content"), "documentEnrichmentMultivaluedInt"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("output_int_multi"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/output_int_multi/[0]==1", + "/response/docs/[0]/output_int_multi/[1]==2", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/output_int_multi/[0]==1", + "/response/docs/[1]/output_int_multi/[1]==2"); + } + + @Test + public void processAdd_multivaluedFloatOutputField_shouldPopulateAllValues() throws Exception { + loadTestChatModel("dummy-chat-model-multivalued-float.json", "dummy-float-multi"); + + addWithChain( + sdoc("id", "99", "string_field", "some content"), "documentEnrichmentMultivaluedFloat"); + addWithChain( + sdoc("id", "98", "string_field", "other content"), "documentEnrichmentMultivaluedFloat"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("output_float_multi"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/output_float_multi/[0]==1.5", + "/response/docs/[0]/output_float_multi/[1]==2.5", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/output_float_multi/[0]==1.5", + 
"/response/docs/[1]/output_float_multi/[1]==2.5"); + } + + @Test + public void processAdd_multivaluedDoubleOutputField_shouldPopulateAllValues() throws Exception { + loadTestChatModel("dummy-chat-model-multivalued-double.json", "dummy-double-multi"); + + addWithChain( + sdoc("id", "99", "string_field", "some content"), "documentEnrichmentMultivaluedDouble"); + addWithChain( + sdoc("id", "98", "string_field", "other content"), "documentEnrichmentMultivaluedDouble"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("output_double_multi"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/output_double_multi/[0]==3.14", + "/response/docs/[0]/output_double_multi/[1]==2.71", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/output_double_multi/[0]==3.14", + "/response/docs/[1]/output_double_multi/[1]==2.71"); + } + + @Test + public void processAdd_multivaluedBooleanOutputField_shouldPopulateAllValues() throws Exception { + loadTestChatModel("dummy-chat-model-multivalued-boolean.json", "dummy-boolean-multi"); + + addWithChain( + sdoc("id", "99", "string_field", "some content"), "documentEnrichmentMultivaluedBoolean"); + addWithChain( + sdoc("id", "98", "string_field", "other content"), "documentEnrichmentMultivaluedBoolean"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("output_boolean_multi"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/output_boolean_multi/[0]==true", + "/response/docs/[0]/output_boolean_multi/[1]==false", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/output_boolean_multi/[0]==true", + "/response/docs/[1]/output_boolean_multi/[1]==false"); + } + + @Test + public void processAdd_multivaluedDateOutputField_shouldPopulateAllValues() throws Exception { + loadTestChatModel("dummy-chat-model-multivalued-date.json", 
"dummy-date-multi"); + + addWithChain( + sdoc("id", "99", "string_field", "some content"), "documentEnrichmentMultivaluedDate"); + addWithChain( + sdoc("id", "98", "string_field", "other content"), "documentEnrichmentMultivaluedDate"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("output_date_multi"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/output_date_multi/[0]=='2024-01-15T00:00:00Z'", + "/response/docs/[0]/output_date_multi/[1]=='2025-06-30T00:00:00Z'", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/output_date_multi/[0]=='2024-01-15T00:00:00Z'", + "/response/docs/[1]/output_date_multi/[1]=='2025-06-30T00:00:00Z'"); + } + + private SolrQuery getEnrichmentQuery(String enrichedFieldName) { final SolrQuery query = new SolrQuery(); query.setQuery("*:*"); - query.add("fl", "id,enriched_field"); + query.add("fl", "id,"+enrichedFieldName); query.add("sort", "id desc"); return query; } From cf0d6bb20411490a35691b22decf638a4315da61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= Date: Tue, 31 Mar 2026 14:52:48 +0200 Subject: [PATCH 6/9] [llm-document-enrichment] polished code, added tests and added file for documentation --- .../unreleased/llm-document-enrichment.yml | 4 ++ .../model/SolrChatModel.java | 7 +- .../DocumentEnrichmentUpdateProcessor.java | 2 +- ...umentEnrichmentUpdateProcessorFactory.java | 26 ++----- .../dummy-chat-model-malformed-json.json | 7 ++ .../dummy-chat-model-missing-value-key.json | 7 ++ .../dummy-chat-model-multivalued-scalar.json | 7 ++ .../exception-throwing-chat-model.json | 2 +- .../model/DummyChatModel.java | 2 +- .../store/rest/TestChatModelManager.java | 6 +- ...tEnrichmentUpdateProcessorFactoryTest.java | 14 ++-- ...DocumentEnrichmentUpdateProcessorTest.java | 72 +++++++++++++++++++ .../modules/indexing-guide/indexing-nav.adoc | 1 + .../pages/document-enrichment-with-llms.adoc | 19 +++++ 14 files 
changed, 139 insertions(+), 37 deletions(-) create mode 100644 changelog/unreleased/llm-document-enrichment.yml create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-malformed-json.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-missing-value-key.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-scalar.json create mode 100644 solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc diff --git a/changelog/unreleased/llm-document-enrichment.yml b/changelog/unreleased/llm-document-enrichment.yml new file mode 100644 index 000000000000..fd6e55d6249f --- /dev/null +++ b/changelog/unreleased/llm-document-enrichment.yml @@ -0,0 +1,4 @@ +title: Add DocumentEnrichmentUpdateProcessorFactory for LLM-based document enrichment at index time +type: added # added, changed, fixed, deprecated, removed, dependency_update, security, other +authors: +- name: Nicolò Rinaldi diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java index 1cc8edb0e742..afd45d11ca07 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/model/SolrChatModel.java @@ -48,9 +48,9 @@ public class SolrChatModel implements Accountable { // timeout is type Duration private static final String TIMEOUT_PARAM = "timeout"; - // the following are Integer type + // the following are Integer type private static final String MAX_RETRIES_PARAM = "maxRetries"; - private static final String THINKING_BUDGET_TOKENS ="thinkingBudgetTokens"; + private static final String THINKING_BUDGET_TOKENS =
"thinkingBudgetTokens"; private static final String RANDOM_SEED = "randomSeed"; private final String name; @@ -152,7 +152,6 @@ public SolrChatModel( * @return the extracted value: a {@link String}, {@link Number}, {@link Boolean}, or {@link * java.util.List} depending on the Solr output field type */ - @SuppressWarnings("unchecked") public Object chat(String text, ResponseFormat responseFormat) { ChatRequest chatRequest = ChatRequest.builder() @@ -166,7 +165,7 @@ public Object chat(String text, ResponseFormat responseFormat) { SolrException.ErrorCode.SERVER_ERROR, "LLM response is missing the 'value' key: " + rawJson); } - return ((Map) map).get("value"); + return map.get("value"); } @Override diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java index 57ca29e1a7dd..3f90fd8eb580 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessor.java @@ -35,7 +35,7 @@ class DocumentEnrichmentUpdateProcessor extends UpdateRequestProcessor { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - private IndexSchema schema; + private final IndexSchema schema; private final List inputFields; private final String outputField; private final String prompt; diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java 
b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java index 508b46fb7be7..659a20897eb4 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java @@ -199,7 +199,6 @@ public UpdateRequestProcessor getInstance( } final SchemaField outputFieldSchema = latestSchema.getField(outputField); - assertIsSupportedField(outputFieldSchema); ResponseFormat responseFormat = buildResponseFormat(outputFieldSchema); boolean multiValued = outputFieldSchema.multiValued(); @@ -219,33 +218,20 @@ public UpdateRequestProcessor getInstance( inputFields, outputField, prompt, chatModel, multiValued, responseFormat, req, next); } - /** - * Validates that the output field type is supported. Supported types are: textual (Str, Text), - * numeric (Int, Long, Float, Double), boolean and date. Vector and binary fields are not - * supported. - */ - protected void assertIsSupportedField(SchemaField schemaField) { - try { - toJsonSchemaElement(schemaField.getType()); - } catch (SolrException e) { - throw new SolrException( - SolrException.ErrorCode.SERVER_ERROR, - "field type is not supported by Document Enrichment: " + schemaField.getName()); - } - } - /** * Builds a {@link ResponseFormat} that instructs the model to return a JSON object {@code * {"value": ...}} whose value type matches the Solr field type. For multivalued fields the value - * is wrapped in a JSON array. + * is wrapped in a {@link JsonArraySchema} nested inside the root {@link JsonObjectSchema}. + * + *

<p>Nesting {@link JsonArraySchema} inside a {@link JsonObjectSchema} property is supported by + * all langchain4j providers that implement structured outputs with {@link JsonObjectSchema} (OpenAI, Azure OpenAI, + * Google AI, Gemini, Mistral, Ollama, Amazon Bedrock, Watsonx). */ static ResponseFormat buildResponseFormat(SchemaField schemaField) { JsonSchemaElement valueElement = toJsonSchemaElement(schemaField.getType()); JsonSchemaElement valueSchema = schemaField.multiValued() - ? JsonArraySchema.builder().items(valueElement).build() // could be only supported by Gemini - // (source: https://github.com/langchain4j/langchain4j/blob/main/docs/docs/tutorials/structured-outputs.md) - // If not supported, we cannot support multivalued fields as outputField + ? JsonArraySchema.builder().items(valueElement).build() : valueElement; return ResponseFormat.builder() .type(ResponseFormatType.JSON) diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-malformed-json.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-malformed-json.json new file mode 100644 index 000000000000..bdc8394add3b --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-malformed-json.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-chat-1", + "params": { + "response": "not valid json at all" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-missing-value-key.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-missing-value-key.json new file mode 100644 index 000000000000..42a52faf650a --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-missing-value-key.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", 
+ "name": "dummy-chat-1", + "params": { + "response": "{\"result\": \"some value\"}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-scalar.json b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-scalar.json new file mode 100644 index 000000000000..2deb27259554 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/dummy-chat-model-multivalued-scalar.json @@ -0,0 +1,7 @@ +{ + "class": "org.apache.solr.languagemodels.documentenrichment.model.DummyChatModel", + "name": "dummy-chat-multivalued-1", + "params": { + "response": "{\"value\": \"a single string\"}" + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/exception-throwing-chat-model.json b/solr/modules/language-models/src/test-files/modelChatExamples/exception-throwing-chat-model.json index 29bcce318ada..3fad70744ff5 100644 --- a/solr/modules/language-models/src/test-files/modelChatExamples/exception-throwing-chat-model.json +++ b/solr/modules/language-models/src/test-files/modelChatExamples/exception-throwing-chat-model.json @@ -3,4 +3,4 @@ "name": "exception-throwing-chat-model", "params": { } -} \ No newline at end of file +} diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModel.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModel.java index 753150cb6f02..42987b1d69ce 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModel.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/model/DummyChatModel.java @@ -77,4 +77,4 @@ public DummyChatModel build() { return new DummyChatModel(this.response); } } -} \ No newline at end of file +} diff --git 
a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java index dc1b67e0debb..49c1b70ce2e0 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java @@ -67,10 +67,6 @@ public void testRestManagerEndpoints() throws Exception { final String openAiClassName = "dev.langchain4j.model.openai.OpenAiChatModel"; - // fails — no params provided -// String model = "{ \"name\":\"testChatModel1\", \"class\":\"" + openAiClassName + "\"}"; -// assertJPut(ManagedChatModelStore.REST_END_POINT, model, "/responseHeader/status==400"); - // success String model = "{ name:\"testChatModel2\", class:\"" @@ -119,7 +115,7 @@ public void testRestManagerEndpoints() throws Exception { restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/testChatModel2"); restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/testChatModel3"); restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/testChatModel4"); - assertJQ(ManagedChatModelStore.REST_END_POINT, "/models==[]'"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models==[]"); } @Test diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java index b2ba1cf0a401..e92bded3c75e 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java +++ 
b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactoryTest.java @@ -286,7 +286,7 @@ public void init_notTextualOutputField_shouldThrowExceptionWithDetailedMessage() SolrException e = assertThrows(SolrException.class, () -> factory.getInstance(req, null, null)); assertEquals( - "field type is not supported by Document Enrichment: vector", e.getMessage()); + "field type is not supported by Document Enrichment: DenseVectorField", e.getMessage()); } @Test @@ -325,14 +325,15 @@ public void init_multipleInputFields_oneNotExistent_shouldThrowExceptionWithDeta } @Test - public void init_multivaluedStringOutputField_shouldNotThrowException() { + public void init_multivaluedStringOutputField_shouldNotThrowException() throws Exception { UpdateRequestProcessor instance = createUpdateProcessor("string_field", "enriched_field_multi", collection1, "model-mv"); assertNotNull(instance); + restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model-mv"); } @Test - public void init_multivaluedStringOutputField_buildResponseFormat_shouldProduceArraySchema() { + public void init_multivaluedStringOutputField_buildResponseFormat_shouldProduceArraySchema() throws Exception { NamedList args = new NamedList<>(); ManagedChatModelStore.getManagedModelStore(collection1) .addModel(new SolrChatModel("model-rf", null, null)); @@ -356,6 +357,7 @@ public void init_multivaluedStringOutputField_buildResponseFormat_shouldProduceA assertEquals( dev.langchain4j.model.chat.request.ResponseFormatType.JSON, responseFormat.type()); assertNotNull(responseFormat.jsonSchema()); + restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model-rf"); } @Test @@ -371,14 +373,15 @@ public void init_singleValuedStringOutputField_buildResponseFormat_shouldProduce } @Test - public void init_dynamicInputField_shouldNotThrowException() { + public void init_dynamicInputField_shouldNotThrowException() throws 
Exception{ UpdateRequestProcessor instance = createUpdateProcessor("text_s", "enriched_field", collection1, "model2"); assertNotNull(instance); + restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model2"); } @Test - public void init_multipleDynamicInputFields_shouldNotThrowException() { + public void init_multipleDynamicInputFields_shouldNotThrowException() throws Exception{ NamedList args = new NamedList<>(); ManagedChatModelStore.getManagedModelStore(collection1) .addModel(new SolrChatModel("model1", null, null)); @@ -394,6 +397,7 @@ public void init_multipleDynamicInputFields_shouldNotThrowException() { SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {}; assertNotNull(factory.getInstance(req, null, null)); + restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/model1"); } private UpdateRequestProcessor createUpdateProcessor( diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java index 5349de833df3..e88c8e549a33 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java @@ -628,6 +628,78 @@ public void processAdd_multivaluedDateOutputField_shouldPopulateAllValues() thro "/response/docs/[1]/output_date_multi/[1]=='2025-06-30T00:00:00Z'"); } + // --- LLM response contract violation tests --- + + @Test + public void processAdd_llmResponseMissingValueKey_shouldLogAndIndexWithNoEnrichedField() + throws Exception { + // Model returns valid JSON but without the required "value" key + 
loadTestChatModel("dummy-chat-model-missing-value-key.json", "dummy-chat-1"); + + addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); + addWithChain(sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), "documentEnrichment"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("enriched_field"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "!/response/docs/[0]/enriched_field==", + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/enriched_field=="); + } + + @Test + public void processAdd_llmResponseMalformedJson_shouldLogAndIndexWithNoEnrichedField() + throws Exception { + // Model returns a plain string that cannot be parsed as JSON + loadTestChatModel("dummy-chat-model-malformed-json.json", "dummy-chat-1"); + + addWithChain(sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), "documentEnrichment"); + addWithChain(sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), "documentEnrichment"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("enriched_field"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "!/response/docs/[0]/enriched_field==", + "/response/docs/[1]/id=='98'", + "!/response/docs/[1]/enriched_field=="); + } + + // --- multivalued output field / scalar response test --- + + @Test + public void processAdd_multivaluedOutputField_scalarLlmResponse_shouldStoreSingleValue() + throws Exception { + // Model returns {"value": "a single string"} for a multivalued output field. + // The scalar falls through the List check and is stored as a single-element value. 
+ loadTestChatModel("dummy-chat-model-multivalued-scalar.json", "dummy-chat-multivalued-1"); + + addWithChain( + sdoc("id", "99", "string_field", "Vegeta is the saiyan prince."), + "documentEnrichmentMultivaluedString"); + addWithChain( + sdoc("id", "98", "string_field", "Kakaroth is a saiyan grown up on planet Earth."), + "documentEnrichmentMultivaluedString"); + assertU(commit()); + + final SolrQuery query = getEnrichmentQuery("enriched_field_multi"); + + assertJQ( + "/query" + query.toQueryString(), + "/response/numFound==2]", + "/response/docs/[0]/id=='99'", + "/response/docs/[0]/enriched_field_multi/[0]=='a single string'", + "/response/docs/[1]/id=='98'", + "/response/docs/[1]/enriched_field_multi/[0]=='a single string'"); + } + private SolrQuery getEnrichmentQuery(String enrichedFieldName) { final SolrQuery query = new SolrQuery(); query.setQuery("*:*"); diff --git a/solr/solr-ref-guide/modules/indexing-guide/indexing-nav.adoc b/solr/solr-ref-guide/modules/indexing-guide/indexing-nav.adoc index 9b50849716c3..940225e8d4ef 100644 --- a/solr/solr-ref-guide/modules/indexing-guide/indexing-nav.adoc +++ b/solr/solr-ref-guide/modules/indexing-guide/indexing-nav.adoc @@ -58,5 +58,6 @@ ** xref:partial-document-updates.adoc[] ** xref:reindexing.adoc[] ** xref:language-detection.adoc[] +** xref:document-enrichment-with-llms.adoc[] ** xref:de-duplication.adoc[] ** xref:content-streams.adoc[] diff --git a/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc b/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc new file mode 100644 index 000000000000..4207a892e274 --- /dev/null +++ b/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc @@ -0,0 +1,19 @@ += Document Enrichment with LLMs +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +This module brings the power of *Language Models* to Solr. From 570c2aaf4cf1f0382ee8ecb3cbcab77249f8996a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= Date: Wed, 1 Apr 2026 11:11:54 +0200 Subject: [PATCH 7/9] [llm-document-enrichment] updated supported models + added tests --- gradle/libs.versions.toml | 3 + solr/modules/language-models/build.gradle | 3 + solr/modules/language-models/gradle.lockfile | 3 + .../anthropic-chat-model.json | 13 ++++ .../modelChatExamples/gemini-chat-model.json | 12 ++++ .../modelChatExamples/ollama-chat-model.json | 11 ++++ .../store/rest/TestChatModelManager.java | 59 +++++++++++++++++++ ...DocumentEnrichmentUpdateProcessorTest.java | 2 +- 8 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/anthropic-chat-model.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/gemini-chat-model.json create mode 100644 solr/modules/language-models/src/test-files/modelChatExamples/ollama-chat-model.json diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 8c854fb41b01..eadc0e41ce3f 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -450,11 +450,14 @@ ktor-client-serialization-json = { 
module = "io.ktor:ktor-serialization-kotlinx- ktor-server-cio = { module = "io.ktor:ktor-server-cio", version.ref = "ktor" } ktor-server-core = { module = "io.ktor:ktor-server-core", version.ref = "ktor" } ktor-server-htmlBuilder = { module = "io.ktor:ktor-server-html-builder", version.ref = "ktor" } +langchain4j-anthropic = { module = "dev.langchain4j:langchain4j-anthropic" } langchain4j-bom = { module = "dev.langchain4j:langchain4j-bom", version.ref = "langchain4j-bom" } langchain4j-cohere = { module = "dev.langchain4j:langchain4j-cohere" } langchain4j-core = { module = "dev.langchain4j:langchain4j-core" } +langchain4j-google-ai-gemini = { module = "dev.langchain4j:langchain4j-google-ai-gemini" } langchain4j-hugging-face = { module = "dev.langchain4j:langchain4j-hugging-face" } langchain4j-mistral-ai = { module = "dev.langchain4j:langchain4j-mistral-ai" } +langchain4j-ollama = { module = "dev.langchain4j:langchain4j-ollama" } langchain4j-open-ai = { module = "dev.langchain4j:langchain4j-open-ai" } lmax-disruptor = { module = "com.lmax:disruptor", version.ref = "lmax-disruptor" } locationtech-spatial4j = { module = "org.locationtech.spatial4j:spatial4j", version.ref = "spatial4j" } diff --git a/solr/modules/language-models/build.gradle b/solr/modules/language-models/build.gradle index a4dc82fc15cb..17d9716cfd20 100644 --- a/solr/modules/language-models/build.gradle +++ b/solr/modules/language-models/build.gradle @@ -29,9 +29,12 @@ dependencies { implementation libs.apache.lucene.core implementation libs.langchain4j.core + runtimeOnly libs.langchain4j.anthropic runtimeOnly libs.langchain4j.cohere + runtimeOnly libs.langchain4j.google.ai.gemini runtimeOnly libs.langchain4j.hugging.face runtimeOnly libs.langchain4j.mistral.ai + runtimeOnly libs.langchain4j.ollama runtimeOnly libs.langchain4j.open.ai implementation libs.slf4j.api diff --git a/solr/modules/language-models/gradle.lockfile b/solr/modules/language-models/gradle.lockfile index 27221b30a3e7..1427966a1ddc 
100644 --- a/solr/modules/language-models/gradle.lockfile +++ b/solr/modules/language-models/gradle.lockfile @@ -40,13 +40,16 @@ com.tdunning:t-digest:3.3=jarValidation,runtimeClasspath,runtimeLibs,solrPlatfor commons-cli:commons-cli:1.10.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath commons-codec:commons-codec:1.19.0=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath commons-io:commons-io:2.20.0=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testCompileClasspath,testRuntimeClasspath +dev.langchain4j:langchain4j-anthropic:1.9.1=jarValidation,runtimeClasspath,runtimeLibs,testRuntimeClasspath dev.langchain4j:langchain4j-bom:1.9.1=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath dev.langchain4j:langchain4j-cohere:1.9.1-beta17=jarValidation,runtimeClasspath,runtimeLibs,testRuntimeClasspath dev.langchain4j:langchain4j-core:1.9.1=compileClasspath,jarValidation,runtimeClasspath,runtimeLibs,testCompileClasspath,testRuntimeClasspath +dev.langchain4j:langchain4j-google-ai-gemini:1.9.1=jarValidation,runtimeClasspath,runtimeLibs,testRuntimeClasspath dev.langchain4j:langchain4j-http-client-jdk:1.9.1=jarValidation,runtimeClasspath,runtimeLibs,testRuntimeClasspath dev.langchain4j:langchain4j-http-client:1.9.1=jarValidation,runtimeClasspath,runtimeLibs,testRuntimeClasspath dev.langchain4j:langchain4j-hugging-face:1.9.1-beta17=jarValidation,runtimeClasspath,runtimeLibs,testRuntimeClasspath dev.langchain4j:langchain4j-mistral-ai:1.9.1=jarValidation,runtimeClasspath,runtimeLibs,testRuntimeClasspath +dev.langchain4j:langchain4j-ollama:1.9.1=jarValidation,runtimeClasspath,runtimeLibs,testRuntimeClasspath dev.langchain4j:langchain4j-open-ai:1.9.1=jarValidation,runtimeClasspath,runtimeLibs,testRuntimeClasspath io.dropwizard.metrics:metrics-annotation:4.2.26=jarValidation,testRuntimeClasspath 
io.dropwizard.metrics:metrics-core:4.2.26=jarValidation,runtimeClasspath,runtimeLibs,solrPlatformLibs,testRuntimeClasspath diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/anthropic-chat-model.json b/solr/modules/language-models/src/test-files/modelChatExamples/anthropic-chat-model.json new file mode 100644 index 000000000000..c4bd85ada4bb --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/anthropic-chat-model.json @@ -0,0 +1,13 @@ +{ + "class": "dev.langchain4j.model.anthropic.AnthropicChatModel", + "name": "anthropic-chat-1", + "params": { + "baseUrl": "https://api.anthropic.com/v1", + "apiKey": "apiKey-anthropic", + "modelName": "claude-3-5-haiku-latest", + "timeout": 60, + "logRequests": true, + "logResponses": true, + "maxRetries": 5 + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/gemini-chat-model.json b/solr/modules/language-models/src/test-files/modelChatExamples/gemini-chat-model.json new file mode 100644 index 000000000000..0ac0a612daa2 --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/gemini-chat-model.json @@ -0,0 +1,12 @@ +{ + "class": "dev.langchain4j.model.googleai.GoogleAiGeminiChatModel", + "name": "gemini-chat-1", + "params": { + "apiKey": "apiKey-gemini", + "modelName": "gemini-2.0-flash", + "timeout": 60, + "logRequests": true, + "logResponses": true, + "maxRetries": 5 + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test-files/modelChatExamples/ollama-chat-model.json b/solr/modules/language-models/src/test-files/modelChatExamples/ollama-chat-model.json new file mode 100644 index 000000000000..411a6468452b --- /dev/null +++ b/solr/modules/language-models/src/test-files/modelChatExamples/ollama-chat-model.json @@ -0,0 +1,11 @@ +{ + "class": "dev.langchain4j.model.ollama.OllamaChatModel", + "name": "ollama-chat-1", + "params": { + "baseUrl": "http://localhost:11434", 
+ "modelName": "llama3.2", + "timeout": 60, + "logRequests": true, + "logResponses": true + } +} \ No newline at end of file diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java index 49c1b70ce2e0..25880eecbcd6 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/store/rest/TestChatModelManager.java @@ -162,6 +162,65 @@ public void loadChatModel_mistralAi_shouldLoadModelConfig() throws Exception { restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/" + modelName); } + @Test + public void loadChatModel_anthropic_shouldLoadModelConfig() throws Exception { + loadChatModel("anthropic-chat-model.json"); + + final String modelName = "anthropic-chat-1"; + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ( + ManagedChatModelStore.REST_END_POINT, + "/models/[0]/params/baseUrl=='https://api.anthropic.com/v1'"); + assertJQ( + ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-anthropic'"); + assertJQ( + ManagedChatModelStore.REST_END_POINT, + "/models/[0]/params/modelName=='claude-3-5-haiku-latest'"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/maxRetries==5"); + + restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/" + modelName); + } + + @Test + public void loadChatModel_ollama_shouldLoadModelConfig() throws 
Exception { + loadChatModel("ollama-chat-model.json"); + + final String modelName = "ollama-chat-1"; + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ( + ManagedChatModelStore.REST_END_POINT, + "/models/[0]/params/baseUrl=='http://localhost:11434'"); + assertJQ( + ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/modelName=='llama3.2'"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); + + restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/" + modelName); + } + + @Test + public void loadChatModel_gemini_shouldLoadModelConfig() throws Exception { + loadChatModel("gemini-chat-model.json"); + + final String modelName = "gemini-chat-1"; + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/name=='" + modelName + "'"); + assertJQ( + ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/apiKey=='apiKey-gemini'"); + assertJQ( + ManagedChatModelStore.REST_END_POINT, + "/models/[0]/params/modelName=='gemini-2.0-flash'"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/timeout==60"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logRequests==true"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/logResponses==true"); + assertJQ(ManagedChatModelStore.REST_END_POINT, "/models/[0]/params/maxRetries==5"); + + restTestHarness.delete(ManagedChatModelStore.REST_END_POINT + "/" + modelName); + } + @Test public void loadChatModel_dummyUnsupportedParam_shouldRaiseError() throws Exception { loadChatModel("dummy-chat-model-unsupported.json", "400"); diff --git a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java 
b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java index e88c8e549a33..048e073da9f0 100644 --- a/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java +++ b/solr/modules/language-models/src/test/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorTest.java @@ -165,7 +165,7 @@ public void processAtomicUpdate_shouldTriggerEnrichmentAndFetchTheStoredContent( SolrInputDocument atomicDoc = new SolrInputDocument(); atomicDoc.setField("id", "99"); - atomicDoc.setField("enriched", Map.of("set", "true")); + atomicDoc.setField("enriched", Map.of("set", true)); addWithChain(atomicDoc, "documentEnrichmentForPartialUpdates"); assertU(commit()); From 184f5797264bd6e3ce3cfca71636cf65b5a1b9cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= Date: Wed, 1 Apr 2026 11:12:26 +0200 Subject: [PATCH 8/9] [llm-document-enrichment] added documentation for 'Document Enrichment with LLMs' module --- .../pages/document-enrichment-with-llms.adoc | 461 +++++++++++++++++- 1 file changed, 460 insertions(+), 1 deletion(-) diff --git a/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc b/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc index 4207a892e274..0681e99724aa 100644 --- a/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc +++ b/solr/solr-ref-guide/modules/indexing-guide/pages/document-enrichment-with-llms.adoc @@ -16,4 +16,463 @@ // specific language governing permissions and limitations // under the License. -This module brings the power of *Language Models* to Solr. +This module brings the power of *Large Language Models* to Solr. 
+ +More specifically, it provides the capability, at indexing time, given a prompt and a set of input fields, of calling an +LLM through https://github.com/langchain4j/langchain4j[LangChain4j] for each document and storing the result of the call +in an `outputField`, which can be of multiple types and even multivalued. + +_Without_ this module, the LLM calls must be done _outside_ Solr, before indexing. + +[IMPORTANT] +==== +This module sends your documents off to some hosted service on the internet. +There are cost, privacy, performance, and service availability implications on such a strong dependency that should be +diligently examined before employing this module in a serious way. + +==== + +At the moment a subset of LLM providers supported by LangChain4j is supported by Solr. + +*Disclaimer*: Apache Solr is *in no way* affiliated with any of these corporations or services. + +If you want to add support for additional services or improve the support for the existing ones, feel free to +contribute: + +* https://github.com/apache/solr/blob/main/CONTRIBUTING.md[Contributing to Solr] + +== Module + +This is provided via the `language-models` xref:configuration-guide:solr-modules.adoc[Solr Module] that needs to be +enabled before use. + +== Language Model Configuration + +Language Models is a module and therefore its plugins must be configured in `solrconfig.xml`. + +=== Minimum Requirements + +* Enable the `language-models` module to make the Language Models classes available on Solr's classpath. +See xref:configuration-guide:solr-modules.adoc[Solr Module] for more details. + +* An update processor, similar to the one below, must be declared in `solrconfig.xml`: ++ +[source,xml] +---- + + + string_field + summary + Summarize this content: {string_field} + model-name + + + +---- +[NOTE] +==== +If no component is configured in `solrconfig.xml`, the `ChatModel` store will not be registered and requests to `/schema/chat-model-store` will return an error.
+==== + +== Document Enrichment Lifecycle + +=== Models + +* A model in this module is a chat model that answers with text given a prompt. +* A model in this Solr module is a reference to an external API that runs the Large Language Model responsible for chat +completion. + +[IMPORTANT] +==== +The Solr chat model specifies the parameters to access the APIs; the LLM doesn't run internally in Solr. + +==== + +A model is described by these parameters: + + +`class`:: ++ +[%autowidth,frame=none] +|=== +s|Required |Default: none +|=== ++ +The model implementation. +Accepted values: + +* `dev.langchain4j.model.ollama.OllamaChatModel` +* `dev.langchain4j.model.mistralai.MistralAiChatModel` +* `dev.langchain4j.model.anthropic.AnthropicChatModel` +* `dev.langchain4j.model.openai.OpenAiChatModel` +* `dev.langchain4j.model.googleai.GoogleAiGeminiChatModel` + +`name`:: ++ +[%autowidth,frame=none] +|=== +s|Required |Default: none +|=== ++ +The identifier of your model; this is used by any component that intends to use the model (e.g., `DocumentEnrichmentUpdateProcessorFactory` update processor). + +`params`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: none +|=== ++ +Each model class has potentially different params. +Many are shared, but for the full set of parameters of the model you are interested in, please refer to the official documentation of the LangChain4j version included in Solr: https://docs.langchain4j.dev/category/language-models[Chat Models in LangChain4j]. + +=== Supported Models +Apache Solr uses https://github.com/langchain4j/langchain4j[LangChain4j] to support document enrichment with LLMs.
+The models currently supported are: + +[tabs#supported-chat-models] +====== +Ollama:: ++ +==== + +[source,json] +---- +{ + "class": "dev.langchain4j.model.ollama.OllamaChatModel", + "name": "", + "params": { + "baseUrl": "http://localhost:11434", + "modelName": "", + "timeout": 300, + "logRequests": true, + "logResponses": true, + "maxRetries": 5 + } +} +---- +==== + +MistralAI:: ++ +==== +[source,json] +---- +{ + "class": "dev.langchain4j.model.mistralai.MistralAiChatModel", + "name": "", + "params": { + "baseUrl": "https://api.mistral.ai/v1", + "apiKey": "", + "modelName": "", + "timeout": 60, + "logRequests": true, + "logResponses": true, + "maxRetries": 5 + } +} +---- +==== +OpenAI:: ++ +==== +[source,json] +---- +{ + "class": "dev.langchain4j.model.openai.OpenAiChatModel", + "name": "", + "params": { + "baseUrl": "https://api.openai.com/v1", + "apiKey": "", + "modelName": "", + "timeout": 60, + "logRequests": true, + "logResponses": true, + "maxRetries": 5 + } +} +---- +==== + +Anthropic:: ++ +==== +[source,json] +---- +{ + "class": "dev.langchain4j.model.anthropic.AnthropicChatModel", + "name": "", + "params": { + "baseUrl": "https://api.anthropic.com/v1/", + "apiKey": "", + "modelName": "", + "timeout": 60, + "logRequests": true, + "logResponses": true, + "maxRetries": 5 + } +} +---- +==== + +Gemini:: ++ +==== +[source,json] +---- +{ + "class": "dev.langchain4j.model.googleai.GoogleAiGeminiChatModel", + "name": "", + "params": { + "baseUrl": "https://generativelanguage.googleapis.com/v1beta/", + "apiKey": "", + "modelName": "", + "timeout": 60, + "logRequests": true, + "logResponses": true, + "maxRetries": 5 + } +} +---- +==== +====== + +=== Uploading a Model + +To upload the model in a `/path/myModel.json` file, please run: + +[source,bash] +---- +curl -XPUT 'http://localhost:8983/solr/YOUR_COLLECTION/schema/chat-model-store' --data-binary "@/path/myModel.json" -H 'Content-type:application/json' +---- + +To delete the `currentModel` model: + +[source,bash] 
+---- +curl -XDELETE 'http://localhost:8983/solr/YOUR_COLLECTION/schema/chat-model-store/currentModel' +---- + +To view all models: + +[source,text] +http://localhost:8983/solr/YOUR_COLLECTION/schema/chat-model-store + + +.Example: /path/myModel.json +[source,json] +---- +{ + "class": "dev.langchain4j.model.openai.OpenAiChatModel", + "name": "openai-1", + "params": { + "baseUrl": "https://api.openai.com/v1", + "apiKey": "apiKey-openAI", + "modelName": "gpt-5.4-nano", + "timeout": 60, + "logRequests": true, + "logResponses": true, + "maxRetries": 5 + } +} +---- + +=== How to Trigger Document Enrichment during Indexing +To create new fields starting from existing ones in your documents at indexing time, you need to configure an {solr-javadocs}/core/org/apache/solr/update/processor/UpdateRequestProcessorChain.html[Update Request Processor Chain] that includes at least one `DocumentEnrichmentUpdateProcessor` update request processor in one of the following two ways: + +* Update processor with parameter `prompt` ++ +[source,xml] +---- + + + string_field + summary + Summarize this content: {string_field} + model-name + + + +---- + +* Update processor with parameter `promptFile`: in this case, the file `prompt.txt` must be uploaded to Solr similarly to any other configuration file (e.g., `solrconfig.xml`, `synonyms.txt`, etc.) ++ +[source,xml] +---- + + + string_field + summary + prompt.txt + model-name + + + +---- + +Exactly one of the following parameters is required: `prompt` or `promptFile`. + +Another important feature of this module is that one (or more) `inputField` needs to be injected in the prompt. This is +done with special tokens, which are the `fieldName` surrounded by curly brackets (e.g., `{fieldName}`). These tokens +are _mandatory_ for this module to work properly. Solr will throw an error if the parameters are not properly defined.
+For example, both the prompt and the content of the file `prompt.txt` must contain the text '{string_field}', which +will be substituted with the content of the `string_field` field for each document. An example of a valid prompt with +multiple input fields is as follows: + +[source,xml] +---- + + + title + body + summary + Summarize with the following information. Title: {title}. Body: {body}. + chat-model + + + +---- + +The LLM response is mapped to the specified `outputField`. Note that this module only supports a subset of Solr's +available field types, which includes: + +* *String/Text*: `StrField`, `TextField` +* *Date*: `DatePointField` +* *Numeric*: `IntPointField`, `LongPointField`, `FloatPointField`, `DoublePointField` +* *Boolean*: `BoolField` + + +These fields _can_ be multivalued. Solr uses structured output from LangChain4j to deal with LLMs' responses. + + +For more details on how to work with update request processors in Apache Solr, please refer to the dedicated page: +xref:configuration-guide:update-request-processors.adoc[Update Request Processor] + +[IMPORTANT] +==== +This update processor sends your document field content off to some hosted service on the internet. +There are serious performance implications that should be diligently examined before employing this component in production. +It will substantially slow down your indexing pipeline, so make sure to stress test your solution before going live. + +==== + +=== Index first and enrich your documents on a second pass +LLM calls are usually quite slow, so, depending on your use case, it could be a good idea to index your documents first and +enrich them with new LLM-generated fields later on. + +This can be done in Solr by defining two update request processor chains: one that includes all the processors you need, +excluding the `DocumentEnrichmentUpdateProcessor` (let's call it 'no-enrichment') and one that includes the +`DocumentEnrichmentUpdateProcessor` (let's call it 'enrichment').
+ +[source,xml] +---- + + + ... + + ... + + ... + + + +---- + +[source,xml] +---- + + + ... + + ... + + ... + + + string_field + summary + Summarize this content: {string_field} + chat-model + + + +---- + +You would index your documents first using the 'no-enrichment' and when finished, incrementally repeat the indexing +targeting the 'enrichment' chain. + +[IMPORTANT] +==== +This implies you need to send the documents you want to index to Solr twice and re-run any other update request +processor you need, in the second chain. This has data traffic implications (you transfer your documents over the +network twice) and processing implications (if you have other update request processors in your chain, those must be +repeated the second time as we are literally replacing the indexed documents one by one). +==== + +If your use case is compatible with xref:indexing-guide:partial-document-updates.adoc[Partial Updates], you can do better: + +You still define two chains, but this time the 'enrichment' one only includes the 'DocumentEnrichmentUpdateProcessor' +(and the xref:configuration-guide:update-request-processors.adoc[Mandatory Processors] ) + +[source,xml] +---- + + + ... + + ... + + ... + + + +---- + +[source,xml] +---- + + + + string_field + summary + Summarize this content: {string_field} + chat-model + + + +---- + +[NOTE] +==== +Since partial updates are resolved by `DistributedUpdateProcessorFactory`, be sure to place +`DocumentEnrichmentUpdateProcessorFactory` afterwards so that it sees normal/complete documents. +==== + +Add to your schema a simple field that will be useful to track the enrichment process and use atomic updates: + +[source,xml] +---- + + +---- + +In the first pass just index your documents using your reliable and fast 'no-enrichment' chain. 
+ +On the second pass, re-index all your documents using atomic updates and targeting the 'enrichment' chain: + +[source,json] +---- +{ + "id":"mydoc", + "enriched": { + "set": true + } +} +---- + +What will happen is that internally Solr fetches the stored content of the docs to update, all the existing fields are +retrieved and a re-indexing happens, targeting the 'enrichment' chain that will add the LLM-generated fields and set the +boolean `enriched` field to `true`. + +Faceting or querying on the boolean `enriched` field can also give you a quick idea on how many documents have been +enriched with the new generated fields. From 1b7c972756b2e0a76ebb9cf84a8be9a56cbc7eee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Rinaldi?= Date: Wed, 1 Apr 2026 17:29:26 +0200 Subject: [PATCH 9/9] [llm-document-enrichment] cleanup of DocumentEnrichmentUpdateProcessorFactory --- ...umentEnrichmentUpdateProcessorFactory.java | 28 ++++++++----------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java index 659a20897eb4..93d730c852f7 100644 --- a/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java +++ b/solr/modules/language-models/src/java/org/apache/solr/languagemodels/documentenrichment/update/processor/DocumentEnrichmentUpdateProcessorFactory.java @@ -112,14 +112,11 @@ public class DocumentEnrichmentUpdateProcessorFactory extends UpdateRequestProce private static final String MODEL_NAME = "model"; private static final Pattern PLACEHOLDER_PATTERN = Pattern.compile("\\{([^}]+)\\}"); - private ManagedChatModelStore modelStore = null; - private List inputFields; 
private String outputField; - private String prompt; + private String promptText; private String promptFile; private String modelName; - private SolrParams params; @Override public void init(final NamedList args) { @@ -133,7 +130,7 @@ public void init(final NamedList args) { } inputFields = List.copyOf(fieldNames); - params = args.toSolrParams(); + SolrParams params = args.toSolrParams(); RequiredSolrParams required = params.required(); outputField = required.get(OUTPUT_FIELD_PARAM); modelName = required.get(MODEL_NAME); @@ -153,7 +150,7 @@ public void init(final NamedList args) { } if (inlinePrompt != null) { validatePromptPlaceholders(inlinePrompt, inputFields); - this.prompt = inlinePrompt; + this.promptText = inlinePrompt; } this.promptFile = promptFilePath; } @@ -164,25 +161,22 @@ public void inform(SolrCore core) { ManagedChatModelStore.registerManagedChatModelStore(solrResourceLoader, this); if (promptFile != null) { try (InputStream is = solrResourceLoader.openResource(promptFile)) { - prompt = new String(is.readAllBytes(), StandardCharsets.UTF_8).trim(); + promptText = new String(is.readAllBytes(), StandardCharsets.UTF_8).trim(); } catch (IOException e) { throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "Cannot read prompt file: " + promptFile, e); } - validatePromptPlaceholders(prompt, inputFields); + validatePromptPlaceholders(promptText, inputFields); } } @Override public void onManagedResourceInitialized(NamedList args, ManagedResource res) throws SolrException { - if (res instanceof ManagedChatModelStore) { - modelStore = (ManagedChatModelStore) res; - } - if (modelStore != null) { - modelStore.loadStoredModels(); + if (res instanceof ManagedChatModelStore store) { + store.loadStoredModels(); } } @@ -203,8 +197,8 @@ public UpdateRequestProcessor getInstance( ResponseFormat responseFormat = buildResponseFormat(outputFieldSchema); boolean multiValued = outputFieldSchema.multiValued(); - ManagedChatModelStore modelStore = 
ManagedChatModelStore.getManagedModelStore(req.getCore()); - SolrChatModel chatModel = modelStore.getModel(modelName); + ManagedChatModelStore store = ManagedChatModelStore.getManagedModelStore(req.getCore()); + SolrChatModel chatModel = store.getModel(modelName); if (chatModel == null) { throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, @@ -215,7 +209,7 @@ public UpdateRequestProcessor getInstance( } return new DocumentEnrichmentUpdateProcessor( - inputFields, outputField, prompt, chatModel, multiValued, responseFormat, req, next); + inputFields, outputField, promptText, chatModel, multiValued, responseFormat, req, next); } /** @@ -306,7 +300,7 @@ public String getOutputField() { } public String getPrompt() { - return prompt; + return promptText; } public String getModelName() {