keyPath : primaryKeys) {
+ if (!keyPath.isEmpty()) {
+ pkFields.add(keyPath.get(0));
+ }
+ }
+ return pkFields;
+ }
+}
diff --git a/asterixdb/asterix-spidersilk/src/main/java/org/apache/asterix/spidersilk/servlet/NL2SqlServlet.java b/asterixdb/asterix-spidersilk/src/main/java/org/apache/asterix/spidersilk/servlet/NL2SqlServlet.java
new file mode 100644
index 00000000000..6e748c3de22
--- /dev/null
+++ b/asterixdb/asterix-spidersilk/src/main/java/org/apache/asterix/spidersilk/servlet/NL2SqlServlet.java
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.spidersilk.servlet;
+
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.concurrent.ConcurrentMap;
+
+import org.apache.asterix.spidersilk.api.INl2SqlTranslator;
+import org.apache.asterix.spidersilk.api.Nl2SqlException;
+import org.apache.asterix.spidersilk.api.SchemaContext;
+import org.apache.hyracks.http.api.IServletRequest;
+import org.apache.hyracks.http.api.IServletResponse;
+import org.apache.hyracks.http.server.AbstractServlet;
+import org.apache.hyracks.http.server.utils.HttpUtil;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+
+import io.netty.handler.codec.http.HttpResponseStatus;
+
+/**
+ * HTTP servlet exposing the NL2SQL++ translation API on the JSON API server.
+ *
+ * Endpoint: {@code POST /query/nl2sql}
+ *
+ * Request parameters (form or JSON body):
+ *
+ * - {@code statement} (required) — the natural language query
+ * - {@code dataverse} (optional) — target dataverse for schema context
+ *
+ *
+ * Response (JSON):
+ *
+ * {
+ * "sqlpp": "SELECT VALUE t FROM TweetMessages t WHERE ...",
+ * "status": "success"
+ * }
+ *
+ *
+ * When the {@code INl2SqlTranslator} implementation is not yet available,
+ * the endpoint returns HTTP 501 (Not Implemented) with an informative message,
+ * allowing the servlet to be registered and tested without a live LLM backend.
+ */
+public class NL2SqlServlet extends AbstractServlet {
+
+    private static final Logger LOGGER = LogManager.getLogger();
+    protected static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+    /** Request parameter name for the natural language query. */
+    public static final String PARAM_STATEMENT = "statement";
+    /** Optional request parameter specifying the target dataverse. */
+    public static final String PARAM_DATAVERSE = "dataverse";
+
+    /** Injected translator; {@code null} until a concrete implementation is supplied (Phase 2). */
+    private final INl2SqlTranslator translator;
+
+    /** Binds the servlet to {@code paths}; a {@code null} translator makes every valid POST answer HTTP 501. */
+    public NL2SqlServlet(ConcurrentMap<String, Object> ctx, String[] paths, INl2SqlTranslator translator) {
+        super(ctx, paths);
+        this.translator = translator;
+    }
+
+    /** Translates {@code statement}; replies 400 (blank statement), 501 (no translator) or 500 (failure). */
+    @Override
+    protected void post(IServletRequest request, IServletResponse response) throws IOException {
+        String naturalLanguage = request.getParameter(PARAM_STATEMENT);
+        String dataverse = request.getParameter(PARAM_DATAVERSE);
+
+        if (naturalLanguage == null || naturalLanguage.isBlank()) {
+            sendError(request, response, HttpResponseStatus.BAD_REQUEST, "Parameter 'statement' is required.");
+            return;
+        }
+        if (translator == null) {
+            sendError(request, response, HttpResponseStatus.NOT_IMPLEMENTED,
+                    "NL2SQL++ translator is not yet configured. "
+                            + "Set nl2sql.model.type and related properties in cc.conf and restart the server.");
+            return;
+        }
+
+        try {
+            // A blank dataverse is treated like an absent one, mirroring the blank check on 'statement'.
+            // Dataset descriptions stay empty until SchemaContextBuilder is wired in (next phase).
+            boolean hasDataverse = dataverse != null && !dataverse.isBlank();
+            SchemaContext schemaContext = hasDataverse ? new SchemaContext(dataverse, java.util.List.of()) : null;
+
+            String sqlpp = translator.translate(naturalLanguage, schemaContext);
+
+            ObjectNode result = OBJECT_MAPPER.createObjectNode();
+            result.put("sqlpp", sqlpp);
+            result.put("status", "success");
+            writeJson(request, response, HttpResponseStatus.OK, result);
+        } catch (Nl2SqlException e) {
+            LOGGER.warn("NL2SQL translation failed for query: {}", naturalLanguage, e);
+            sendError(request, response, HttpResponseStatus.INTERNAL_SERVER_ERROR, e.getMessage());
+        }
+    }
+
+    /** GET is not supported: always replies 405 and points the caller at POST. */
+    @Override
+    protected void get(IServletRequest request, IServletResponse response) throws IOException {
+        sendError(request, response, HttpResponseStatus.METHOD_NOT_ALLOWED, "Use POST with parameter 'statement'.");
+    }
+
+    /** Replies with {@code status} and a JSON body holding "status": "error" plus the given "message". */
+    private void sendError(IServletRequest request, IServletResponse response, HttpResponseStatus status,
+            String message) throws IOException {
+        ObjectNode error = OBJECT_MAPPER.createObjectNode();
+        error.put("status", "error");
+        error.put("message", message);
+        writeJson(request, response, status, error);
+    }
+
+    /** Sets the JSON content type and {@code status}, then writes and flushes {@code body}. */
+    private void writeJson(IServletRequest request, IServletResponse response, HttpResponseStatus status,
+            ObjectNode body) throws IOException {
+        HttpUtil.setContentType(response, HttpUtil.ContentType.APPLICATION_JSON, request);
+        response.setStatus(status);
+        PrintWriter writer = response.writer();
+        writer.write(body.toString());
+        writer.flush();
+    }
+}
diff --git a/asterixdb/asterix-spidersilk/src/main/java/org/apache/asterix/spidersilk/servlet/NL2SqlServletRegistrant.java b/asterixdb/asterix-spidersilk/src/main/java/org/apache/asterix/spidersilk/servlet/NL2SqlServletRegistrant.java
new file mode 100644
index 00000000000..9f0e1a74559
--- /dev/null
+++ b/asterixdb/asterix-spidersilk/src/main/java/org/apache/asterix/spidersilk/servlet/NL2SqlServletRegistrant.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.spidersilk.servlet;
+
+import org.apache.asterix.api.http.IApiServerRegistrant;
+import org.apache.asterix.common.dataflow.ICcApplicationContext;
+import org.apache.asterix.common.utils.Servlets;
+import org.apache.hyracks.http.server.HttpServer;
+
+/**
+ * Registers the {@link NL2SqlServlet} on the JSON API server via the
+ * {@link IApiServerRegistrant} ServiceLoader extension point.
+ *
+ * This class is discovered automatically at runtime through:
+ * {@code META-INF/services/org.apache.asterix.api.http.IApiServerRegistrant}
+ *
+ * No modification to {@code CCApplication.java} is required beyond the
+ * one-time addition of the ServiceLoader call in {@code setupJSONAPIServer()}.
+ */
+public class NL2SqlServletRegistrant implements IApiServerRegistrant {
+
+    @Override
+    public void register(ICcApplicationContext appCtx, HttpServer apiServer) {
+        // No translator yet: it will be built from configuration once LangChain4j integration is added.
+        String[] servletPaths = { Servlets.NL2SQL_SERVICE };
+        apiServer.addServlet(new NL2SqlServlet(apiServer.ctx(), servletPaths, null));
+    }
+}
diff --git a/asterixdb/asterix-spidersilk/src/main/resources/META-INF/services/org.apache.asterix.api.http.IApiServerRegistrant b/asterixdb/asterix-spidersilk/src/main/resources/META-INF/services/org.apache.asterix.api.http.IApiServerRegistrant
new file mode 100644
index 00000000000..0a4c6a71fde
--- /dev/null
+++ b/asterixdb/asterix-spidersilk/src/main/resources/META-INF/services/org.apache.asterix.api.http.IApiServerRegistrant
@@ -0,0 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+org.apache.asterix.spidersilk.servlet.NL2SqlServletRegistrant
diff --git a/asterixdb/asterix-spidersilk/src/test/java/org/apache/asterix/spidersilk/NL2SqlServletTest.java b/asterixdb/asterix-spidersilk/src/test/java/org/apache/asterix/spidersilk/NL2SqlServletTest.java
new file mode 100644
index 00000000000..9ee95c7cedd
--- /dev/null
+++ b/asterixdb/asterix-spidersilk/src/test/java/org/apache/asterix/spidersilk/NL2SqlServletTest.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.spidersilk;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.asterix.spidersilk.api.INl2SqlTranslator;
+import org.apache.asterix.spidersilk.api.Nl2SqlException;
+import org.apache.asterix.spidersilk.api.SchemaContext;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Unit tests for the NL2SQL++ module skeleton.
+ *
+ * These tests verify the core API contracts without requiring a running AsterixDB
+ * instance or a live LLM service. Full integration tests will be added in Phase 2
+ * when LangChain4j translation is implemented.
+ */
+public class NL2SqlServletTest {
+
+    /** The prompt string must mention the dataverse and each dataset named in the descriptions. */
+    @Test
+    public void testSchemaContextToPromptString() {
+        List<String> datasets = Arrays.asList("Dataset TweetMessages (tweetid: bigint, text: string)",
+                "Dataset FacebookUsers (id: bigint, name: string)");
+        String prompt = new SchemaContext("TinySocial", datasets).toPromptString();
+
+        Assert.assertTrue("Prompt should contain dataverse name", prompt.contains("TinySocial"));
+        Assert.assertTrue("Prompt should contain TweetMessages dataset", prompt.contains("TweetMessages"));
+        Assert.assertTrue("Prompt should contain FacebookUsers dataset", prompt.contains("FacebookUsers"));
+    }
+
+    /** Later changes to the caller's list must not leak into an already-built SchemaContext. */
+    @Test
+    public void testSchemaContextImmutable() {
+        List<String> descriptions = new ArrayList<>();
+        descriptions.add("Dataset Foo (id: bigint)");
+        SchemaContext ctx = new SchemaContext("TestDV", descriptions);
+
+        // Mutate the source list after construction; the context must still report one entry.
+        descriptions.add("Dataset Bar (id: bigint)");
+
+        Assert.assertEquals("SchemaContext should hold an immutable copy of the descriptions", 1,
+                ctx.getDatasetDescriptions().size());
+    }
+
+    /** The single-argument constructor must expose its message unchanged. */
+    @Test
+    public void testNl2SqlExceptionMessage() {
+        Nl2SqlException ex = new Nl2SqlException("LLM service unavailable");
+        Assert.assertEquals("LLM service unavailable", ex.getMessage());
+    }
+
+    /** The (message, cause) constructor must keep both the message and the very same cause instance. */
+    @Test
+    public void testNl2SqlExceptionWithCause() {
+        RuntimeException cause = new RuntimeException("connection refused");
+        Nl2SqlException ex = new Nl2SqlException("Translation failed", cause);
+        Assert.assertEquals("Translation failed", ex.getMessage());
+        Assert.assertSame(cause, ex.getCause());
+    }
+
+    /**
+     * Verifies that a lambda can stand in for INl2SqlTranslator and that the
+     * SQL++ string it builds comes back through {@code translate}.
+     */
+    @Test
+    public void testTranslatorInterfaceContract() throws Nl2SqlException {
+        INl2SqlTranslator mockTranslator =
+                (nl, schema) -> "SELECT VALUE t FROM TweetMessages t WHERE t.text LIKE '%" + nl + "%'";
+        SchemaContext ctx =
+                new SchemaContext("TinySocial", Arrays.asList("Dataset TweetMessages (tweetid: bigint, text: string)"));
+
+        String result = mockTranslator.translate("AsterixDB", ctx);
+
+        Assert.assertNotNull("Translator must return a non-null SQL++ string", result);
+        Assert.assertTrue("Result should reference the dataset", result.contains("TweetMessages"));
+        Assert.assertTrue("Result should be a SELECT statement", result.startsWith("SELECT"));
+    }
+}
diff --git a/asterixdb/asterix-spidersilk/src/test/java/org/apache/asterix/spidersilk/schema/SchemaContextBuilderTest.java b/asterixdb/asterix-spidersilk/src/test/java/org/apache/asterix/spidersilk/schema/SchemaContextBuilderTest.java
new file mode 100644
index 00000000000..5bbf76743fa
--- /dev/null
+++ b/asterixdb/asterix-spidersilk/src/test/java/org/apache/asterix/spidersilk/schema/SchemaContextBuilderTest.java
@@ -0,0 +1,197 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.spidersilk.schema;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.asterix.om.types.AOrderedListType;
+import org.apache.asterix.om.types.ARecordType;
+import org.apache.asterix.om.types.AUnionType;
+import org.apache.asterix.om.types.BuiltinType;
+import org.apache.asterix.om.types.IAType;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Unit tests for the PR-2 schema extraction components.
+ *
+ * These tests exercise {@link DatasetSchemaFormatter} and {@link DatasetSchema}
+ * using ADM type objects constructed directly in-memory, with no dependency on
+ * a running AsterixDB instance or MetadataManager.
+ *
+ * Integration tests that verify the full {@link SchemaContextBuilder#build(String)}
+ * path against a live AsterixDB + TinySocial dataset are left for the integration
+ * test suite (require a running cluster).
+ */
+public class SchemaContextBuilderTest {
+
+    private final DatasetSchemaFormatter formatter = new DatasetSchemaFormatter();
+
+    // -------------------------------------------------------------------------
+    // DatasetSchemaFormatter tests
+    // -------------------------------------------------------------------------
+
+    @Test
+    public void testFormatPrimitiveTypes() {
+        Assert.assertEquals("int64", formatter.formatType(BuiltinType.AINT64));
+        Assert.assertEquals("string", formatter.formatType(BuiltinType.ASTRING));
+        Assert.assertEquals("boolean", formatter.formatType(BuiltinType.ABOOLEAN));
+        Assert.assertEquals("double", formatter.formatType(BuiltinType.ADOUBLE));
+    }
+
+    @Test
+    public void testFormatNullType() {
+        Assert.assertEquals("any", formatter.formatType(null));
+    }
+
+    /** An ordered list of strings (a SQL++ array) is expected to render as {@code [string]}. */
+    @Test
+    public void testFormatOrderedList() {
+        AOrderedListType stringList = new AOrderedListType(BuiltinType.ASTRING, "string-list");
+        String formatted = formatter.formatType(stringList);
+        Assert.assertEquals("[string]", formatted);
+    }
+
+    /** A union of string and missing (an optional field) is expected to render as {@code string?}. */
+    @Test
+    public void testFormatNullableField() {
+        AUnionType optionalString =
+                new AUnionType(Arrays.asList(BuiltinType.ASTRING, BuiltinType.AMISSING), "nullable-string");
+        String formatted = formatter.formatType(optionalString);
+        Assert.assertEquals("string?", formatted);
+    }
+
+    @Test
+    public void testFormatNestedRecord() {
+        // Nested record: { street: string, city: string }
+        ARecordType addressType = new ARecordType("AddressType", new String[] { "street", "city" },
+                new IAType[] { BuiltinType.ASTRING, BuiltinType.ASTRING }, false);
+
+        // Top-level record that embeds the address record as one of its fields
+        ARecordType personType = new ARecordType("PersonType", new String[] { "name", "address" },
+                new IAType[] { BuiltinType.ASTRING, addressType }, false);
+
+        // NOTE(review): a top-level record (depth=0) is meant not to be wrapped in braces; only names are asserted.
+        String formatted = formatter.formatType(personType);
+        Assert.assertTrue("Should contain nested field 'address'", formatted.contains("address"));
+        Assert.assertTrue("Should contain nested field 'street'", formatted.contains("street"));
+        Assert.assertTrue("Should contain nested field 'city'", formatted.contains("city"));
+    }
+
+    @Test
+    public void testFormatTweetMessagesSchema() {
+        // Mimics the TinySocial TweetMessages item type
+        AOrderedListType topicsType = new AOrderedListType(BuiltinType.ASTRING, "topics-list");
+        ARecordType tweetType = new ARecordType("TweetMessageType",
+                new String[] { "tweetid", "sender-location", "send-time", "referred-topics", "message-text",
+                        "author-id" },
+                new IAType[] { BuiltinType.AINT64, BuiltinType.ANY, BuiltinType.ADATETIME, topicsType,
+                        BuiltinType.ASTRING, BuiltinType.AINT64 },
+                false);
+
+        String formatted = formatter.formatType(tweetType);
+        Assert.assertTrue("Should contain field 'tweetid'", formatted.contains("tweetid"));
+        Assert.assertTrue("Should contain type 'int64'", formatted.contains("int64"));
+        Assert.assertTrue("Should contain field 'message-text'", formatted.contains("message-text"));
+        Assert.assertTrue("Should contain field 'referred-topics'", formatted.contains("referred-topics"));
+        Assert.assertTrue("Should contain list type '[string]'", formatted.contains("[string]"));
+    }
+
+    // -------------------------------------------------------------------------
+    // ColumnInfo tests
+    // -------------------------------------------------------------------------
+
+    @Test
+    public void testColumnInfoPrimaryKeyDescription() {
+        ColumnInfo pk = new ColumnInfo("tweetid", "bigint", true);
+        Assert.assertEquals("tweetid: bigint [PK]", pk.toDescriptionString());
+    }
+
+    @Test
+    public void testColumnInfoNonPrimaryKeyDescription() {
+        ColumnInfo col = new ColumnInfo("message-text", "string", false);
+        Assert.assertEquals("message-text: string", col.toDescriptionString());
+    }
+
+    // -------------------------------------------------------------------------
+    // DatasetSchema tests
+    // -------------------------------------------------------------------------
+
+    @Test
+    public void testDatasetSchemaDescriptionString() {
+        List<ColumnInfo> columns = Arrays.asList(new ColumnInfo("tweetid", "int64", true),
+                new ColumnInfo("message-text", "string", false), new ColumnInfo("author-id", "int64", false));
+
+        DatasetSchema schema = new DatasetSchema("TweetMessages", columns);
+        String description = schema.toDescriptionString();
+
+        Assert.assertTrue("Should start with the dataset header", description.startsWith("Dataset TweetMessages ("));
+        Assert.assertTrue("Should mark the primary key", description.contains("tweetid: int64 [PK]"));
+        Assert.assertTrue("Should list non-key columns", description.contains("message-text: string"));
+        Assert.assertTrue("Should end with a closing parenthesis", description.endsWith(")"));
+    }
+
+    @Test
+    public void testDatasetSchemaFallsBackToAllColumnsBeforePruning() {
+        List<ColumnInfo> columns =
+                Arrays.asList(new ColumnInfo("id", "bigint", true), new ColumnInfo("name", "string", false));
+
+        DatasetSchema schema = new DatasetSchema("Users", columns);
+
+        // With no pruned list set, getEffectiveColumns() is expected to return every column
+        Assert.assertEquals(2, schema.getEffectiveColumns().size());
+    }
+
+    @Test
+    public void testDatasetSchemaUsesPrunedColumnsAfterPruning() {
+        List<ColumnInfo> allColumns = Arrays.asList(new ColumnInfo("id", "bigint", true),
+                new ColumnInfo("name", "string", false), new ColumnInfo("created-at", "datetime", false));
+
+        DatasetSchema schema = new DatasetSchema("Users", allColumns);
+
+        // Simulate ColumnPruner keeping only id and name
+        List<ColumnInfo> pruned =
+                Arrays.asList(new ColumnInfo("id", "bigint", true), new ColumnInfo("name", "string", false));
+        schema.setPrunedColumns(pruned);
+
+        Assert.assertEquals(2, schema.getEffectiveColumns().size());
+        String description = schema.toDescriptionString();
+        Assert.assertFalse("Pruned field should not appear", description.contains("created-at"));
+    }
+
+    @Test
+    public void testDatasetSchemaImmutableAllColumns() {
+        List<ColumnInfo> mutable = new java.util.ArrayList<>();
+        mutable.add(new ColumnInfo("id", "bigint", true));
+        DatasetSchema schema = new DatasetSchema("Foo", mutable);
+
+        // Modifying the original list after construction must not affect the schema
+        mutable.add(new ColumnInfo("extra", "string", false));
+        Assert.assertEquals(1, schema.getAllColumns().size());
+    }
+
+    @Test
+    public void testEmptyDatasetDescription() {
+        DatasetSchema schema = new DatasetSchema("EmptyDataset", Collections.emptyList());
+        String description = schema.toDescriptionString();
+        Assert.assertEquals("Dataset EmptyDataset ()", description);
+    }
+}