Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import java.util.Set;
import java.util.concurrent.ConcurrentMap;

import org.apache.asterix.api.http.IApiServerRegistrant;
import org.apache.asterix.api.http.IQueryWebServerRegistrant;
import org.apache.asterix.api.http.server.ActiveRequestsServlet;
import org.apache.asterix.api.http.server.ActiveStatsApiServlet;
Expand Down Expand Up @@ -361,6 +362,9 @@ protected HttpServer setupJSONAPIServer(ExternalProperties externalProperties) t
addServlet(jsonAPIServer, Servlets.CLUSTER_STATE_CC_DETAIL); // must not precede add of CLUSTER_STATE
addServlet(jsonAPIServer, Servlets.DIAGNOSTICS);
addServlet(jsonAPIServer, Servlets.ACTIVE_STATS);
// Load extension servlets registered via ServiceLoader (e.g., NL2SQL++ from asterix-spidersilk)
ServiceLoader.load(IApiServerRegistrant.class)
.forEach(registrant -> registrant.register(appCtx, jsonAPIServer));
return jsonAPIServer;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.asterix.api.http;

/**
* Extension point for registering additional servlets on the JSON API server (default port 19002).
*
* <p>Implementations are discovered via {@link java.util.ServiceLoader}. This interface declares
* no members of its own; everything an implementation must provide is inherited from
* {@link IServletRegistrant}. It exists as a distinct type so that registrants targeting the
* JSON API server can be looked up separately from those targeting other servers.
*
* <p>To register a servlet, create an implementation of this interface and list its fully
* qualified class name in the provider-configuration file:
* {@code META-INF/services/org.apache.asterix.api.http.IApiServerRegistrant}
*
* @see IQueryWebServerRegistrant for the equivalent mechanism on the query web server (port 19006)
*/
public interface IApiServerRegistrant extends IServletRegistrant {
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ public class Servlets {
public static final String QUERY_STATUS = "/query/service/status/*";
public static final String QUERY_RESULT = "/query/service/result/*";
public static final String QUERY_SERVICE = "/query/service";
public static final String NL2SQL_SERVICE = "/query/nl2sql";
public static final String CONNECTOR = "/connector";
public static final String REBALANCE = "/admin/rebalance";
public static final String SHUTDOWN = "/admin/shutdown";
Expand Down
44 changes: 44 additions & 0 deletions asterixdb/asterix-spidersilk/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,56 @@
<artifactId>asterix-spidersilk</artifactId>
<name>asterix-spidersilk</name>

<properties>
<root.dir>${basedir}/..</root.dir>
</properties>

<parent>
<groupId>org.apache.asterix</groupId>
<artifactId>apache-asterixdb</artifactId>
<version>0.9.10-SNAPSHOT</version>
</parent>

<dependencies>
<dependency>
<groupId>org.apache.asterix</groupId>
<artifactId>asterix-common</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.asterix</groupId>
<artifactId>asterix-metadata</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.asterix</groupId>
<artifactId>asterix-om</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hyracks</groupId>
<artifactId>hyracks-http</artifactId>
</dependency>
<dependency>
<groupId>io.netty</groupId>
<artifactId>netty-codec-http</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
</dependency>
<!-- Test dependencies -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>

<build>
<plugins>
<plugin>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.asterix.spidersilk.api;

/**
* Core interface for natural language to SQL++ translation.
*
* <p>The contract is model-agnostic: any LLM backend (OpenAI, Ollama, etc.) can be used by
* providing a different implementation. The implementation shown in the example below is
* described as using the LangChain4j framework to manage LLM communication, prompt templating,
* and retries. NOTE(review): that is a property of the concrete class, not of this interface;
* confirm against the implementing class.
*
* <p>Usage example:
* <pre>
* INl2SqlTranslator translator = new LangChain4jTranslator(config);
* SchemaContext schema = schemaBuilder.buildContext("TinySocial");
* String sqlpp = translator.translate("Find all tweets mentioning AsterixDB", schema);
* // sqlpp => "SELECT VALUE t FROM TweetMessages t WHERE t.message_text LIKE '%AsterixDB%'"
* </pre>
*/
public interface INl2SqlTranslator {

/**
* Translates a natural language query into an executable SQL++ statement.
*
* <p>An implementation is expected to:
* <ol>
* <li>Build a schema-aware prompt from {@code schemaContext}</li>
* <li>Call the configured LLM to generate a SQL++ candidate</li>
* <li>Validate the candidate using the AsterixDB SQL++ parser</li>
* <li>Retry with error feedback if validation fails (up to a configured max)</li>
* </ol>
*
* @param naturalLanguage the user's natural language query (non-null, non-empty)
* @param schemaContext schema information for the target dataverse; may be
* {@code null} if no dataverse is specified, so implementations
* must not dereference it unconditionally
* @return a syntactically valid SQL++ query string
* @throws Nl2SqlException if translation fails after exhausting retries,
* or if the LLM service is unavailable
*/
String translate(String naturalLanguage, SchemaContext schemaContext) throws Nl2SqlException;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.asterix.spidersilk.api;

/**
 * Checked exception raised when a natural language request cannot be turned
 * into SQL++.
 *
 * <p>Situations that typically lead to this exception:
 * <ul>
 * <li>the LLM service is unavailable or misconfigured</li>
 * <li>the generated SQL++ still fails syntax validation after the maximum number of retries</li>
 * <li>the natural language input is ambiguous or unsupported</li>
 * </ul>
 */
public class Nl2SqlException extends Exception {

    private static final long serialVersionUID = 1L;

    /**
     * Creates an exception that wraps an underlying failure.
     *
     * @param message description of what went wrong
     * @param cause the lower-level failure that triggered this exception
     */
    public Nl2SqlException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception without an underlying cause. The cause is left
     * uninitialised, so it can still be supplied later through
     * {@link #initCause(Throwable)}.
     *
     * @param message description of what went wrong
     */
    public Nl2SqlException(String message) {
        super(message);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.asterix.spidersilk.api;

import java.util.Collections;
import java.util.List;

/**
 * Immutable holder for the schema information of one dataverse, as extracted
 * from AsterixDB metadata. Its content is injected into the LLM prompt so that
 * the generated SQL++ can refer to real datasets and fields.
 *
 * The schema is extracted from the {@code asterix-metadata} module via MetadataManager,
 * including Dataset definitions, type information, and index metadata.
 */
public class SchemaContext {

    private final String dataverse;
    private final List<String> datasetDescriptions;

    /**
     * Creates a schema context.
     *
     * @param dataverse the target dataverse name
     * @param datasetDescriptions one description per dataset; the list is copied,
     *            so later changes made by the caller do not affect this object
     */
    public SchemaContext(String dataverse, List<String> datasetDescriptions) {
        List<String> snapshot = new java.util.ArrayList<>(datasetDescriptions);
        this.datasetDescriptions = Collections.unmodifiableList(snapshot);
        this.dataverse = dataverse;
    }

    /**
     * @return the target dataverse name
     */
    public String getDataverse() {
        return dataverse;
    }

    /**
     * @return an unmodifiable list of human-readable schema descriptions, one per
     *         dataset in the dataverse, formatted for inclusion in an LLM prompt
     */
    public List<String> getDatasetDescriptions() {
        return datasetDescriptions;
    }

    /**
     * Renders the schema context as a prompt-ready string: a header line naming
     * the dataverse followed by one line per dataset description, each terminated
     * by a newline.
     * Example output:
     * <pre>
     * Dataverse: TinySocial
     * Dataset TweetMessages (tweetid: bigint, sender-location: point, text: string, ...)
     * Dataset FacebookUsers (id: bigint, name: string, employment: [object], ...)
     * </pre>
     */
    public String toPromptString() {
        StringBuilder prompt = new StringBuilder("Dataverse: ");
        prompt.append(dataverse).append('\n');
        datasetDescriptions.forEach(line -> prompt.append(line).append('\n'));
        return prompt.toString();
    }

    /**
     * @return a short diagnostic form showing the dataverse name and the number of datasets
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("SchemaContext{dataverse='");
        text.append(dataverse);
        text.append("', datasets=");
        text.append(datasetDescriptions.size());
        text.append("}");
        return text.toString();
    }
}
Loading