-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathml_serving.h
More file actions
361 lines (313 loc) · 14.4 KB
/
ml_serving.h
File metadata and controls
361 lines (313 loc) · 14.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
/*
╔═════════════════════════════════════════════════════════════════════════════╗
║ ThemisDB - Hybrid Database System                                           ║
╠═════════════════════════════════════════════════════════════════════════════╣
║ File:          ml_serving.h                                                 ║
║ Version:       0.0.2                                                        ║
║ Last Modified: 2026-03-09 03:52:32                                          ║
║ Author:        unknown                                                      ║
╠═════════════════════════════════════════════════════════════════════════════╣
║ Quality Metrics:                                                            ║
║   • Maturity Level: 🟢 PRODUCTION-READY                                     ║
║   • Quality Score:  100.0/100                                               ║
║   • Total Lines:    360                                                     ║
║   • Open Issues:    TODOs: 0, Stubs: 0                                      ║
╠═════════════════════════════════════════════════════════════════════════════╣
║ Revision History:                                                           ║
║   • 2a1fb0423 2026-03-03 Merge branch 'develop' into copilot/audit-src-module-docu... ║
║   • 197b8b5b1 2026-02-24 feat(analytics): integrate ONNX Runtime and TensorFlow Se... ║
╠═════════════════════════════════════════════════════════════════════════════╣
║ Status: ✅ Production Ready                                                 ║
╚═════════════════════════════════════════════════════════════════════════════╝
*/
/**
* ThemisDB Analytics ML Serving Integration
*
* Provides a unified abstraction for integrating external ML inference
* engines with the ThemisDB analytics module. Two backends are supported:
*
* ONNXServingBackend – Runs ONNX models locally via ONNX Runtime.
* Requires compile-time flag THEMIS_HAS_ONNX=1 and
* the onnxruntime library (vcpkg: onnxruntime).
*
* TFServingBackend – Calls a TensorFlow Serving instance over its REST
* API (POST /v1/models/<name>:predict).
* Requires compile-time flag THEMIS_HAS_TF_SERVING=1
* and libcurl (THEMIS_HAS_CURL=1).
*
* When neither backend is available the MLServingClient returns an
* UNAVAILABLE error so callers can degrade gracefully.
*
* DataPoint integration:
* MLServingClient::inferFromDataPoint() converts the numeric fields of a
* DataPoint (from analytics/anomaly_detection.h) into a flat float32 tensor
* and returns the model output as an MLServingResponse. The field names are
* sorted deterministically (alphabetical) to match training-time conventions.
*
* Thread-safety:
* - MLServingClient is thread-safe after construction.
* - ONNXServingBackend::infer() is thread-safe (ONNX Runtime sessions are
* safe for concurrent inference).
* - TFServingBackend::infer() is thread-safe (libcurl handles are
* per-call).
*
* Copyright (c) 2025 VCC-URN Project
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <cstdint>
#include <map>
#include <memory>
#include <mutex>
#include <optional>
#include <string>
#include <vector>
// Re-use DataPoint for seamless integration with the anomaly detection module.
#include "analytics/anomaly_detection.h"
namespace themisdb {
namespace analytics {
// ============================================================================
// Forward declarations
// ============================================================================
class IMLServingBackend;
class ONNXServingBackend;
class TFServingBackend;
class MLServingClient;
// ============================================================================
// Tensor representation
// ============================================================================
/**
 * A named, shaped float32 tensor exchanged with an ML backend.
 *
 * All numeric data is stored as float32 to match the most common ML
 * framework convention. Callers that need higher precision should cast
 * externally.
 *
 * Expected invariant (not enforced here): data.size() equals
 * numElements(), i.e. the flat buffer matches the declared shape.
 */
struct MLTensor {
    std::string name;           ///< Tensor name (must match model input/output name)
    std::vector<int64_t> shape; ///< Dimensions, e.g. {batch_size, num_features}
    std::vector<float> data;    ///< Row-major float32 values

    /** Total number of elements (product of shape dimensions).
     *  Declared here, defined out of line in the implementation file. */
    std::size_t numElements() const noexcept;
};
// ============================================================================
// Request / Response
// ============================================================================
/**
 * Inference request sent to an MLServingBackend.
 *
 * Each entry in `inputs` is one model input; its MLTensor::name must
 * match the model's declared input name.
 */
struct MLServingRequest {
    std::string model_name;       ///< Model identifier
    std::string model_version;    ///< Version tag ("" = latest)
    std::vector<MLTensor> inputs; ///< Named input tensors
};
/**
 * Status codes returned by MLServingResponse.
 * Use mlServingStatusName() (declared below) for a human-readable string.
 */
enum class MLServingStatus {
    OK,            ///< Inference completed successfully
    UNAVAILABLE,   ///< Backend or model not available
    INVALID_INPUT, ///< Input shape/type mismatch
    BACKEND_ERROR  ///< Internal backend error (see error_message)
};
/**
 * Inference response from an MLServingBackend.
 *
 * On failure (status != OK) `error_message` carries the diagnostic text;
 * callers should always consult ok() before reading `outputs`.
 */
struct MLServingResponse {
    MLServingStatus status = MLServingStatus::OK; ///< Outcome of the call
    std::string error_message;     ///< Diagnostic text (empty on success)
    std::vector<MLTensor> outputs; ///< Named output tensors
    double latency_ms = 0.0;       ///< End-to-end call latency

    /** Returns true when status == OK.
     *  [[nodiscard]]: silently ignoring the result and then reading
     *  `outputs` is almost certainly a bug. */
    [[nodiscard]] bool ok() const noexcept { return status == MLServingStatus::OK; }
};
// ============================================================================
// Backend interface
// ============================================================================
/**
 * Abstract interface implemented by each ML inference backend.
 *
 * Known implementations in this header: ONNXServingBackend (in-process
 * ONNX Runtime) and TFServingBackend (remote TensorFlow Serving REST).
 * Callers normally use MLServingClient rather than a backend directly.
 */
class IMLServingBackend {
public:
    virtual ~IMLServingBackend() = default;

    /** Human-readable name of this backend (e.g. "ONNX Runtime 1.17.0"). */
    virtual std::string backendName() const = 0;

    /** Returns true if the backend is usable (libraries found, server reachable, etc.). */
    virtual bool isAvailable() const = 0;

    /** Run synchronous inference. Implementations are expected to be safe
     *  for concurrent calls (see the file-level thread-safety notes). */
    virtual MLServingResponse infer(const MLServingRequest& req) = 0;
};
// ============================================================================
// ONNX Runtime backend
// ============================================================================
/**
 * Configuration for ONNXServingBackend.
 *
 * Defaults give a CPU-only backend that looks for models under ./models
 * with automatic thread counts and no memory cap.
 */
struct ONNXBackendConfig {
    std::string model_directory = "./models"; ///< Directory searched for *.onnx files
    bool enable_cpu = true;                   ///< Use CPU execution provider
    bool enable_cuda = false;                 ///< Use CUDA execution provider (if available)
    int intra_op_threads = 0;                 ///< 0 = auto (hardware_concurrency)
    int inter_op_threads = 0;                 ///< 0 = auto
    std::size_t memory_limit_mb = 0;          ///< 0 = unlimited
};
/**
 * ONNXServingBackend – loads and runs ONNX models via ONNX Runtime.
 *
 * When THEMIS_HAS_ONNX is defined the backend uses the real
 * onnxruntime C++ API. When it is absent the backend reports
 * isAvailable() == false and every infer() call returns UNAVAILABLE.
 *
 * Models are loaded lazily on the first call to infer() with a new
 * model_name. The model file is resolved as:
 *     <model_directory>/<model_name>.onnx
 *
 * Thread-safety: multiple threads may call infer() concurrently.
 */
class ONNXServingBackend : public IMLServingBackend {
public:
    explicit ONNXServingBackend(const ONNXBackendConfig& config = {});
    ~ONNXServingBackend() override;

    std::string backendName() const override;
    bool isAvailable() const override;
    MLServingResponse infer(const MLServingRequest& req) override;

private:
    // Pimpl: keeps backend state (and the optional onnxruntime
    // dependency) out of this public header.
    struct Impl;
    std::unique_ptr<Impl> impl_;
};
// ============================================================================
// TensorFlow Serving backend
// ============================================================================
/**
 * Configuration for TFServingBackend.
 *
 * Defaults target a local TF Serving instance on its standard REST port
 * (8501) with a 5-second timeout and TLS verification enabled.
 */
struct TFServingConfig {
    std::string base_url = "http://localhost:8501"; ///< TF Serving REST API base URL
    int timeout_ms = 5000;                          ///< HTTP request timeout
    bool verify_ssl = true;                         ///< Verify TLS certificates
    std::string api_key;                            ///< Optional bearer token / API key
};
/**
 * TFServingBackend – calls a TensorFlow Serving instance over its REST API.
 *
 * Endpoint: POST <base_url>/v1/models/<model_name>:predict
 *           (with /versions/<version> inserted when model_version is set)
 *
 * Requires THEMIS_HAS_TF_SERVING=1 (and THEMIS_HAS_CURL=1 transitively).
 * When either flag is absent the backend reports isAvailable() == false.
 *
 * The REST payload follows the TF Serving JSON API:
 *     { "inputs": { "<name>": [[...]] } }
 *
 * Thread-safety: each infer() creates an independent libcurl easy handle so
 * concurrent calls are safe.
 */
class TFServingBackend : public IMLServingBackend {
public:
    explicit TFServingBackend(const TFServingConfig& config = {});
    ~TFServingBackend() override;

    std::string backendName() const override;
    bool isAvailable() const override;
    MLServingResponse infer(const MLServingRequest& req) override;

private:
    // Pimpl: keeps HTTP/libcurl state out of this public header.
    struct Impl;
    std::unique_ptr<Impl> impl_;
};
// ============================================================================
// Unified client
// ============================================================================
/**
 * Which backend the MLServingClient should use.
 * Use mlBackendTypeName() (declared below) for a human-readable string.
 */
enum class MLBackendType {
    AUTO,         ///< Prefer ONNX Runtime; fall back to TF Serving
    ONNX_RUNTIME, ///< Force ONNX Runtime backend
    TF_SERVING    ///< Force TF Serving backend
};
/**
 * Configuration for MLServingClient.
 *
 * Only the config matching the selected `backend` is used; with AUTO the
 * client may consult both backend configs.
 */
struct MLServingConfig {
    MLBackendType backend = MLBackendType::AUTO; ///< Backend selection policy
    ONNXBackendConfig onnx_config;               ///< Settings for the ONNX Runtime backend
    TFServingConfig tf_config;                   ///< Settings for the TF Serving backend
};
/**
 * MLServingClient – high-level, thread-safe interface to ML inference.
 *
 * Usage example:
 * @code
 *   using namespace themisdb::analytics;
 *
 *   MLServingClient client(MLServingConfig{
 *       .backend = MLBackendType::ONNX_RUNTIME,
 *       .onnx_config = { .model_directory = "/opt/models" }
 *   });
 *
 *   if (!client.isBackendAvailable(MLBackendType::ONNX_RUNTIME)) {
 *       // handle graceful degradation
 *   }
 *
 *   MLServingRequest req;
 *   req.model_name = "churn_classifier";
 *   req.inputs.push_back({ "input", {1, 4}, {0.5f, 1.2f, -0.3f, 0.9f} });
 *
 *   auto resp = client.infer(req);
 *   if (resp.ok()) {
 *       // resp.outputs[0].data contains probabilities
 *   }
 * @endcode
 *
 * DataPoint integration:
 * @code
 *   DataPoint dp;
 *   dp.set("feature_a", 1.5);
 *   dp.set("feature_b", -0.3);
 *
 *   auto resp = client.inferFromDataPoint("churn_classifier", dp);
 * @endcode
 */
class MLServingClient {
public:
    explicit MLServingClient(const MLServingConfig& config = {});
    ~MLServingClient();

    // ─── Backend introspection ───────────────────────────────────────────────

    /** Returns true if the specified backend type is compiled in and available. */
    bool isBackendAvailable(MLBackendType type) const;

    /** Returns the name of the active backend. */
    std::string activeBackendName() const;

    // ─── Inference ──────────────────────────────────────────────────────────

    /** Run inference using the active backend. */
    MLServingResponse infer(const MLServingRequest& req);

    /**
     * Convenience overload: converts the numeric fields of @p point into a
     * single flat float32 input tensor named "input" and calls infer().
     *
     * Fields are sorted alphabetically (consistent with AutoML feature
     * engineering conventions in this module). Non-numeric fields are
     * silently ignored.
     *
     * @param model_name Model to query.
     * @param point      DataPoint whose numeric fields form the input vector.
     * @param input_name Name of the input tensor (default: "input").
     * @return MLServingResponse with status and output tensors.
     */
    MLServingResponse inferFromDataPoint(const std::string& model_name,
                                         const DataPoint& point,
                                         const std::string& input_name = "input");

    // ─── Factory ─────────────────────────────────────────────────────────────

    /** Create an ONNX Runtime backend with the given config. */
    static std::unique_ptr<IMLServingBackend>
    makeONNXBackend(const ONNXBackendConfig& config = {});

    /** Create a TF Serving backend with the given config. */
    static std::unique_ptr<IMLServingBackend>
    makeTFServingBackend(const TFServingConfig& config = {});

private:
    // Pimpl: hides the selected backend instance and any synchronization
    // state behind the header boundary.
    struct Impl;
    std::unique_ptr<Impl> impl_;
};
// ============================================================================
// Helper utilities
// ============================================================================
/** Returns a human-readable string for the given status code
 *  (e.g. for logging; defined in the implementation file). */
std::string mlServingStatusName(MLServingStatus status);

/** Returns a human-readable string for the given backend type
 *  (e.g. for logging; defined in the implementation file). */
std::string mlBackendTypeName(MLBackendType type);
} // namespace analytics
} // namespace themisdb