-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmodel_serving.h
More file actions
318 lines (277 loc) · 12.6 KB
/
model_serving.h
File metadata and controls
318 lines (277 loc) · 12.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
/*
╔═════════════════════════════════════════════════════════════════════╗
║ ThemisDB - Hybrid Database System ║
╠═════════════════════════════════════════════════════════════════════╣
File: model_serving.h ║
Version: 0.0.2 ║
Last Modified: 2026-03-09 03:52:33 ║
Author: unknown ║
╠═════════════════════════════════════════════════════════════════════╣
Quality Metrics: ║
• Maturity Level: 🟢 PRODUCTION-READY ║
• Quality Score: 100.0/100 ║
• Total Lines: 317 ║
• Open Issues: TODOs: 0, Stubs: 0 ║
╠═════════════════════════════════════════════════════════════════════╣
Revision History: ║
• 2a1fb0423 2026-03-03 Merge branch 'develop' into copilot/audit-src-module-docu... ║
• 5a7ca4018 2026-02-24 audit: remove unused headers, fix spelling, complete ROAD... ║
• 90cdb41ff 2026-02-24 feat(analytics): implement model serving and online infer... ║
╠═════════════════════════════════════════════════════════════════════╣
Status: ✅ Production Ready ║
╚═════════════════════════════════════════════════════════════════════╝
*/
/**
* ThemisDB Model Serving and Online Inference Pipeline
*
* A thread-safe registry for trained AutoML models that provides
* low-latency online inference, batch inference, health-metric
* tracking, and round-trip serialization / deserialization.
*
* Features:
* - Named + versioned model registry
* - Single-record online inference (predictOne)
* - Batch inference (predictBatch)
* - Class-probability output (predictProba, classification only)
* - Per-model health metrics (prediction count, latency percentiles)
* - Model serialization / deserialization (round-trip via AutoMLModel)
* - Configurable registry capacity and latency-tracking window
*
* Thread-safety:
* - registerModel / unregisterModel / loadModel are guarded by an
* exclusive lock; they are NOT suitable for high-frequency calls.
* - predict / predictBatch / predictProba use a shared lock so
 *   multiple threads may infer concurrently without blocking each
* other.
* - listModels / modelInfo / healthMetrics / isRegistered are
* read-only and also use the shared lock.
*
* Copyright (c) 2025 VCC-URN Project
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <algorithm>
#include <chrono>
#include <map>
#include <memory>
#include <optional>
#include <string>
#include <vector>
// Reuse DataPoint and AutoMLModel from the AutoML module.
#include "analytics/automl.h"
namespace themisdb {
namespace analytics {
// ============================================================================
// Forward declarations
// ============================================================================
class ModelServingEngine;
// ============================================================================
// Configuration
// ============================================================================
/**
 * Configuration passed to ModelServingEngine at construction time.
 *
 * Limits here are enforced at call time; see the @throws documentation
 * on the individual ModelServingEngine methods for the exact behavior.
 */
struct ModelServingConfig {
    /// Maximum number of models that may be registered simultaneously.
    /// registerModel / loadModel throw std::runtime_error once this is reached.
    size_t max_models = 100;
    /// Maximum number of data-points accepted in a single predictBatch call.
    /// predictBatch / predictProba throw std::invalid_argument beyond this.
    size_t max_batch_size = 10'000;
    /// Collect per-call latency observations (small overhead).
    /// NOTE(review): when false, the latency fields of ModelHealthMetrics
    /// presumably keep their defaults — confirm against the implementation.
    bool track_latency = true;
    /// Sliding-window size (number of recent observations) used to compute
    /// the p99 latency reported in ModelHealthMetrics::p99_latency_ms.
    size_t latency_window = 1'000;
};
// ============================================================================
// ModelInfo
// ============================================================================
/**
 * Metadata about a model that has been registered with the engine.
 *
 * Returned by ModelServingEngine::listModels and modelInfo. A model is
 * uniquely identified by the (name, version) pair.
 */
struct ModelInfo {
    std::string name;        ///< Logical model name
    std::string version;     ///< Version string (e.g. "1.0", "2024-01")
    AutoMLTask task = AutoMLTask::CLASSIFICATION;          ///< Task the model was trained for
    ModelAlgorithm algorithm = ModelAlgorithm::DECISION_TREE;  ///< Underlying algorithm
    EvalMetrics metrics;     ///< CV metrics from training
    int64_t registered_at_ms = 0;  ///< Unix epoch (ms) when model was registered
    /// NOTE(review): no engine API visible in this header toggles this flag —
    /// presumably reserved for activation/deactivation; confirm with the .cpp.
    bool is_active = true;
};
// ============================================================================
// ModelHealthMetrics
// ============================================================================
/**
 * Runtime statistics accumulated for one registered model.
 *
 * Returned by ModelServingEngine::healthMetrics. Latency fields are only
 * meaningful when ModelServingConfig::track_latency is enabled; the p99 is
 * computed over the last ModelServingConfig::latency_window observations.
 */
struct ModelHealthMetrics {
    std::string name;     ///< Logical model name (mirrors ModelInfo::name)
    std::string version;  ///< Version string (mirrors ModelInfo::version)
    uint64_t total_predictions = 0;    ///< Cumulative single-record predictions
    uint64_t total_batch_calls = 0;    ///< Cumulative batch inference calls
    uint64_t total_batch_records = 0;  ///< Cumulative records processed in batches
    double avg_latency_ms = 0.0;       ///< Rolling average of per-call latency
    double p99_latency_ms = 0.0;       ///< p99 latency over the latency_window
    double last_latency_ms = 0.0;      ///< Latency of the most recent call
    int64_t last_used_ms = 0;          ///< Epoch-ms of last inference call (0 = never)
};
// ============================================================================
// ModelServingEngine
// ============================================================================
/**
* Central registry for trained AutoML models.
*
* Models are identified by (name, version) pairs. The engine supports
* online single-record inference with sub-millisecond overhead, batch
* inference for throughput-optimised workloads, class-probability
* output, and lightweight health-metric collection.
*
* @code
* using namespace themisdb::analytics;
*
* // --- Train a model (via AutoML) ---
* AutoML automl;
* auto model = automl.trainClassifier(training_data, {
* .target = "churn",
* .metric = AutoMLMetric::F1
* });
*
* // --- Register and serve ---
* ModelServingEngine engine;
* engine.registerModel("churn-predictor", "v1", std::move(model));
*
* // Online inference (single record)
* DataPoint dp;
* dp.set("age", 35.0);
* dp.set("tenure_months", 12.0);
* std::string label = engine.predict("churn-predictor", "v1", dp);
*
* // Batch inference
* auto labels = engine.predictBatch("churn-predictor", "v1", batch);
*
* // Health metrics
* auto h = engine.healthMetrics("churn-predictor", "v1");
* if (h) {
* std::cout << "avg_latency_ms=" << h->avg_latency_ms << "\n";
* }
* @endcode
*/
class ModelServingEngine {
public:
    /// Construct the engine; a default-constructed config gives the limits
    /// documented in ModelServingConfig.
    explicit ModelServingEngine(ModelServingConfig config = {});

    /// Out-of-line destructor is required by the pimpl idiom below: Impl is
    /// an incomplete type at this point, so unique_ptr<Impl> cannot be
    /// destroyed inline.
    ~ModelServingEngine();

    // Non-copyable. NOTE(review): user-declaring the destructor also
    // suppresses the implicit move operations, so the engine is effectively
    // non-movable as well — presumably intentional for a shared registry.
    ModelServingEngine(const ModelServingEngine&) = delete;
    ModelServingEngine& operator=(const ModelServingEngine&) = delete;

    // ---- Registry management (exclusive lock; not for high-frequency use) ----

    /**
     * Register a trained AutoML model under (name, version).
     *
     * @throws std::invalid_argument if name or version is empty.
     * @throws std::runtime_error if the registry is full
     *         (exceeds ModelServingConfig::max_models).
     * @throws std::runtime_error if a model with the same (name,version)
     *         is already registered.
     */
    void registerModel(const std::string& name,
                       const std::string& version,
                       AutoMLModel model);

    /**
     * Unregister the model identified by (name, version).
     *
     * @return true if the model was found and removed; false otherwise.
     */
    bool unregisterModel(const std::string& name,
                         const std::string& version);

    // ---- Inference (shared lock; safe for concurrent callers) ----

    /**
     * Predict the label / value for a single DataPoint.
     *
     * @throws std::out_of_range if no model is registered under (name,version).
     */
    std::string predict(const std::string& name,
                        const std::string& version,
                        const DataPoint& point) const;

    /**
     * Predict labels / values for a batch of DataPoints.
     *
     * Returns one string per input point in the same order.
     *
     * @throws std::out_of_range if no model is registered under (name,version).
     * @throws std::invalid_argument if data.size() > ModelServingConfig::max_batch_size.
     */
    std::vector<std::string> predictBatch(
        const std::string& name,
        const std::string& version,
        const std::vector<DataPoint>& data) const;

    /**
     * Return class probabilities for a batch (classification models only).
     *
     * Outer vector: one entry per data-point.
     * Inner map: class label → probability in [0,1].
     *
     * For regression models the inner map contains a single entry
     * {"value" → predicted_double}.
     *
     * @throws std::out_of_range if no model is registered under (name,version).
     * @throws std::invalid_argument if data.size() > ModelServingConfig::max_batch_size.
     */
    std::vector<std::map<std::string, double>> predictProba(
        const std::string& name,
        const std::string& version,
        const std::vector<DataPoint>& data) const;

    // ---- Registry queries (read-only; shared lock) ----

    /**
     * Return metadata for all registered models (unordered).
     */
    std::vector<ModelInfo> listModels() const;

    /**
     * Return metadata for a specific model, or nullopt if not registered.
     */
    std::optional<ModelInfo> modelInfo(const std::string& name,
                                       const std::string& version) const;

    /**
     * Return health metrics for a specific model, or nullopt if not registered.
     */
    std::optional<ModelHealthMetrics> healthMetrics(const std::string& name,
                                                    const std::string& version) const;

    /**
     * Return true iff (name, version) is currently registered.
     */
    bool isRegistered(const std::string& name,
                      const std::string& version) const;

    // ---- Persistence ----

    /**
     * Serialise a registered model to a string (delegates to AutoMLModel::serialize).
     *
     * @throws std::out_of_range if not registered.
     */
    std::string serializeModel(const std::string& name,
                               const std::string& version) const;

    /**
     * Deserialise and register a model previously serialised via serializeModel.
     *
     * Equivalent to constructing an AutoMLModel via AutoMLModel::deserialize
     * and calling registerModel(name, version, std::move(m)).
     *
     * @throws std::invalid_argument if name or version is empty.
     * @throws std::runtime_error if the registry is full or (name,version)
     *         is already registered.
     */
    void loadModel(const std::string& name,
                   const std::string& version,
                   const std::string& serialized_data);

private:
    // Pimpl: all state (registry map, locks, latency windows) lives in the
    // .cpp, keeping this header free of synchronization headers.
    struct Impl;
    std::unique_ptr<Impl> impl_;
};
// ============================================================================
// Free helpers
// ============================================================================
/**
 * Build the canonical registry key "<name>:<version>" from (name, version).
 * Exposed so external code can build keys consistently with the engine.
 */
inline std::string makeModelKey(const std::string& name,
                                const std::string& version) {
    std::string key;
    // Single allocation: name + ':' + version.
    key.reserve(name.size() + version.size() + 1);
    key.append(name);
    key.push_back(':');
    key.append(version);
    return key;
}
} // namespace analytics
} // namespace themisdb