Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion include/storage/transaction_retry_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,6 @@ class TransactionRetryManager {

} catch (const std::exception& e) {
attempt++;
stats_.total_retry_attempts.fetch_add(1);

// Classify error
ErrorType error_type = classifyError(e.what());
Expand Down Expand Up @@ -293,6 +292,9 @@ class TransactionRetryManager {
throw std::runtime_error("Max retry attempts exceeded for: " + operation_name);
}

// We will perform another attempt, so count this as a retry.
stats_.total_retry_attempts.fetch_add(1);

// Calculate delay
uint32_t delay_ms = calculateDelay(attempt, policy);

Expand Down
3 changes: 2 additions & 1 deletion src/config/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,9 @@ Static utility that formats `ConfigPathResolver` metrics in Prometheus text-expo
| `themis_config_cache_capacity` | gauge | Maximum cache capacity (info) |
| `themis_config_cache_ttl_seconds` | gauge | Cache entry TTL in seconds (info) |
| `themis_config_legacy_fallbacks_by_category_total{category}` | counter | Legacy fallbacks broken down by config category |
| `themis_config_legacy_fallbacks_all_total` | counter | Aggregate legacy fallbacks across all categories |

`collect()` is a pure read (no state mutations, no locks beyond the cache mutex); it is suitable for repeated polling in a pull-model scrape. `updateMetricsCollector()` pushes the same values into the central `MetricsCollector` singleton as `_current` gauges for Grafana dashboard integration.
`collect()` performs a pure read unless a Prometheus registry is registered via `registerWithRegistry()`, in which case it also updates the registered counters using deltas before returning serialized text. `updateMetricsCollector()` pushes the same values into the central `MetricsCollector` singleton as `_current` gauges for Grafana dashboard integration.


### PathMappingMetadata
Expand Down
311 changes: 268 additions & 43 deletions src/config/config_metrics_exporter.cpp

Large diffs are not rendered by default.

33 changes: 24 additions & 9 deletions src/config/config_metrics_exporter.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,13 @@

#pragma once

#include <memory>
#include <string>

namespace prometheus {
class Registry;
}

namespace themis {
namespace config {

Expand All @@ -41,16 +46,16 @@ namespace config {
* Exported metric names:
* themis_config_resolution_hits_total - counter
* themis_config_resolution_misses_total - counter
* themis_config_legacy_fallbacks_total - counter
* themis_config_new_path_hits_total - counter
* themis_config_legacy_fallbacks_total{category} - counter (per-category breakdown)
* themis_config_legacy_fallbacks_all_total - counter (aggregate, no category label)
* themis_config_new_path_hits_total - counter (backward compatibility)
* themis_config_unmapped_requests_total - counter
* themis_config_cache_hits_total - counter
* themis_config_cache_misses_total - counter
* themis_config_cache_hits_total - counter (backward compatibility)
* themis_config_cache_misses_total - counter (backward compatibility)
* themis_config_cache_hit_ratio - gauge (derived)
* themis_config_cache_size - gauge
* themis_config_cache_size - gauge (info, backward compatibility)
* themis_config_cache_capacity - gauge (info)
* themis_config_cache_ttl_seconds - gauge (info)
Comment on lines 46 to 58
* themis_config_legacy_fallbacks_by_category_total{category} - counter (per-category breakdown)
*/
class ConfigMetricsExporter {
public:
Expand All @@ -60,9 +65,11 @@ class ConfigMetricsExporter {
* Collect all config-path-resolution metrics and return them in
* Prometheus text-exposition format (UTF-8, newline-terminated).
*
* This is a pure read: it reads the atomic counters from
* ConfigPathResolver::metrics() and the LRU cache stats; no state
* is modified. Suitable for use as a pull-model scrape target.
* This performs a pure read of ConfigPathResolver counters unless
* registerWithRegistry() has been called with a Prometheus registry,
* in which case collect() will also update the registered counters
* using deltas (stateful) before returning serialized text. Suitable
* for use as a pull-model scrape target.
*
* @return Prometheus text-format string ready to be served on /metrics.
*/
Expand All @@ -77,6 +84,14 @@ class ConfigMetricsExporter {
* the atomic counters maintained by ConfigPathResolver.
*/
static void updateMetricsCollector();

/**
* Register Prometheus metric families for config path resolution in the
* provided registry. Should be invoked during server startup so scrape
* handlers can serialize the registry without additional setup. No-op when
* Prometheus support (THEMIS_HAS_PROMETHEUS) is not available.
*/
static void registerWithRegistry(const std::shared_ptr<prometheus::Registry>& registry);
Comment on lines +88 to +94
};

} // namespace config
Expand Down
180 changes: 116 additions & 64 deletions src/config/config_path_resolver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,8 @@ LRUCacheWithTTL<std::string, std::string> ConfigPathResolver::cache_(
std::atomic<bool> ConfigPathResolver::caching_enabled_{true};
ConfigPathResolver::DeprecationAggregator ConfigPathResolver::aggregator_;
std::atomic<bool> ConfigPathResolver::aggregation_enabled_{false};
std::map<std::string, std::atomic<uint64_t>> ConfigPathResolver::legacy_fallbacks_by_category_;
std::once_flag ConfigPathResolver::category_init_flag_;
std::atomic<ConfigEnvironment> ConfigPathResolver::current_env_{
ConfigPathResolver::envFromEnvironmentVariable()};
volatile sig_atomic_t ConfigPathResolver::sighup_pending_ = 0;
Expand Down Expand Up @@ -1294,19 +1296,21 @@ const std::map<std::string, PathMappingMetadata> ConfigPathResolver::METADATA_TA
},
};

// File-local bootstrap flag whose initializer runs as part of this translation
// unit's dynamic initialization. Its only job is to call
// ConfigPathResolver::legacyFallbackCategories(), which in turn triggers the
// one-time creation of the per-category legacy-fallback counters. The returned
// category list is intentionally discarded; the stored value is always true.
// NOTE(review): presumably this guarantees the counter map is populated before
// any resolver call increments it — confirm PATH_MAPPING is initialized earlier
// in this same file.
static const bool kLegacyCategoryCountersBootstrapped = [] {
    static_cast<void>(ConfigPathResolver::legacyFallbackCategories());
    return true;
}();

// ═══════════════════════════════════════════════════════════
// Public API Implementation
// ═══════════════════════════════════════════════════════════

std::string ConfigPathResolver::resolve(const std::string& legacy_path) {
auto result = tryResolve(legacy_path);
if (result) {
metrics_.resolution_hits++;
return *result;
}

metrics_.resolution_misses++;

// Build list of attempted paths for error message
std::vector<std::string> attempted_paths;
std::string normalized = normalizePath(legacy_path);
Expand All @@ -1333,10 +1337,20 @@ std::string ConfigPathResolver::resolve(const std::string& legacy_path) {

std::optional<std::string> ConfigPathResolver::tryResolve(const std::string& legacy_path) {
std::string normalized = normalizePath(legacy_path);
try {
validatePath(normalized);
} catch (const InvalidPathException&) {
metrics_.resolution_misses++;
return std::nullopt;
}

ConfigEnvironment env = current_env_.load();

// Build env-prefixed cache key to prevent cross-environment cache poisoning
std::string cache_key = envToString(env) + ":" + normalized;
std::string resolved_path;
bool was_legacy_fallback = false;
bool from_cache = false;

if (sighup_pending_) {
sighup_pending_ = 0;
Expand All @@ -1348,90 +1362,90 @@ std::optional<std::string> ConfigPathResolver::tryResolve(const std::string& leg
auto cached = cache_.get(cache_key);
if (cached) {
metrics_.cache_hits++;
if (audit_log_.isEnabled()) {
bool is_legacy = isLegacyPath(normalized) && (*cached == normalized);
audit_log_.record({legacy_path, *cached,
std::chrono::system_clock::now(), is_legacy, true});
spdlog::trace("[CONFIG AUDIT] path='{}' resolved='{}' legacy={} cache_hit=true",
legacy_path, *cached, is_legacy);
}
return *cached;
resolved_path = *cached;
was_legacy_fallback = isLegacyPath(normalized) && (*cached == normalized);
from_cache = true;
} else {
metrics_.cache_misses++;
}
metrics_.cache_misses++;
}

try {
validatePath(normalized);
} catch (const InvalidPathException&) {
return std::nullopt;
}

std::string new_path = mapLegacyToNew(normalized);
std::string resolved_path;
bool was_legacy_fallback = false;

if (env != ConfigEnvironment::PROD && !new_path.empty() && new_path != normalized) {
std::string relative_part = new_path;
const std::string config_prefix = "config/";
if (relative_part.starts_with(config_prefix)) {
relative_part = relative_part.substr(config_prefix.size());
}
std::string overlay_path = "config/" + envToString(env) + "/" + relative_part;
if (std::filesystem::exists(overlay_path)) {
spdlog::debug("ConfigPathResolver: Using env overlay path [{}]: {} -> {}",
envToString(env), normalized, overlay_path);
resolved_path = overlay_path;
metrics_.new_path_hits++;
}
}

if (resolved_path.empty()) {
if (!new_path.empty() && std::filesystem::exists(new_path)) {
if (normalized != new_path) {
spdlog::debug("ConfigPathResolver: Using new config path: {} -> {}",
normalized, new_path);
if (env != ConfigEnvironment::PROD && !new_path.empty() && new_path != normalized) {
std::string relative_part = new_path;
const std::string config_prefix = "config/";
if (relative_part.starts_with(config_prefix)) {
relative_part = relative_part.substr(config_prefix.size());
}
std::string overlay_path = "config/" + envToString(env) + "/" + relative_part;
if (std::filesystem::exists(overlay_path)) {
spdlog::debug("ConfigPathResolver: Using env overlay path [{}]: {} -> {}",
envToString(env), normalized, overlay_path);
resolved_path = overlay_path;
metrics_.new_path_hits++;
}
resolved_path = new_path;
} else if (std::filesystem::exists(normalized)) {
if (!new_path.empty() && new_path != normalized) {
aggregator_.incrementUsage(normalized);
was_legacy_fallback = true;

if (!aggregation_enabled_.load()) {
auto metadata = getMetadata(normalized);
if (metadata && metadata->isDeprecated()) {
if (metadata->isRemovalDue()) {
spdlog::error("ConfigPathResolver: {}", metadata->getDeprecationMessage());
}

if (resolved_path.empty()) {
if (!new_path.empty() && std::filesystem::exists(new_path)) {
if (normalized != new_path) {
spdlog::debug("ConfigPathResolver: Using new config path: {} -> {}",
normalized, new_path);
metrics_.new_path_hits++;
}
resolved_path = new_path;
} else if (std::filesystem::exists(normalized)) {
if (!new_path.empty() && new_path != normalized) {
aggregator_.incrementUsage(normalized);
was_legacy_fallback = true;

if (!aggregation_enabled_.load()) {
auto metadata = getMetadata(normalized);
if (metadata && metadata->isDeprecated()) {
if (metadata->isRemovalDue()) {
spdlog::error("ConfigPathResolver: {}", metadata->getDeprecationMessage());
} else {
spdlog::warn("ConfigPathResolver: {}", metadata->getDeprecationMessage());
}
} else {
spdlog::warn("ConfigPathResolver: {}", metadata->getDeprecationMessage());
spdlog::warn("ConfigPathResolver: Using legacy config path: {}. Please migrate to: {}",
normalized, new_path);
}
} else {
spdlog::warn("ConfigPathResolver: Using legacy config path: {}. Please migrate to: {}",
normalized, new_path);
}
}

metrics_.legacy_fallbacks++;
checkFallbackRateThreshold();
metrics_.legacy_fallbacks++;
const std::string category_path = new_path.empty() ? normalized : new_path;
const std::string category = inferCategory(category_path);
auto it = legacy_fallbacks_by_category_.find(category);
if (it != legacy_fallbacks_by_category_.end()) {
it->second.fetch_add(1, std::memory_order_relaxed);
}
checkFallbackRateThreshold();
} else {
metrics_.unmapped_requests++;
}
resolved_path = normalized;
} else {
metrics_.unmapped_requests++;
metrics_.resolution_misses++;
return std::nullopt;
}
resolved_path = normalized;
} else {
return std::nullopt;
}
}

if (caching_enabled_.load()) {
if (caching_enabled_.load() && !from_cache) {
cache_.put(cache_key, resolved_path);
}

metrics_.resolution_hits++;

if (audit_log_.isEnabled()) {
audit_log_.record({legacy_path, resolved_path,
std::chrono::system_clock::now(), was_legacy_fallback, false});
spdlog::trace("[CONFIG AUDIT] path='{}' resolved='{}' legacy={} cache_hit=false",
legacy_path, resolved_path, was_legacy_fallback);
std::chrono::system_clock::now(), was_legacy_fallback, from_cache});
spdlog::trace("[CONFIG AUDIT] path='{}' resolved='{}' legacy={} cache_hit={}",
legacy_path, resolved_path, was_legacy_fallback, from_cache);
}

return resolved_path;
Expand Down Expand Up @@ -1604,6 +1618,21 @@ void ConfigPathResolver::validatePath(const std::string& path) {
}
}

/**
 * Build the per-category legacy-fallback counter map exactly once.
 *
 * Guarded by category_init_flag_ via std::call_once, so repeated or
 * concurrent calls run the population step a single time. After it has run,
 * the map contains an "unknown" entry plus one zero-initialized counter for
 * every category that inferCategory() yields for the targets in PATH_MAPPING.
 */
void ConfigPathResolver::initLegacyFallbackCategoryCounters() {
    const auto populate = []() {
        legacy_fallbacks_by_category_.clear();

        // Catch-all bucket, inserted before the known categories.
        legacy_fallbacks_by_category_.emplace("unknown", 0);

        // Create every known category up front so the set of keys (and thus
        // the exported label cardinality) never changes afterwards; later
        // fallbacks only need to look up an existing entry and bump its
        // atomic counter.
        for (const auto& mapping : PATH_MAPPING) {
            const std::string label = inferCategory(mapping.second);
            // try_emplace leaves an already-present category untouched.
            legacy_fallbacks_by_category_.try_emplace(label, 0);
        }
    };

    std::call_once(category_init_flag_, populate);
}

void ConfigPathResolver::resetMetrics() {
metrics_.resolution_hits = 0;
metrics_.resolution_misses = 0;
Expand All @@ -1614,6 +1643,29 @@ void ConfigPathResolver::resetMetrics() {
metrics_.cache_misses = 0;
last_threshold_warn_count_ = 0;
aggregator_.reset();
for (auto& entry : legacy_fallbacks_by_category_) {
entry.second.store(0, std::memory_order_relaxed);
}
}

/**
 * Return the current legacy-fallback count for every known category.
 *
 * Ensures the category counters exist, then copies each (category, count)
 * pair into a vector in the map's iteration order. Each counter is read
 * individually with relaxed memory ordering, so the values are not captured
 * at one common instant.
 *
 * @return Vector of (category, count) pairs, one per entry in the counter map.
 */
std::vector<std::pair<std::string, uint64_t>> ConfigPathResolver::legacyFallbacksByCategory() {
    // Make sure the fixed category set has been created before reading it.
    initLegacyFallbackCategoryCounters();

    std::vector<std::pair<std::string, uint64_t>> counts;
    counts.reserve(legacy_fallbacks_by_category_.size());

    for (const auto& [category, counter] : legacy_fallbacks_by_category_) {
        const uint64_t value = counter.load(std::memory_order_relaxed);
        counts.emplace_back(category, value);
    }

    return counts;
}

/**
 * Return the category labels that have a legacy-fallback counter.
 *
 * Ensures the category counters exist, then copies every key of the counter
 * map into a vector in the map's iteration order. Counter values are not read.
 *
 * @return Vector of category names, one per entry in the counter map.
 */
std::vector<std::string> ConfigPathResolver::legacyFallbackCategories() {
    // Make sure the fixed category set has been created before reading it.
    initLegacyFallbackCategoryCounters();

    std::vector<std::string> labels;
    labels.reserve(legacy_fallbacks_by_category_.size());

    for (const auto& slot : legacy_fallbacks_by_category_) {
        const std::string& label = slot.first;
        labels.push_back(label);
    }

    return labels;
}

void ConfigPathResolver::setCachingEnabled(bool enabled) {
Expand Down
23 changes: 23 additions & 0 deletions src/config/config_path_resolver.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <filesystem>
#include <optional>
#include <atomic>
#include <mutex>
#include <chrono>
#include "config/config_audit_log.h"
#include <csignal>
Expand Down Expand Up @@ -151,6 +152,23 @@ class ConfigPathResolver {
* Get current metrics.
*/
static const Metrics& metrics() { return metrics_; }

/**
* Get legacy fallback counts broken down by config category.
*
* Categories are inferred from the canonical new path via inferCategory()
* when a legacy fallback occurs. Counts are stored in per-category atomic
* counters; the returned vector is a snapshot of (category, count) pairs.
* No external locking is required once initialization has completed.
*/
static std::vector<std::pair<std::string, uint64_t>> legacyFallbacksByCategory();

/**
* Returns the set of category labels used for legacy fallback counters.
* Categories are initialized once from PATH_MAPPING to keep the label
* cardinality stable for Prometheus exports.
*/
static std::vector<std::string> legacyFallbackCategories();

/**
* Reset metrics (primarily for testing).
Expand Down Expand Up @@ -367,6 +385,11 @@ class ConfigPathResolver {

// Active cache configuration (set once at startup from env vars or defaults)
static CacheConfig cache_config_;

// Per-category legacy fallback counters (initialized once, then atomically incremented)
static std::map<std::string, std::atomic<uint64_t>> legacy_fallbacks_by_category_;
static std::once_flag category_init_flag_;
static void initLegacyFallbackCategoryCounters();

// Helper to normalize path separators
static std::string normalizePath(const std::string& path);
Expand Down
Loading
Loading