Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion crates/datadog-agent-config/apm_replace_rule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,13 @@ pub fn deserialize_apm_replace_rules<'de, D>(
where
D: Deserializer<'de>,
{
let json_string = deserializer.deserialize_any(StringOrReplaceRulesVisitor)?;
let json_string = match deserializer.deserialize_any(StringOrReplaceRulesVisitor) {
Ok(s) => s,
Err(e) => {
tracing::error!("Failed to deserialize APM replace rules: {e}, ignoring");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're okay with an error rather than a debug level here? I thought we generally preferred debug level unless we're about to take the whole app down.

return Ok(None);
}
};

match parse_rules_from_string(&json_string) {
Ok(rules) => Ok(Some(rules)),
Expand Down
128 changes: 127 additions & 1 deletion crates/datadog-agent-config/env.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use crate::{
deserialize_optional_bool_from_anything, deserialize_optional_duration_from_microseconds,
deserialize_optional_duration_from_seconds,
deserialize_optional_duration_from_seconds_ignore_zero, deserialize_optional_string,
deserialize_string_or_int, deserialize_trace_propagation_style,
deserialize_string_or_int, deserialize_trace_propagation_style, deserialize_with_default,
flush_strategy::FlushStrategy,
log_level::LogLevel,
logs_additional_endpoints::{LogsAdditionalEndpoint, deserialize_logs_additional_endpoints},
Expand Down Expand Up @@ -43,6 +43,7 @@ pub struct EnvConfig {
///
/// Minimum log level of the Datadog Agent.
/// Valid log levels are: trace, debug, info, warn, and error.
#[serde(deserialize_with = "deserialize_with_default")]
pub log_level: Option<LogLevel>,

/// @env `DD_FLUSH_TIMEOUT`
Expand Down Expand Up @@ -398,6 +399,7 @@ pub struct EnvConfig {
/// @env `DD_SERVERLESS_FLUSH_STRATEGY`
///
/// The flush strategy to use for AWS Lambda.
#[serde(deserialize_with = "deserialize_with_default")]
pub serverless_flush_strategy: Option<FlushStrategy>,
/// @env `DD_ENHANCED_METRICS`
///
Expand Down Expand Up @@ -716,6 +718,130 @@ mod tests {
processing_rule::{Kind, ProcessingRule},
};

/// Comprehensive test: every non-string env var set to an invalid value.
/// The load MUST succeed, and invalid fields must fall back to defaults.
///
/// Note: string-typed fields (DD_SITE, DD_API_KEY, etc.) can't fail from env
/// because all env values are strings. This test covers all non-string types.
///
/// When adding a new non-string field to EnvConfig, add an entry here with
/// an invalid value to ensure graceful deserialization is in place.
#[test]
fn test_all_env_fields_wrong_type_fallback_to_default() {
    figment::Jail::expect_with(|jail| {
        // Start from a clean environment so only our invalid values are seen.
        jail.clear_env();

        // Numeric fields → invalid string
        jail.set_env("DD_FLUSH_TIMEOUT", "not_a_number");
        jail.set_env("DD_COMPRESSION_LEVEL", "not_a_number");
        jail.set_env("DD_LOGS_CONFIG_COMPRESSION_LEVEL", "not_a_number");
        jail.set_env("DD_APM_CONFIG_COMPRESSION_LEVEL", "not_a_number");
        jail.set_env("DD_METRICS_CONFIG_COMPRESSION_LEVEL", "not_a_number");
        jail.set_env("DD_CAPTURE_LAMBDA_PAYLOAD_MAX_DEPTH", "not_a_number");
        jail.set_env("DD_DOGSTATSD_SO_RCVBUF", "not_a_number");
        jail.set_env("DD_DOGSTATSD_BUFFER_SIZE", "not_a_number");
        jail.set_env("DD_DOGSTATSD_QUEUE_SIZE", "not_a_number");
        jail.set_env(
            "DD_OTLP_CONFIG_RECEIVER_PROTOCOLS_GRPC_MAX_RECV_MSG_SIZE_MIB",
            "not_a_number",
        );
        jail.set_env("DD_OTLP_CONFIG_METRICS_DELTA_TTL", "not_a_number");
        jail.set_env(
            "DD_OTLP_CONFIG_TRACES_PROBABILISTIC_SAMPLER_SAMPLING_PERCENTAGE",
            "not_a_number",
        );

        // Bool fields → invalid string
        jail.set_env("DD_SKIP_SSL_VALIDATION", "not_a_bool");
        jail.set_env("DD_LOGS_CONFIG_USE_COMPRESSION", "not_a_bool");
        jail.set_env(
            "DD_APM_CONFIG_OBFUSCATION_HTTP_REMOVE_QUERY_STRING",
            "not_a_bool",
        );
        jail.set_env(
            "DD_APM_CONFIG_OBFUSCATION_HTTP_REMOVE_PATHS_WITH_DIGITS",
            "not_a_bool",
        );
        jail.set_env("DD_TRACE_PROPAGATION_EXTRACT_FIRST", "not_a_bool");
        jail.set_env("DD_TRACE_PROPAGATION_HTTP_BAGGAGE_ENABLED", "not_a_bool");
        jail.set_env("DD_TRACE_AWS_SERVICE_REPRESENTATION_ENABLED", "not_a_bool");
        jail.set_env("DD_ENHANCED_METRICS", "not_a_bool");
        jail.set_env("DD_LAMBDA_PROC_ENHANCED_METRICS", "not_a_bool");
        jail.set_env("DD_CAPTURE_LAMBDA_PAYLOAD", "not_a_bool");
        jail.set_env("DD_COMPUTE_TRACE_STATS_ON_EXTENSION", "not_a_bool");
        jail.set_env("DD_SERVERLESS_APPSEC_ENABLED", "not_a_bool");
        jail.set_env("DD_API_SECURITY_ENABLED", "not_a_bool");
        jail.set_env("DD_OTLP_CONFIG_TRACES_ENABLED", "not_a_bool");
        jail.set_env(
            "DD_OTLP_CONFIG_TRACES_SPAN_NAME_AS_RESOURCE_NAME",
            "not_a_bool",
        );
        jail.set_env("DD_OTLP_CONFIG_IGNORE_MISSING_DATADOG_FIELDS", "not_a_bool");
        jail.set_env("DD_OTLP_CONFIG_METRICS_ENABLED", "not_a_bool");
        jail.set_env(
            "DD_OTLP_CONFIG_METRICS_RESOURCE_ATTRIBUTES_AS_TAGS",
            "not_a_bool",
        );
        jail.set_env(
            "DD_OTLP_CONFIG_METRICS_INSTRUMENTATION_SCOPE_METADATA_AS_TAGS",
            "not_a_bool",
        );
        jail.set_env(
            "DD_OTLP_CONFIG_METRICS_HISTOGRAMS_SEND_COUNT_SUM_METRICS",
            "not_a_bool",
        );
        jail.set_env(
            "DD_OTLP_CONFIG_METRICS_HISTOGRAMS_SEND_AGGREGATION_METRICS",
            "not_a_bool",
        );
        jail.set_env("DD_OTLP_CONFIG_LOGS_ENABLED", "not_a_bool");
        jail.set_env(
            "DD_OBSERVABILITY_PIPELINES_WORKER_LOGS_ENABLED",
            "not_a_bool",
        );
        jail.set_env("DD_SERVERLESS_LOGS_ENABLED", "not_a_bool");
        jail.set_env("DD_LOGS_ENABLED", "not_a_bool");

        // Enum fields → invalid string
        jail.set_env("DD_LOG_LEVEL", "invalid_level_999");
        jail.set_env("DD_SERVERLESS_FLUSH_STRATEGY", "[[[invalid");

        // Duration fields → invalid string
        jail.set_env("DD_SPAN_DEDUP_TIMEOUT", "not_a_number");
        jail.set_env("DD_API_KEY_SECRET_RELOAD_INTERVAL", "not_a_number");
        jail.set_env("DD_APPSEC_WAF_TIMEOUT", "not_a_number");
        jail.set_env("DD_API_SECURITY_SAMPLE_DELAY", "not_a_number");

        // JSON fields → invalid JSON
        jail.set_env("DD_ADDITIONAL_ENDPOINTS", "not_json{{");
        jail.set_env("DD_APM_ADDITIONAL_ENDPOINTS", "not_json{{");
        jail.set_env("DD_LOGS_CONFIG_PROCESSING_RULES", "not_json{{");
        jail.set_env("DD_LOGS_CONFIG_ADDITIONAL_ENDPOINTS", "not_json{{");
        jail.set_env("DD_APM_REPLACE_TAGS", "not_json{{");

        // Comma/space-separated fields → invalid (these are lenient but include for completeness)
        jail.set_env("DD_SERVICE_MAPPING", "no-colon-here");
        jail.set_env("DD_APM_FEATURES", ""); // empty
        jail.set_env("DD_APM_FILTER_TAGS_REQUIRE", "");
        jail.set_env("DD_APM_FILTER_TAGS_REJECT", "");
        jail.set_env("DD_APM_FILTER_TAGS_REGEX_REQUIRE", "");
        jail.set_env("DD_APM_FILTER_TAGS_REGEX_REJECT", "");
        jail.set_env("DD_OTLP_CONFIG_TRACES_SPAN_NAME_REMAPPINGS", "no-colon");

        let mut config = Config::default();
        // This MUST succeed — no single field should crash the whole config
        EnvConfigSource
            .load(&mut config)
            .expect("load must not fail when env vars have wrong types");

        // No string env vars were set, so string fields stay at default.
        // All non-string env vars were set to invalid values, so they also stay at default.
        // The entire config should equal the default.
        assert_eq!(config, Config::default());

        Ok(())
    });
}

#[test]
#[allow(clippy::too_many_lines)]
fn test_merge_config_overrides_with_environment_variables() {
Expand Down
123 changes: 92 additions & 31 deletions crates/datadog-agent-config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use serde_json::Value;
use std::path::Path;
use std::time::Duration;
use std::{collections::HashMap, fmt};
use tracing::{debug, error};
use tracing::{debug, error, warn};

use crate::{
apm_replace_rule::deserialize_apm_replace_rules,
Expand Down Expand Up @@ -689,7 +689,13 @@ pub fn deserialize_key_value_pair_array_to_hashmap<'de, D>(
where
D: Deserializer<'de>,
{
let array: Vec<String> = Vec::deserialize(deserializer)?;
let array: Vec<String> = match Vec::deserialize(deserializer) {
Ok(v) => v,
Err(e) => {
error!("Failed to deserialize tags array: {e}, ignoring");
return Ok(HashMap::new());
}
};
let mut map = HashMap::new();
for s in array {
if let Some((key, val)) = parse_key_value_tag(&s) {
Expand Down Expand Up @@ -760,40 +766,82 @@ where
}
}

/// Gracefully deserialize any field, falling back to `T::default()` on error.
///
/// This ensures that a single field with the wrong type never fails the entire
/// struct extraction. Works for any `T` that implements `Deserialize + Default`:
/// - `Option<T>` defaults to `None`
/// - `Vec<T>` defaults to `[]`
/// - `HashMap<K,V>` defaults to `{}`
/// - Structs with `#[derive(Default)]` use their default
pub fn deserialize_with_default<'de, D, T>(deserializer: D) -> Result<T, D::Error>
where
D: Deserializer<'de>,
T: Deserialize<'de> + Default,
{
match T::deserialize(deserializer) {
Ok(value) => Ok(value),
Err(e) => {
warn!("Failed to deserialize field: {}, using default", e);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this sometimes a warn and sometimes an error?

Ok(T::default())
}
}
}

/// Deserialize an optional duration given as an integer count of microseconds.
///
/// On any deserialization failure the error is logged and `None` is returned,
/// so a malformed value never fails the surrounding struct extraction.
pub fn deserialize_optional_duration_from_microseconds<'de, D: Deserializer<'de>>(
    deserializer: D,
) -> Result<Option<Duration>, D::Error> {
    let parsed = Option::<u64>::deserialize(deserializer);
    match parsed {
        Ok(maybe_micros) => Ok(maybe_micros.map(Duration::from_micros)),
        Err(e) => {
            error!("Failed to deserialize duration (microseconds): {e}, ignoring");
            Ok(None)
        }
    }
}

pub fn deserialize_optional_duration_from_seconds<'de, D: Deserializer<'de>>(
deserializer: D,
) -> Result<Option<Duration>, D::Error> {
struct DurationVisitor;
impl serde::de::Visitor<'_> for DurationVisitor {
type Value = Option<Duration>;
fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "a duration in seconds (integer or float)")
}
fn visit_u64<E: serde::de::Error>(self, v: u64) -> Result<Self::Value, E> {
Ok(Some(Duration::from_secs(v)))
}
fn visit_i64<E: serde::de::Error>(self, v: i64) -> Result<Self::Value, E> {
if v < 0 {
error!("Failed to parse duration: negative durations are not allowed, ignoring");
return Ok(None);
// Deserialize into a generic Value first to avoid propagating type errors,
// then try to extract a duration from it.
match Value::deserialize(deserializer) {
Ok(Value::Number(n)) => {
if let Some(u) = n.as_u64() {
Ok(Some(Duration::from_secs(u)))
} else if let Some(i) = n.as_i64() {
if i < 0 {
error!(
"Failed to parse duration: negative durations are not allowed, ignoring"
);
Ok(None)
} else {
Ok(Some(Duration::from_secs(i as u64)))
}
} else if let Some(f) = n.as_f64() {
if f < 0.0 {
error!(
"Failed to parse duration: negative durations are not allowed, ignoring"
);
Ok(None)
} else {
Ok(Some(Duration::from_secs_f64(f)))
}
} else {
error!("Failed to parse duration: unsupported number format, ignoring");
Ok(None)
}
self.visit_u64(u64::try_from(v).expect("positive i64 to u64 conversion never fails"))
}
fn visit_f64<E: serde::de::Error>(self, v: f64) -> Result<Self::Value, E> {
if v < 0f64 {
error!("Failed to parse duration: negative durations are not allowed, ignoring");
return Ok(None);
}
Ok(Some(Duration::from_secs_f64(v)))
Ok(Value::Null) => Ok(None),
Ok(other) => {
error!("Failed to parse duration: expected number, got {other}, ignoring");
Ok(None)
}
Err(e) => {
error!("Failed to deserialize duration: {e}, ignoring");
Ok(None)
}
}
deserializer.deserialize_any(DurationVisitor)
}

// Like deserialize_optional_duration_from_seconds(), but return None if the value is 0
Expand All @@ -814,7 +862,13 @@ where
D: Deserializer<'de>,
{
use std::str::FromStr;
let s: String = String::deserialize(deserializer)?;
let s: String = match String::deserialize(deserializer) {
Ok(s) => s,
Err(e) => {
error!("Failed to deserialize trace propagation style: {e}, ignoring");
return Ok(Vec::new());
}
};

Ok(s.split(',')
.filter_map(
Expand Down Expand Up @@ -1477,18 +1531,25 @@ pub mod tests {
serde_json::from_str::<Value>("{}").expect("failed to parse JSON"),
Value { duration: None }
);
serde_json::from_str::<Value>(r#"{"duration":-1}"#)
.expect_err("should have failed parsing");
// Negative and non-integer values gracefully fall back to None
assert_eq!(
serde_json::from_str::<Value>(r#"{"duration":-1}"#).expect("should not fail"),
Value { duration: None }
);
assert_eq!(
serde_json::from_str::<Value>(r#"{"duration":1000000}"#).expect("failed to parse JSON"),
Value {
duration: Some(Duration::from_secs(1))
}
);
serde_json::from_str::<Value>(r#"{"duration":-1.5}"#)
.expect_err("should have failed parsing");
serde_json::from_str::<Value>(r#"{"duration":1.5}"#)
.expect_err("should have failed parsing");
assert_eq!(
serde_json::from_str::<Value>(r#"{"duration":-1.5}"#).expect("should not fail"),
Value { duration: None }
);
assert_eq!(
serde_json::from_str::<Value>(r#"{"duration":1.5}"#).expect("should not fail"),
Value { duration: None }
);
}

#[test]
Expand Down
8 changes: 7 additions & 1 deletion crates/datadog-agent-config/service_mapping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,13 @@ pub fn deserialize_service_mapping<'de, D>(
where
D: Deserializer<'de>,
{
let s: String = String::deserialize(deserializer)?;
let s: String = match String::deserialize(deserializer) {
Ok(s) => s,
Err(e) => {
tracing::error!("Failed to deserialize service mapping: {e}, ignoring");
return Ok(HashMap::new());
}
};

let map = s
.split(',')
Expand Down
Loading
Loading