diff --git a/Cargo.lock b/Cargo.lock index 19805168..c625dfba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -77,6 +77,15 @@ dependencies = [ "syn 2.0.114", ] +[[package]] +name = "atomic" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89cbf775b137e9b968e67227ef7f775587cde3fd31b0d8599dbd0f598a48340" +dependencies = [ + "bytemuck", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -188,6 +197,12 @@ version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" +[[package]] +name = "bytemuck" +version = "1.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" + [[package]] name = "byteorder" version = "1.5.0" @@ -380,6 +395,22 @@ dependencies = [ "parking_lot_core", ] +[[package]] +name = "datadog-agent-config" +version = "0.1.0" +dependencies = [ + "dogstatsd", + "figment", + "libdd-trace-obfuscation", + "libdd-trace-utils", + "log", + "serde", + "serde-aux", + "serde_json", + "tokio", + "tracing", +] + [[package]] name = "datadog-fips" version = "0.1.0" @@ -442,6 +473,7 @@ dependencies = [ "serde", "serde_json", "serial_test", + "temp-env", "tempfile", "tokio", "tracing", @@ -454,7 +486,7 @@ source = "git+https://github.com/DataDog/saluki/?rev=f863626dbfe3c59bb390985fa65 dependencies = [ "datadog-protos", "float-cmp", - "ordered-float", + "ordered-float 4.6.0", "smallvec", ] @@ -592,6 +624,22 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "figment" +version = "0.10.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cb01cd46b0cf372153850f4c6c272d9cbea2da513e07538405148f95bd789f3" +dependencies = [ + "atomic", + "parking_lot", + "pear", + "serde", + 
"serde_yaml", + "tempfile", + "uncased", + "version_check", +] + [[package]] name = "find-msvc-tools" version = "0.1.9" @@ -1145,6 +1193,12 @@ dependencies = [ "hashbrown 0.16.1", ] +[[package]] +name = "inlinable_string" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb" + [[package]] name = "ipnet" version = "2.11.0" @@ -1506,6 +1560,15 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" +[[package]] +name = "ordered-float" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] + [[package]] name = "ordered-float" version = "4.6.0" @@ -1553,6 +1616,29 @@ dependencies = [ "smallvec", ] +[[package]] +name = "pear" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdeeaa00ce488657faba8ebf44ab9361f9365a97bd39ffb8a60663f57ff4b467" +dependencies = [ + "inlinable_string", + "pear_codegen", + "yansi", +] + +[[package]] +name = "pear_codegen" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bab5b985dc082b345f812b7df84e1bef27e7207b39e448439ba8bd69c93f147" +dependencies = [ + "proc-macro2", + "proc-macro2-diagnostics", + "quote", + "syn 2.0.114", +] + [[package]] name = "percent-encoding" version = "2.3.2" @@ -1668,6 +1754,19 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "proc-macro2-diagnostics" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", + "version_check", + "yansi", +] + [[package]] name = "proptest" version = 
"1.9.0" @@ -2240,6 +2339,27 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde-aux" +version = "4.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "207f67b28fe90fb596503a9bf0bf1ea5e831e21307658e177c5dfcdfc3ab8a0a" +dependencies = [ + "serde", + "serde-value", + "serde_json", +] + +[[package]] +name = "serde-value" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c" +dependencies = [ + "ordered-float 2.10.1", + "serde", +] + [[package]] name = "serde_bytes" version = "0.11.19" @@ -2305,6 +2425,19 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + [[package]] name = "serial_test" version = "2.0.0" @@ -2474,6 +2607,15 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "temp-env" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96374855068f47402c3121c6eed88d29cb1de8f3ab27090e273e420bdabcf050" +dependencies = [ + "parking_lot", +] + [[package]] name = "tempfile" version = "3.24.0" @@ -2803,6 +2945,15 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" +[[package]] +name = "uncased" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1b88fcfe09e89d3866a5c11019378088af2d24c3fbd4f0543f96b479ec90697" +dependencies = [ + "version_check", +] + [[package]] name = "unicode-ident" version = "1.0.22" @@ -2821,6 +2972,12 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + [[package]] name = "untrusted" version = "0.9.0" @@ -3200,6 +3357,12 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + [[package]] name = "yoke" version = "0.8.1" diff --git a/Cargo.toml b/Cargo.toml index cb438b99..0ce470ad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ members = [ ] [workspace.package] -edition = "2021" +edition = "2024" license = "Apache-2.0" homepage = "https://github.com/DataDog/serverless-components" repository = "https://github.com/DataDog/serverless-components" diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv index 26f606f9..c94550e4 100644 --- a/LICENSE-3rdparty.csv +++ b/LICENSE-3rdparty.csv @@ -7,6 +7,7 @@ assert-json-diff,https://github.com/davidpdrsn/assert-json-diff,MIT,David Peders async-lock,https://github.com/smol-rs/async-lock,Apache-2.0 OR MIT,Stjepan Glavina async-object-pool,https://github.com/alexliesenfeld/async-object-pool,MIT,Alexander Liesenfeld async-trait,https://github.com/dtolnay/async-trait,MIT OR Apache-2.0,David Tolnay +atomic,https://github.com/Amanieu/atomic-rs,Apache-2.0 OR MIT,Amanieu d'Antras atomic-waker,https://github.com/smol-rs/atomic-waker,Apache-2.0 OR MIT,"Stjepan Glavina , Contributors to futures-rs" aws-lc-rs,https://github.com/aws/aws-lc-rs,ISC AND (Apache-2.0 OR ISC),AWS-LibCrypto aws-lc-sys,https://github.com/aws/aws-lc-rs,ISC AND (Apache-2.0 OR ISC) AND OpenSSL,AWS-LC @@ -16,6 +17,7 @@ 
bit-vec,https://github.com/contain-rs/bit-vec,Apache-2.0 OR MIT,Alexis Beingessn bitflags,https://github.com/bitflags/bitflags,MIT OR Apache-2.0,The Rust Project Developers block-buffer,https://github.com/RustCrypto/utils,MIT OR Apache-2.0,RustCrypto Developers bumpalo,https://github.com/fitzgen/bumpalo,MIT OR Apache-2.0,Nick Fitzgerald +bytemuck,https://github.com/Lokathor/bytemuck,Zlib OR Apache-2.0 OR MIT,Lokathor byteorder,https://github.com/BurntSushi/byteorder,Unlicense OR MIT,Andrew Gallant bytes,https://github.com/tokio-rs/bytes,MIT,"Carl Lerche , Sean McArthur " camino,https://github.com/camino-rs/camino,MIT OR Apache-2.0,"Without Boats , Ashley Williams , Steve Klabnik , Rain " @@ -45,6 +47,7 @@ errno,https://github.com/lambda-fairy/rust-errno,MIT OR Apache-2.0,"Chris Wong < event-listener,https://github.com/smol-rs/event-listener,Apache-2.0 OR MIT,"Stjepan Glavina , John Nunley " event-listener-strategy,https://github.com/smol-rs/event-listener-strategy,Apache-2.0 OR MIT,John Nunley fastrand,https://github.com/smol-rs/fastrand,Apache-2.0 OR MIT,Stjepan Glavina +figment,https://github.com/SergioBenitez/Figment,MIT OR Apache-2.0,Sergio Benitez find-msvc-tools,https://github.com/rust-lang/cc-rs,MIT OR Apache-2.0,The find-msvc-tools Authors fixedbitset,https://github.com/petgraph/fixedbitset,MIT OR Apache-2.0,bluss flate2,https://github.com/rust-lang/flate2-rs,MIT OR Apache-2.0,"Alex Crichton , Josh Triplett " @@ -91,6 +94,7 @@ icu_provider,https://github.com/unicode-org/icu4x,Unicode-3.0,The ICU4X Project idna,https://github.com/servo/rust-url,MIT OR Apache-2.0,The rust-url developers idna_adapter,https://github.com/hsivonen/idna_adapter,Apache-2.0 OR MIT,The rust-url developers indexmap,https://github.com/indexmap-rs/indexmap,Apache-2.0 OR MIT,The indexmap Authors +inlinable_string,https://github.com/fitzgen/inlinable_string,Apache-2.0 OR MIT,Nick Fitzgerald ipnet,https://github.com/krisprice/ipnet,MIT OR Apache-2.0,Kris Price 
iri-string,https://github.com/lo48576/iri-string,MIT OR Apache-2.0,YOSHIOKA Takuma itertools,https://github.com/rust-itertools/itertools,MIT OR Apache-2.0,bluss @@ -126,6 +130,8 @@ parking,https://github.com/smol-rs/parking,Apache-2.0 OR MIT,"Stjepan Glavina parking_lot_core,https://github.com/Amanieu/parking_lot,MIT OR Apache-2.0,Amanieu d'Antras path-tree,https://github.com/viz-rs/path-tree,MIT OR Apache-2.0,Fangdun Tsai +pear,https://github.com/SergioBenitez/Pear,MIT OR Apache-2.0,Sergio Benitez +pear_codegen,https://github.com/SergioBenitez/Pear,MIT OR Apache-2.0,Sergio Benitez percent-encoding,https://github.com/servo/rust-url,MIT OR Apache-2.0,The rust-url developers petgraph,https://github.com/petgraph/petgraph,MIT OR Apache-2.0,"bluss, mitchmindtree" pin-project,https://github.com/taiki-e/pin-project,Apache-2.0 OR MIT,The pin-project Authors @@ -138,6 +144,7 @@ prettyplease,https://github.com/dtolnay/prettyplease,MIT OR Apache-2.0,David Tol proc-macro-error,https://gitlab.com/CreepySkeleton/proc-macro-error,MIT OR Apache-2.0,CreepySkeleton proc-macro-error-attr,https://gitlab.com/CreepySkeleton/proc-macro-error,MIT OR Apache-2.0,CreepySkeleton proc-macro2,https://github.com/dtolnay/proc-macro2,MIT OR Apache-2.0,"David Tolnay , Alex Crichton " +proc-macro2-diagnostics,https://github.com/SergioBenitez/proc-macro2-diagnostics,MIT OR Apache-2.0,Sergio Benitez prost,https://github.com/tokio-rs/prost,Apache-2.0,"Dan Burkert , Lucio Franco , Casper Meijn , Tokio Contributors " prost-build,https://github.com/tokio-rs/prost,Apache-2.0,"Dan Burkert , Lucio Franco , Casper Meijn , Tokio Contributors " prost-derive,https://github.com/tokio-rs/prost,Apache-2.0,"Dan Burkert , Lucio Franco , Casper Meijn , Tokio Contributors " @@ -178,12 +185,15 @@ security-framework,https://github.com/kornelski/rust-security-framework,MIT OR A security-framework-sys,https://github.com/kornelski/rust-security-framework,MIT OR Apache-2.0,"Steven Fackler , Kornel " 
semver,https://github.com/dtolnay/semver,MIT OR Apache-2.0,David Tolnay serde,https://github.com/serde-rs/serde,MIT OR Apache-2.0,"Erick Tryzelaar , David Tolnay " +serde-aux,https://github.com/iddm/serde-aux,MIT,Victor Polevoy +serde-value,https://github.com/arcnmx/serde-value,MIT,arcnmx serde_bytes,https://github.com/serde-rs/bytes,MIT OR Apache-2.0,David Tolnay serde_core,https://github.com/serde-rs/serde,MIT OR Apache-2.0,"Erick Tryzelaar , David Tolnay " serde_derive,https://github.com/serde-rs/serde,MIT OR Apache-2.0,"Erick Tryzelaar , David Tolnay " serde_json,https://github.com/serde-rs/json,MIT OR Apache-2.0,"Erick Tryzelaar , David Tolnay " serde_regex,https://github.com/tailhook/serde-regex,MIT OR Apache-2.0,paul@colomiets.name serde_urlencoded,https://github.com/nox/serde_urlencoded,MIT OR Apache-2.0,Anthony Ramine +serde_yaml,https://github.com/dtolnay/serde-yaml,MIT OR Apache-2.0,David Tolnay serial_test_derive,https://github.com/palfrey/serial_test,MIT,Tom Parker-Shemilt sha1,https://github.com/RustCrypto/hashes,MIT OR Apache-2.0,RustCrypto Developers sharded-slab,https://github.com/hawkw/sharded-slab,MIT,Eliza Weisman @@ -228,9 +238,11 @@ tracing-test-macro,https://github.com/dbrgn/tracing-test,MIT,Danilo Bargen typenum,https://github.com/paholg/typenum,MIT OR Apache-2.0,"Paho Lurie-Gregg , Andre Bogus " unarray,https://github.com/cameron1024/unarray,MIT OR Apache-2.0,The unarray Authors +uncased,https://github.com/SergioBenitez/uncased,MIT OR Apache-2.0,Sergio Benitez unicode-ident,https://github.com/dtolnay/unicode-ident,(MIT OR Apache-2.0) AND Unicode-3.0,David Tolnay unicode-width,https://github.com/unicode-rs/unicode-width,MIT OR Apache-2.0,"kwantam , Manish Goregaokar " unicode-xid,https://github.com/unicode-rs/unicode-xid,MIT OR Apache-2.0,"erick.tryzelaar , kwantam , Manish Goregaokar " +unsafe-libyaml,https://github.com/dtolnay/unsafe-libyaml,MIT,David Tolnay untrusted,https://github.com/briansmith/untrusted,ISC,Brian Smith 
url,https://github.com/servo/rust-url,MIT OR Apache-2.0,The rust-url developers urlencoding,https://github.com/kornelski/rust_urlencoding,MIT,"Kornel , Bertram Truong " @@ -273,6 +285,7 @@ windows_x86_64_msvc,https://github.com/microsoft/windows-rs,MIT OR Apache-2.0,Mi windows_x86_64_msvc,https://github.com/microsoft/windows-rs,MIT OR Apache-2.0,The windows_x86_64_msvc Authors wit-bindgen,https://github.com/bytecodealliance/wit-bindgen,Apache-2.0 WITH LLVM-exception OR Apache-2.0 OR MIT,Alex Crichton writeable,https://github.com/unicode-org/icu4x,Unicode-3.0,The ICU4X Project Developers +yansi,https://github.com/SergioBenitez/yansi,MIT OR Apache-2.0,Sergio Benitez yoke,https://github.com/unicode-org/icu4x,Unicode-3.0,Manish Goregaokar yoke-derive,https://github.com/unicode-org/icu4x,Unicode-3.0,Manish Goregaokar zerocopy,https://github.com/google/zerocopy,BSD-2-Clause OR Apache-2.0 OR MIT,"Joshua Liebow-Feeser , Jack Wrenn " diff --git a/crates/datadog-agent-config/Cargo.toml b/crates/datadog-agent-config/Cargo.toml new file mode 100644 index 00000000..50678993 --- /dev/null +++ b/crates/datadog-agent-config/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "datadog-agent-config" +version = "0.1.0" +edition.workspace = true +license.workspace = true + +[lib] +path = "mod.rs" + +[dependencies] +figment = { version = "0.10", default-features = false, features = ["yaml", "env"] } +libdd-trace-obfuscation = { git = "https://github.com/DataDog/libdatadog", rev = "d52ee90209cb12a28bdda0114535c1a985a29d95" } +libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "d52ee90209cb12a28bdda0114535c1a985a29d95" } +log = { version = "0.4", default-features = false } +serde = { version = "1.0", default-features = false, features = ["derive"] } +serde-aux = { version = "4.7", default-features = false } +serde_json = { version = "1.0", default-features = false, features = ["alloc"] } +tracing = { version = "0.1", default-features = false } +dogstatsd = { path = 
"../dogstatsd" } +tokio = { version = "1.47", default-features = false, features = ["time"] } + +[dev-dependencies] +figment = { version = "0.10", default-features = false, features = ["yaml", "env", "test"] } + +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(coverage,coverage_nightly)'] } diff --git a/crates/datadog-agent-config/additional_endpoints.rs b/crates/datadog-agent-config/additional_endpoints.rs new file mode 100644 index 00000000..16611833 --- /dev/null +++ b/crates/datadog-agent-config/additional_endpoints.rs @@ -0,0 +1,129 @@ +use serde::{Deserialize, Deserializer}; +use serde_json::Value; +use std::collections::HashMap; +use tracing::error; + +#[allow(clippy::module_name_repetitions)] +pub fn deserialize_additional_endpoints<'de, D>( + deserializer: D, +) -> Result>, D::Error> +where + D: Deserializer<'de>, +{ + let value = Value::deserialize(deserializer)?; + + match value { + Value::Object(map) => { + // For YAML format (object) in datadog.yaml + let mut result = HashMap::new(); + for (key, value) in map { + match value { + Value::Array(arr) => { + let urls: Vec = arr + .into_iter() + .filter_map(|v| v.as_str().map(String::from)) + .collect(); + result.insert(key, urls); + } + _ => { + error!( + "Failed to deserialize additional endpoints - Invalid YAML format: expected array for key {}", + key + ); + } + } + } + Ok(result) + } + Value::String(s) if !s.is_empty() => { + // For JSON format (string) in DD_ADDITIONAL_ENDPOINTS + if let Ok(map) = serde_json::from_str(&s) { + Ok(map) + } else { + error!("Failed to deserialize additional endpoints - Invalid JSON format"); + Ok(HashMap::new()) + } + } + _ => Ok(HashMap::new()), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn test_deserialize_additional_endpoints_yaml() { + // Test YAML format (object) + let input = json!({ + "https://app.datadoghq.com": ["key1", "key2"], + "https://app.datadoghq.eu": ["key3"] + }); + + let result = 
deserialize_additional_endpoints(input) + .expect("Failed to deserialize additional endpoints"); + + let mut expected = HashMap::new(); + expected.insert( + "https://app.datadoghq.com".to_string(), + vec!["key1".to_string(), "key2".to_string()], + ); + expected.insert( + "https://app.datadoghq.eu".to_string(), + vec!["key3".to_string()], + ); + + assert_eq!(result, expected); + } + + #[test] + fn test_deserialize_additional_endpoints_json() { + // Test JSON string format + let input = json!( + "{\"https://app.datadoghq.com\":[\"key1\",\"key2\"],\"https://app.datadoghq.eu\":[\"key3\"]}" + ); + + let result = deserialize_additional_endpoints(input) + .expect("Failed to deserialize additional endpoints"); + + let mut expected = HashMap::new(); + expected.insert( + "https://app.datadoghq.com".to_string(), + vec!["key1".to_string(), "key2".to_string()], + ); + expected.insert( + "https://app.datadoghq.eu".to_string(), + vec!["key3".to_string()], + ); + + assert_eq!(result, expected); + } + + #[test] + fn test_deserialize_additional_endpoints_invalid_or_empty() { + // Test empty YAML + let input = json!({}); + let result = deserialize_additional_endpoints(input) + .expect("Failed to deserialize additional endpoints"); + assert!(result.is_empty()); + + // Test empty JSON + let input = json!(""); + let result = deserialize_additional_endpoints(input) + .expect("Failed to deserialize additional endpoints"); + assert!(result.is_empty()); + + let input = json!({ + "https://app.datadoghq.com": "invalid-yaml" + }); + let result = deserialize_additional_endpoints(input) + .expect("Failed to deserialize additional endpoints"); + assert!(result.is_empty()); + + let input = json!("invalid-json"); + let result = deserialize_additional_endpoints(input) + .expect("Failed to deserialize additional endpoints"); + assert!(result.is_empty()); + } +} diff --git a/crates/datadog-agent-config/apm_replace_rule.rs b/crates/datadog-agent-config/apm_replace_rule.rs new file mode 100644 index 
00000000..41b13594 --- /dev/null +++ b/crates/datadog-agent-config/apm_replace_rule.rs @@ -0,0 +1,71 @@ +use libdd_trace_obfuscation::replacer::{ReplaceRule, parse_rules_from_string}; +use serde::de::{Deserializer, SeqAccess, Visitor}; +use serde::{Deserialize, Serialize}; +use serde_json; +use std::fmt; + +#[derive(Deserialize, Serialize)] +struct ReplaceRuleYaml { + name: String, + pattern: String, + repl: String, +} + +struct StringOrReplaceRulesVisitor; + +impl<'de> Visitor<'de> for StringOrReplaceRulesVisitor { + type Value = String; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a JSON string or YAML sequence of replace rules") + } + + // Handle existing JSON strings + fn visit_str(self, value: &str) -> Result + where + E: serde::de::Error, + { + match serde_json::from_str::(value) { + Ok(_) => Ok(value.to_string()), + Err(e) => { + tracing::error!("Invalid JSON string for APM replace rules: {}", e); + Ok(String::new()) + } + } + } + + // Convert YAML sequences to JSON strings + fn visit_seq(self, mut seq: A) -> Result + where + A: SeqAccess<'de>, + { + let mut rules = Vec::new(); + while let Some(rule) = seq.next_element::()? 
{ + rules.push(rule); + } + match serde_json::to_string(&rules) { + Ok(json) => Ok(json), + Err(e) => { + tracing::error!("Failed to convert YAML rules to JSON: {}", e); + Ok(String::new()) + } + } + } +} + +pub fn deserialize_apm_replace_rules<'de, D>( + deserializer: D, +) -> Result>, D::Error> +where + D: Deserializer<'de>, +{ + let json_string = deserializer.deserialize_any(StringOrReplaceRulesVisitor)?; + + match parse_rules_from_string(&json_string) { + Ok(rules) => Ok(Some(rules)), + Err(e) => { + tracing::error!("Failed to parse APM replace rule, ignoring: {}", e); + Ok(None) + } + } +} diff --git a/crates/datadog-agent-config/env.rs b/crates/datadog-agent-config/env.rs new file mode 100644 index 00000000..f9d66b69 --- /dev/null +++ b/crates/datadog-agent-config/env.rs @@ -0,0 +1,1247 @@ +use figment::{Figment, providers::Env}; +use serde::Deserialize; +use std::collections::HashMap; +use std::time::Duration; + +use dogstatsd::util::parse_metric_namespace; +use libdd_trace_obfuscation::replacer::ReplaceRule; + +use crate::{ + Config, ConfigError, ConfigSource, + additional_endpoints::deserialize_additional_endpoints, + apm_replace_rule::deserialize_apm_replace_rules, + deserialize_apm_filter_tags, deserialize_array_from_comma_separated_string, + deserialize_key_value_pairs, deserialize_option_lossless, + deserialize_optional_bool_from_anything, deserialize_optional_duration_from_microseconds, + deserialize_optional_duration_from_seconds, + deserialize_optional_duration_from_seconds_ignore_zero, deserialize_optional_string, + deserialize_string_or_int, + flush_strategy::FlushStrategy, + log_level::LogLevel, + logs_additional_endpoints::{LogsAdditionalEndpoint, deserialize_logs_additional_endpoints}, + merge_hashmap, merge_option, merge_option_to_value, merge_string, merge_vec, + processing_rule::{ProcessingRule, deserialize_processing_rules}, + service_mapping::deserialize_service_mapping, + trace_propagation_style::{TracePropagationStyle, 
deserialize_trace_propagation_style}, +}; + +#[derive(Debug, PartialEq, Deserialize, Clone, Default)] +#[serde(default)] +#[allow(clippy::struct_excessive_bools)] +#[allow(clippy::module_name_repetitions)] +pub struct EnvConfig { + /// @env `DD_SITE` + /// + /// The Datadog site to send telemetry to + #[serde(deserialize_with = "deserialize_optional_string")] + pub site: Option, + /// @env `DD_API_KEY` + /// + /// The Datadog API key used to submit telemetry to Datadog + #[serde(deserialize_with = "deserialize_optional_string")] + pub api_key: Option, + /// @env `DD_LOG_LEVEL` + /// + /// Minimum log level of the Datadog Agent. + /// Valid log levels are: trace, debug, info, warn, and error. + pub log_level: Option, + + /// @env `DD_FLUSH_TIMEOUT` + /// + /// Flush timeout in seconds + /// todo(duncanista): find out where this comes from + /// todo(?): go agent adds jitter too + #[serde(deserialize_with = "deserialize_option_lossless")] + pub flush_timeout: Option, + + // Proxy + /// @env `DD_PROXY_HTTPS` + /// + /// Proxy endpoint for HTTPS connections (most Datadog traffic) + #[serde(deserialize_with = "deserialize_optional_string")] + pub proxy_https: Option, + /// @env `DD_PROXY_NO_PROXY` + /// + /// Specify hosts the Agent should connect to directly, bypassing the proxy. + #[serde(deserialize_with = "deserialize_array_from_comma_separated_string")] + pub proxy_no_proxy: Vec, + /// @env `DD_HTTP_PROTOCOL` + /// + /// The HTTP protocol to use for the Datadog Agent. + /// The transport type to use for sending logs. Possible values are "auto" or "http1". + #[serde(deserialize_with = "deserialize_optional_string")] + pub http_protocol: Option, + /// @env `DD_TLS_CERT_FILE` + /// The path to a file of concatenated CA certificates in PEM format. 
+ /// Example: `/opt/ca-cert.pem` + #[serde(deserialize_with = "deserialize_optional_string")] + pub tls_cert_file: Option, + /// @env `DD_SKIP_SSL_VALIDATION` + /// + /// If set to true, the Agent will skip TLS certificate validation for outgoing connections. + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub skip_ssl_validation: Option, + + // Metrics + /// @env `DD_DD_URL` + /// + /// @default `https://app.datadoghq.com` + /// + /// The host of the Datadog intake server to send **metrics** to, only set this option + /// if you need the Agent to send **metrics** to a custom URL, it overrides the site + /// setting defined in "site". It does not affect APM, Logs, Remote Configuration, + /// or Live Process intake which have their own "*_`dd_url`" settings. + /// + /// If `DD_DD_URL` and `DD_URL` are both set, `DD_DD_URL` is used in priority. + #[serde(deserialize_with = "deserialize_optional_string")] + pub dd_url: Option, + /// @env `DD_URL` + /// + /// @default `https://app.datadoghq.com` + #[serde(deserialize_with = "deserialize_optional_string")] + pub url: Option, + /// @env `DD_ADDITIONAL_ENDPOINTS` + /// + /// Additional endpoints to send metrics to. + /// + #[serde(deserialize_with = "deserialize_additional_endpoints")] + pub additional_endpoints: HashMap>, + + // Unified Service Tagging + /// @env `DD_ENV` + /// + /// The environment name where the agent is running. Attached in-app to every + /// metric, event, log, trace, and service check emitted by this Agent. 
+ #[serde(deserialize_with = "deserialize_string_or_int")] + pub env: Option, + /// @env `DD_SERVICE` + #[serde(deserialize_with = "deserialize_string_or_int")] + pub service: Option, + /// @env `DD_VERSION` + #[serde(deserialize_with = "deserialize_string_or_int")] + pub version: Option, + /// @env `DD_TAGS` + #[serde(deserialize_with = "deserialize_key_value_pairs")] + pub tags: HashMap, + /// @env `DD_COMPRESSION_LEVEL` + /// + /// Global level `compression_level` parameter accepts values from 0 (no compression) + /// to 9 (maximum compression but higher resource usage). This value is effective only if + /// the individual component doesn't specify its own. + #[serde(deserialize_with = "deserialize_option_lossless")] + pub compression_level: Option, + + // Logs + /// @env `DD_LOGS_CONFIG_LOGS_DD_URL` + /// + /// Define the endpoint and port to hit when using a proxy for logs. + #[serde(deserialize_with = "deserialize_optional_string")] + pub logs_config_logs_dd_url: Option, + /// @env `DD_LOGS_CONFIG_PROCESSING_RULES` + /// + /// Global processing rules that are applied to all logs. The available rules are + /// "`exclude_at_match`", "`include_at_match`" and "`mask_sequences`". More information in Datadog documentation: + /// + #[serde(deserialize_with = "deserialize_processing_rules")] + pub logs_config_processing_rules: Option>, + /// @env `DD_LOGS_CONFIG_USE_COMPRESSION` + /// + /// If enabled, the Agent compresses logs before sending them. + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub logs_config_use_compression: Option, + /// @env `DD_LOGS_CONFIG_COMPRESSION_LEVEL` + /// + /// The `compression_level` parameter accepts values from 0 (no compression) + /// to 9 (maximum compression but higher resource usage). Only takes effect if + /// `use_compression` is set to `true`. 
+ #[serde(deserialize_with = "deserialize_option_lossless")] + pub logs_config_compression_level: Option, + /// @env `DD_LOGS_CONFIG_ADDITIONAL_ENDPOINTS` + /// + /// Additional endpoints to send logs to. + /// + #[serde(deserialize_with = "deserialize_logs_additional_endpoints")] + pub logs_config_additional_endpoints: Vec, + + /// @env `DD_OBSERVABILITY_PIPELINES_WORKER_LOGS_ENABLED` + /// When true, emit plain json suitable for Observability Pipelines + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub observability_pipelines_worker_logs_enabled: Option, + /// @env `DD_OBSERVABILITY_PIPELINES_WORKER_LOGS_URL` + /// + /// The URL endpoint for sending logs to Observability Pipelines Worker + #[serde(deserialize_with = "deserialize_optional_string")] + pub observability_pipelines_worker_logs_url: Option, + + // APM + // + /// @env `DD_SERVICE_MAPPING` + #[serde(deserialize_with = "deserialize_service_mapping")] + pub service_mapping: HashMap, + // + /// @env `DD_APM_DD_URL` + /// + /// Define the endpoint and port to hit when using a proxy for APM. + #[serde(deserialize_with = "deserialize_optional_string")] + pub apm_dd_url: Option, + /// @env `DD_APM_REPLACE_TAGS` + /// + /// Defines a set of rules to replace or remove certain resources, tags containing + /// potentially sensitive information. + /// Each rule has to contain: + /// * name - string - The tag name to replace, for resources use "resource.name". 
+ /// * pattern - string - The pattern to match the desired content to replace + /// * repl - string - what to inline if the pattern is matched + /// + /// + #[serde(deserialize_with = "deserialize_apm_replace_rules")] + pub apm_replace_tags: Option>, + /// @env `DD_APM_CONFIG_OBFUSCATION_HTTP_REMOVE_QUERY_STRING` + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub apm_config_obfuscation_http_remove_query_string: Option, + /// @env `DD_APM_CONFIG_OBFUSCATION_HTTP_REMOVE_PATHS_WITH_DIGITS` + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub apm_config_obfuscation_http_remove_paths_with_digits: Option, + /// @env `DD_APM_CONFIG_COMPRESSION_LEVEL` + /// + /// The Agent compresses traces before sending them. The `compression_level` parameter + /// accepts values from 0 (no compression) to 9 (maximum compression but + /// higher resource usage). + #[serde(deserialize_with = "deserialize_option_lossless")] + pub apm_config_compression_level: Option, + /// @env `DD_APM_FEATURES` + #[serde(deserialize_with = "deserialize_array_from_comma_separated_string")] + pub apm_features: Vec, + /// @env `DD_APM_ADDITIONAL_ENDPOINTS` + /// + /// Additional endpoints to send traces to. + /// + #[serde(deserialize_with = "deserialize_additional_endpoints")] + pub apm_additional_endpoints: HashMap>, + /// @env `DD_APM_FILTER_TAGS_REQUIRE` + /// + /// Space-separated list of key:value tag pairs that spans must match to be kept. + /// Only spans matching at least one of these tags will be sent to Datadog. + /// Example: "env:production service:api-gateway" + #[serde(deserialize_with = "deserialize_apm_filter_tags")] + pub apm_filter_tags_require: Option>, + /// @env `DD_APM_FILTER_TAGS_REJECT` + /// + /// Space-separated list of key:value tag pairs that will cause spans to be filtered out. + /// Spans matching any of these tags will be dropped. 
+ /// Example: "env:development debug:true name:health.check" + #[serde(deserialize_with = "deserialize_apm_filter_tags")] + pub apm_filter_tags_reject: Option>, + /// @env `DD_APM_FILTER_TAGS_REGEX_REQUIRE` + /// + /// Space-separated list of key:value tag pairs with regex values that spans must match to be kept. + /// Only spans matching at least one of these regex patterns will be sent to Datadog. + /// Example: "env:^prod.*$ service:^api-.*$" + #[serde(deserialize_with = "deserialize_apm_filter_tags")] + pub apm_filter_tags_regex_require: Option>, + /// @env `DD_APM_FILTER_TAGS_REGEX_REJECT` + /// + /// Space-separated list of key:value tag pairs with regex values that will cause spans to be filtered out. + /// Spans matching any of these regex patterns will be dropped. + /// Example: "env:^test.*$ debug:^true$" + #[serde(deserialize_with = "deserialize_apm_filter_tags")] + pub apm_filter_tags_regex_reject: Option>, + /// @env `DD_TRACE_AWS_SERVICE_REPRESENTATION_ENABLED` + /// + /// Enable the new AWS-resource naming logic in the tracer. + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub trace_aws_service_representation_enabled: Option, + // + // Trace Propagation + /// @env `DD_TRACE_PROPAGATION_STYLE` + #[serde(deserialize_with = "deserialize_trace_propagation_style")] + pub trace_propagation_style: Vec, + /// @env `DD_TRACE_PROPAGATION_STYLE_EXTRACT` + #[serde(deserialize_with = "deserialize_trace_propagation_style")] + pub trace_propagation_style_extract: Vec, + /// @env `DD_TRACE_PROPAGATION_EXTRACT_FIRST` + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub trace_propagation_extract_first: Option, + /// @env `DD_TRACE_PROPAGATION_HTTP_BAGGAGE_ENABLED` + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub trace_propagation_http_baggage_enabled: Option, + + /// @env `DD_METRICS_CONFIG_COMPRESSION_LEVEL` + /// The metrics compresses traces before sending them. 
The `compression_level` parameter + /// accepts values from 0 (no compression) to 9 (maximum compression but + /// higher resource usage). + #[serde(deserialize_with = "deserialize_option_lossless")] + pub metrics_config_compression_level: Option, + + /// @env `DD_STATSD_METRIC_NAMESPACE` + /// Prefix all `StatsD` metrics with a namespace. + #[serde(deserialize_with = "deserialize_optional_string")] + pub statsd_metric_namespace: Option, + + /// @env `DD_DOGSTATSD_SO_RCVBUF` + /// Size of the receive buffer for `DogStatsD` UDP packets, in bytes (`SO_RCVBUF`). + /// Increase to reduce packet loss under high-throughput metric bursts. + #[serde(deserialize_with = "deserialize_option_lossless")] + pub dogstatsd_so_rcvbuf: Option, + + /// @env `DD_DOGSTATSD_BUFFER_SIZE` + /// Maximum size of a single read from any transport (UDP or named pipe), in bytes. + /// Defaults to 8192. + #[serde(deserialize_with = "deserialize_option_lossless")] + pub dogstatsd_buffer_size: Option, + + /// @env `DD_DOGSTATSD_QUEUE_SIZE` + /// Internal queue capacity between the socket reader and metric processor. + /// Defaults to 1024. Increase if the processor can't keep up with burst traffic. 
+ #[serde(deserialize_with = "deserialize_option_lossless")] + pub dogstatsd_queue_size: Option, + + // OTLP + // + // - APM / Traces + /// @env `DD_OTLP_CONFIG_TRACES_ENABLED` + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub otlp_config_traces_enabled: Option, + /// @env `DD_OTLP_CONFIG_TRACES_SPAN_NAME_AS_RESOURCE_NAME` + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub otlp_config_traces_span_name_as_resource_name: Option, + /// @env `DD_OTLP_CONFIG_TRACES_SPAN_NAME_REMAPPINGS` + #[serde(deserialize_with = "deserialize_key_value_pairs")] + pub otlp_config_traces_span_name_remappings: HashMap, + /// @env `DD_OTLP_CONFIG_IGNORE_MISSING_DATADOG_FIELDS` + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub otlp_config_ignore_missing_datadog_fields: Option, + // + // - Receiver / HTTP + /// @env `DD_OTLP_CONFIG_RECEIVER_PROTOCOLS_HTTP_ENDPOINT` + #[serde(deserialize_with = "deserialize_optional_string")] + pub otlp_config_receiver_protocols_http_endpoint: Option, + // - Unsupported Configuration + // + // - Receiver / GRPC + /// @env `DD_OTLP_CONFIG_RECEIVER_PROTOCOLS_GRPC_ENDPOINT` + #[serde(deserialize_with = "deserialize_optional_string")] + pub otlp_config_receiver_protocols_grpc_endpoint: Option, + /// @env `DD_OTLP_CONFIG_RECEIVER_PROTOCOLS_GRPC_TRANSPORT` + #[serde(deserialize_with = "deserialize_optional_string")] + pub otlp_config_receiver_protocols_grpc_transport: Option, + /// @env `DD_OTLP_CONFIG_RECEIVER_PROTOCOLS_GRPC_MAX_RECV_MSG_SIZE_MIB` + #[serde(deserialize_with = "deserialize_option_lossless")] + pub otlp_config_receiver_protocols_grpc_max_recv_msg_size_mib: Option, + // - Metrics + /// @env `DD_OTLP_CONFIG_METRICS_ENABLED` + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub otlp_config_metrics_enabled: Option, + /// @env `DD_OTLP_CONFIG_METRICS_RESOURCE_ATTRIBUTES_AS_TAGS` + #[serde(deserialize_with = 
"deserialize_optional_bool_from_anything")] + pub otlp_config_metrics_resource_attributes_as_tags: Option, + /// @env `DD_OTLP_CONFIG_METRICS_INSTRUMENTATION_SCOPE_METADATA_AS_TAGS` + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub otlp_config_metrics_instrumentation_scope_metadata_as_tags: Option, + /// @env `DD_OTLP_CONFIG_METRICS_TAG_CARDINALITY` + #[serde(deserialize_with = "deserialize_optional_string")] + pub otlp_config_metrics_tag_cardinality: Option, + /// @env `DD_OTLP_CONFIG_METRICS_DELTA_TTL` + #[serde(deserialize_with = "deserialize_option_lossless")] + pub otlp_config_metrics_delta_ttl: Option, + /// @env `DD_OTLP_CONFIG_METRICS_HISTOGRAMS_MODE` + #[serde(deserialize_with = "deserialize_optional_string")] + pub otlp_config_metrics_histograms_mode: Option, + /// @env `DD_OTLP_CONFIG_METRICS_HISTOGRAMS_SEND_COUNT_SUM_METRICS` + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub otlp_config_metrics_histograms_send_count_sum_metrics: Option, + /// @env `DD_OTLP_CONFIG_METRICS_HISTOGRAMS_SEND_AGGREGATION_METRICS` + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub otlp_config_metrics_histograms_send_aggregation_metrics: Option, + #[serde(deserialize_with = "deserialize_optional_string")] + pub otlp_config_metrics_sums_cumulative_monotonic_mode: Option, + /// @env `DD_OTLP_CONFIG_METRICS_SUMS_INITIAL_CUMULATIVE_MONOTONIC_VALUE` + #[serde(deserialize_with = "deserialize_optional_string")] + pub otlp_config_metrics_sums_initial_cumulativ_monotonic_value: Option, + /// @env `DD_OTLP_CONFIG_METRICS_SUMMARIES_MODE` + #[serde(deserialize_with = "deserialize_optional_string")] + pub otlp_config_metrics_summaries_mode: Option, + // - Traces + /// @env `DD_OTLP_CONFIG_TRACES_PROBABILISTIC_SAMPLER_SAMPLING_PERCENTAGE` + #[serde(deserialize_with = "deserialize_option_lossless")] + pub otlp_config_traces_probabilistic_sampler_sampling_percentage: Option, + // - Logs + /// @env 
`DD_OTLP_CONFIG_LOGS_ENABLED` + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub otlp_config_logs_enabled: Option, + + // AWS Lambda + /// @env `DD_API_KEY_SECRET_ARN` + /// + /// The AWS ARN of the secret containing the Datadog API key. + #[serde(deserialize_with = "deserialize_optional_string")] + pub api_key_secret_arn: Option, + /// @env `DD_KMS_API_KEY` + /// + /// The AWS KMS API key to use for the Datadog Agent. + #[serde(deserialize_with = "deserialize_optional_string")] + pub kms_api_key: Option, + /// @env `DD_API_KEY_SSM_ARN` + /// + /// The AWS Systems Manager Parameter Store parameter ARN containing the Datadog API key. + #[serde(deserialize_with = "deserialize_optional_string")] + pub api_key_ssm_arn: Option, + /// @env `DD_SERVERLESS_LOGS_ENABLED` + /// + /// Enable logs for AWS Lambda. Default is `true`. + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub serverless_logs_enabled: Option, + /// @env `DD_LOGS_ENABLED` + /// + /// Enable logs for AWS Lambda. Alias for `DD_SERVERLESS_LOGS_ENABLED`. Default is `true`. + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub logs_enabled: Option, + /// @env `DD_SERVERLESS_FLUSH_STRATEGY` + /// + /// The flush strategy to use for AWS Lambda. + pub serverless_flush_strategy: Option, + /// @env `DD_ENHANCED_METRICS` + /// + /// Enable enhanced metrics for AWS Lambda. Default is `true`. + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub enhanced_metrics: Option, + /// @env `DD_LAMBDA_PROC_ENHANCED_METRICS` + /// + /// Enable Lambda process metrics for AWS Lambda. Default is `true`. 
+ /// + /// This is for metrics like: + /// - CPU usage + /// - Network usage + /// - File descriptor count + /// - Thread count + /// - Temp directory usage + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub lambda_proc_enhanced_metrics: Option, + /// @env `DD_CAPTURE_LAMBDA_PAYLOAD` + /// + /// Enable capture of the Lambda request and response payloads. + /// Default is `false`. + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub capture_lambda_payload: Option, + /// @env `DD_CAPTURE_LAMBDA_PAYLOAD_MAX_DEPTH` + /// + /// The maximum depth of the Lambda payload to capture. + /// Default is `10`. Requires `capture_lambda_payload` to be `true`. + #[serde(deserialize_with = "deserialize_option_lossless")] + pub capture_lambda_payload_max_depth: Option, + /// @env `DD_COMPUTE_TRACE_STATS_ON_EXTENSION` + /// + /// If true, enable computation of trace stats on the extension side. + /// If false, trace stats will be computed on the backend side. + /// Default is `false`. + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub compute_trace_stats_on_extension: Option, + /// @env `DD_SPAN_DEDUP_TIMEOUT` + /// + /// The timeout for the span deduplication service to check if a span key exists, in seconds. + /// For now, this is a temporary field added to debug the failure of `check_and_add()` in span dedup service. + /// Do not use this field extensively in production. + #[serde(deserialize_with = "deserialize_optional_duration_from_seconds_ignore_zero")] + pub span_dedup_timeout: Option, + /// @env `DD_API_KEY_SECRET_RELOAD_INTERVAL` + /// + /// The interval at which the Datadog API key is reloaded, in seconds. + /// If None, the API key will not be reloaded. + /// Default is `None`. 
+ #[serde(deserialize_with = "deserialize_optional_duration_from_seconds_ignore_zero")] + pub api_key_secret_reload_interval: Option, + /// @env `DD_SERVERLESS_APPSEC_ENABLED` + /// + /// Enable Application and API Protection (AAP), previously known as AppSec/ASM, for AWS Lambda. + /// Default is `false`. + /// + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub serverless_appsec_enabled: Option, + /// @env `DD_APPSEC_RULES` + /// + /// The path to a user-configured App & API Protection ruleset (in JSON format). + #[serde(deserialize_with = "deserialize_optional_string")] + pub appsec_rules: Option, + /// @env `DD_APPSEC_WAF_TIMEOUT` + /// + /// The timeout for the WAF to process a request, in microseconds. + #[serde(deserialize_with = "deserialize_optional_duration_from_microseconds")] + pub appsec_waf_timeout: Option, + /// @env `DD_API_SECURITY_ENABLED` + /// + /// Enable API Security for AWS Lambda. + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub api_security_enabled: Option, + /// @env `DD_API_SECURITY_SAMPLE_DELAY` + /// + /// The delay between two samples of the API Security schema collection, in seconds. 
+    #[serde(deserialize_with = "deserialize_optional_duration_from_seconds")]
+    pub api_security_sample_delay: Option,
+}
+
+/// Copies the values carried by `env_config` into `config`, one field at a time.
+///
+/// Every field goes through one of the `merge_*!` macros, which are defined outside
+/// this function (presumably each leaves the existing `config` value untouched when
+/// the matching `env_config` value is absent — confirm against the macro definitions).
+///
+/// Statement order is significant and must be preserved:
+/// * `compression_level` is merged first. The four-argument macro form then appears to
+///   seed `logs_config_compression_level`, `apm_config_compression_level` and
+///   `metrics_config_compression_level` from the generic `compression_level`, and the
+///   field-specific merge placed right after each of them lets the dedicated setting win.
+/// * `serverless_logs_enabled` is merged and then recomputed from both
+///   `serverless_logs_enabled` and `logs_enabled` (see the inline comment below).
+#[allow(clippy::too_many_lines)]
+fn merge_config(config: &mut Config, env_config: &EnvConfig) {
+    // Basic fields
+    merge_string!(config, env_config, site);
+    merge_string!(config, env_config, api_key);
+    merge_option_to_value!(config, env_config, log_level);
+    merge_option_to_value!(config, env_config, flush_timeout);
+
+    // Unified Service Tagging
+    merge_option!(config, env_config, env);
+    merge_option!(config, env_config, service);
+    merge_option!(config, env_config, version);
+    merge_hashmap!(config, env_config, tags);
+
+    // Proxy
+    merge_option!(config, env_config, proxy_https);
+    merge_vec!(config, env_config, proxy_no_proxy);
+    merge_option!(config, env_config, http_protocol);
+    merge_option!(config, env_config, tls_cert_file);
+    merge_option_to_value!(config, env_config, skip_ssl_validation);
+
+    // Endpoints
+    merge_string!(config, env_config, dd_url);
+    merge_string!(config, env_config, url);
+    merge_hashmap!(config, env_config, additional_endpoints);
+
+    // Generic compression level; also used as the fallback for the logs, APM and
+    // metrics specific levels further down.
+    merge_option_to_value!(config, env_config, compression_level);
+
+    // Logs
+    merge_string!(config, env_config, logs_config_logs_dd_url);
+    merge_option!(config, env_config, logs_config_processing_rules);
+    merge_option_to_value!(config, env_config, logs_config_use_compression);
+    // Fallback first (generic `compression_level`), then the logs-specific override.
+    merge_option_to_value!(
+        config,
+        logs_config_compression_level,
+        env_config,
+        compression_level
+    );
+    merge_option_to_value!(config, env_config, logs_config_compression_level);
+    merge_vec!(config, env_config, logs_config_additional_endpoints);
+    merge_option_to_value!(
+        config,
+        env_config,
+        observability_pipelines_worker_logs_enabled
+    );
+    merge_string!(config, env_config, observability_pipelines_worker_logs_url);
+
+    // APM
+    merge_hashmap!(config, env_config, service_mapping);
+    merge_string!(config, env_config, apm_dd_url);
+    merge_option!(config, env_config, apm_replace_tags);
+    merge_option_to_value!(
+        config,
+        env_config,
+        apm_config_obfuscation_http_remove_query_string
+    );
+    merge_option_to_value!(
+        config,
+        env_config,
+        apm_config_obfuscation_http_remove_paths_with_digits
+    );
+    // Fallback first (generic `compression_level`), then the APM-specific override.
+    merge_option_to_value!(
+        config,
+        apm_config_compression_level,
+        env_config,
+        compression_level
+    );
+    merge_option_to_value!(config, env_config, apm_config_compression_level);
+    merge_vec!(config, env_config, apm_features);
+    merge_hashmap!(config, env_config, apm_additional_endpoints);
+    merge_option!(config, env_config, apm_filter_tags_require);
+    merge_option!(config, env_config, apm_filter_tags_reject);
+    merge_option!(config, env_config, apm_filter_tags_regex_require);
+    merge_option!(config, env_config, apm_filter_tags_regex_reject);
+    merge_option_to_value!(config, env_config, trace_aws_service_representation_enabled);
+
+    // Trace Propagation
+    merge_vec!(config, env_config, trace_propagation_style);
+    merge_vec!(config, env_config, trace_propagation_style_extract);
+    merge_option_to_value!(config, env_config, trace_propagation_extract_first);
+    merge_option_to_value!(config, env_config, trace_propagation_http_baggage_enabled);
+
+    // Metrics
+    // Fallback first (generic `compression_level`), then the metrics-specific override.
+    merge_option_to_value!(
+        config,
+        metrics_config_compression_level,
+        env_config,
+        compression_level
+    );
+    merge_option_to_value!(config, env_config, metrics_config_compression_level);
+
+    // The namespace is not copied verbatim: it is passed through
+    // `parse_metric_namespace`, whose result is stored as-is.
+    if let Some(namespace) = &env_config.statsd_metric_namespace {
+        config.statsd_metric_namespace = parse_metric_namespace(namespace);
+    }
+
+    // DogStatsD
+    merge_option!(config, env_config, dogstatsd_so_rcvbuf);
+    merge_option!(config, env_config, dogstatsd_buffer_size);
+    merge_option!(config, env_config, dogstatsd_queue_size);
+
+    // OTLP
+    merge_option_to_value!(config, env_config, otlp_config_traces_enabled);
+    merge_option_to_value!(
+        config,
+        env_config,
+        otlp_config_traces_span_name_as_resource_name
+    );
+    merge_hashmap!(config, env_config, otlp_config_traces_span_name_remappings);
+    merge_option_to_value!(
+        config,
+        env_config,
+        otlp_config_ignore_missing_datadog_fields
+    );
+    merge_option!(
+        config,
+        env_config,
+        otlp_config_receiver_protocols_http_endpoint
+    );
+    merge_option!(
+        config,
+        env_config,
+        otlp_config_receiver_protocols_grpc_endpoint
+    );
+    merge_option!(
+        config,
+        env_config,
+        otlp_config_receiver_protocols_grpc_transport
+    );
+    merge_option!(
+        config,
+        env_config,
+        otlp_config_receiver_protocols_grpc_max_recv_msg_size_mib
+    );
+    merge_option_to_value!(config, env_config, otlp_config_metrics_enabled);
+    merge_option_to_value!(
+        config,
+        env_config,
+        otlp_config_metrics_resource_attributes_as_tags
+    );
+    merge_option_to_value!(
+        config,
+        env_config,
+        otlp_config_metrics_instrumentation_scope_metadata_as_tags
+    );
+    merge_option!(config, env_config, otlp_config_metrics_tag_cardinality);
+    merge_option!(config, env_config, otlp_config_metrics_delta_ttl);
+    merge_option!(config, env_config, otlp_config_metrics_histograms_mode);
+    merge_option_to_value!(
+        config,
+        env_config,
+        otlp_config_metrics_histograms_send_count_sum_metrics
+    );
+    merge_option_to_value!(
+        config,
+        env_config,
+        otlp_config_metrics_histograms_send_aggregation_metrics
+    );
+    merge_option!(
+        config,
+        env_config,
+        otlp_config_metrics_sums_cumulative_monotonic_mode
+    );
+    merge_option!(
+        config,
+        env_config,
+        otlp_config_metrics_sums_initial_cumulativ_monotonic_value
+    );
+    merge_option!(config, env_config, otlp_config_metrics_summaries_mode);
+    merge_option!(
+        config,
+        env_config,
+        otlp_config_traces_probabilistic_sampler_sampling_percentage
+    );
+    merge_option_to_value!(config, env_config, otlp_config_logs_enabled);
+
+    // AWS Lambda
+    merge_string!(config, env_config, api_key_secret_arn);
+    merge_string!(config, env_config, kms_api_key);
+    merge_string!(config, env_config, api_key_ssm_arn);
+    // NOTE(review): this merge looks redundant — whenever `serverless_logs_enabled` is
+    // set, the block right below overwrites the field anyway. Confirm against the macro
+    // definition before removing.
+    merge_option_to_value!(config, env_config, serverless_logs_enabled);
+
+    // Handle serverless_logs_enabled with OR logic: if either DD_LOGS_ENABLED or DD_SERVERLESS_LOGS_ENABLED is true, enable logs
+    // An unset variable counts as `false` here, so an explicit `false` in one variable
+    // cannot switch logs off while the other is `true`. When neither is set, the value
+    // already in `config` is kept.
+    if env_config.serverless_logs_enabled.is_some() || env_config.logs_enabled.is_some() {
+        config.serverless_logs_enabled = env_config.serverless_logs_enabled.unwrap_or(false)
+            || env_config.logs_enabled.unwrap_or(false);
+    }
+
+    merge_option_to_value!(config, env_config, serverless_flush_strategy);
+    merge_option_to_value!(config, env_config, enhanced_metrics);
+    merge_option_to_value!(config, env_config, lambda_proc_enhanced_metrics);
+    merge_option_to_value!(config, env_config, capture_lambda_payload);
+    merge_option_to_value!(config, env_config, capture_lambda_payload_max_depth);
+    merge_option_to_value!(config, env_config, compute_trace_stats_on_extension);
+    merge_option!(config, env_config, span_dedup_timeout);
+    merge_option!(config, env_config, api_key_secret_reload_interval);
+    merge_option_to_value!(config, env_config, serverless_appsec_enabled);
+    merge_option!(config, env_config, appsec_rules);
+    merge_option_to_value!(config, env_config, appsec_waf_timeout);
+    merge_option_to_value!(config, env_config, api_security_enabled);
+    merge_option_to_value!(config, env_config, api_security_sample_delay);
+}
+
+/// [`ConfigSource`] implementation that reads configuration from process environment
+/// variables carrying the `DATADOG_` or `DD_` prefix.
+#[derive(Debug, PartialEq, Clone, Copy)]
+#[allow(clippy::module_name_repetitions)]
+pub struct EnvConfigSource;
+
+impl ConfigSource for EnvConfigSource {
+    /// Extracts the environment-provided settings and merges them into `config`.
+    ///
+    /// # Errors
+    ///
+    /// Returns `ConfigError::ParseError` when the environment variables cannot be
+    /// extracted into the environment configuration type; `config` is not modified by
+    /// this function in that case.
+    fn load(&self, config: &mut Config) -> Result<(), ConfigError> {
+        // `DD_` is merged after `DATADOG_`; with figment's `merge`, the provider added
+        // later presumably wins on conflicting keys, i.e. `DD_*` overrides `DATADOG_*`
+        // — confirm against the figment documentation.
+        let figment = Figment::new()
+            .merge(Env::prefixed("DATADOG_"))
+            .merge(Env::prefixed("DD_"));
+
+        // NOTE(review): the turbofish below has lost its type argument in this copy of
+        // the diff — likely `EnvConfig`, since the extracted value is handed to
+        // `merge_config`. Restore it from the upstream file.
+        match figment.extract::() {
+            Ok(env_config) => merge_config(config, &env_config),
+            Err(e) => {
+                // NOTE(review): the message claims the default config is used, yet an
+                // error is returned here; any fallback is the caller's decision.
+                return Err(ConfigError::ParseError(format!(
+                    "Failed to parse config from environment variables: {e}, using default config.",
+                )));
+            }
+        }
+
+        Ok(())
+    }
+}
+
+#[cfg_attr(coverage_nightly, coverage(off))] // Test modules skew coverage metrics
+#[cfg(test)]
+mod tests {
+    use std::time::Duration;
+
+    use super::*;
+    use crate::{
+        Config,
+
flush_strategy::{FlushStrategy, PeriodicStrategy},
+        log_level::LogLevel,
+        processing_rule::{Kind, ProcessingRule},
+        trace_propagation_style::TracePropagationStyle,
+    };
+
+    /// Sets the environment variables listed below inside a `figment::Jail`, loads them
+    /// through `EnvConfigSource`, and compares the resulting `Config` against a fully
+    /// spelled-out expected value.
+    #[test]
+    #[allow(clippy::too_many_lines)]
+    fn test_merge_config_overrides_with_environment_variables() {
+        figment::Jail::expect_with(|jail| {
+            jail.clear_env();
+
+            // Set environment variables here
+            jail.set_env("DD_SITE", "test-site");
+            jail.set_env("DD_API_KEY", "test-api-key");
+            jail.set_env("DD_LOG_LEVEL", "debug");
+            jail.set_env("DD_FLUSH_TIMEOUT", "42");
+
+            // Proxy
+            jail.set_env("DD_PROXY_HTTPS", "https://proxy.example.com");
+            jail.set_env("DD_PROXY_NO_PROXY", "localhost,127.0.0.1");
+            jail.set_env("DD_HTTP_PROTOCOL", "http1");
+            jail.set_env("DD_TLS_CERT_FILE", "/opt/ca-cert.pem");
+            jail.set_env("DD_SKIP_SSL_VALIDATION", "true");
+
+            // Metrics
+            jail.set_env("DD_DD_URL", "https://metrics.datadoghq.com");
+            jail.set_env("DD_URL", "https://app.datadoghq.com");
+            jail.set_env(
+                "DD_ADDITIONAL_ENDPOINTS",
+                "{\"https://app.datadoghq.com\": [\"apikey2\", \"apikey3\"], \"https://app.datadoghq.eu\": [\"apikey4\"]}",
+            );
+
+            // Unified Service Tagging
+            jail.set_env("DD_ENV", "test-env");
+            jail.set_env("DD_SERVICE", "test-service");
+            jail.set_env("DD_VERSION", "1.0.0");
+            jail.set_env("DD_TAGS", "team:test-team,project:test-project");
+            // Global compression level; the logs/APM/metrics-specific levels set further
+            // down (1, 2, 3) are expected to take precedence over this value.
+            jail.set_env("DD_COMPRESSION_LEVEL", "4");
+
+            // Logs
+            jail.set_env("DD_LOGS_CONFIG_LOGS_DD_URL", "https://logs.datadoghq.com");
+            jail.set_env(
+                "DD_LOGS_CONFIG_PROCESSING_RULES",
+                r#"[{"type":"exclude_at_match","name":"exclude","pattern":"exclude"}]"#,
+            );
+            jail.set_env("DD_LOGS_CONFIG_USE_COMPRESSION", "false");
+            jail.set_env("DD_LOGS_CONFIG_COMPRESSION_LEVEL", "1");
+            jail.set_env(
+                "DD_LOGS_CONFIG_ADDITIONAL_ENDPOINTS",
+                "[{\"api_key\": \"apikey2\", \"Host\": \"agent-http-intake.logs.datadoghq.com\", \"Port\": 443, \"is_reliable\": true}]",
+            );
+
+            // APM
+            jail.set_env("DD_SERVICE_MAPPING", "old-service:new-service");
+            // NOTE(review): no field of `expected_config` below corresponds to
+            // DD_APPSEC_ENABLED (only `serverless_appsec_enabled` is asserted) — confirm
+            // whether setting it here is intentional.
+            jail.set_env("DD_APPSEC_ENABLED", "true");
+            jail.set_env("DD_APM_DD_URL", "https://apm.datadoghq.com");
+            jail.set_env(
+                "DD_APM_REPLACE_TAGS",
+                r#"[{"name":"test-tag","pattern":"test-pattern","repl":"replacement"}]"#,
+            );
+            jail.set_env("DD_APM_CONFIG_OBFUSCATION_HTTP_REMOVE_QUERY_STRING", "true");
+            jail.set_env(
+                "DD_APM_CONFIG_OBFUSCATION_HTTP_REMOVE_PATHS_WITH_DIGITS",
+                "true",
+            );
+            jail.set_env("DD_APM_CONFIG_COMPRESSION_LEVEL", "2");
+            jail.set_env(
+                "DD_APM_FEATURES",
+                "enable_otlp_compute_top_level_by_span_kind,enable_stats_by_span_kind",
+            );
+            jail.set_env("DD_APM_ADDITIONAL_ENDPOINTS", "{\"https://trace.agent.datadoghq.com\": [\"apikey2\", \"apikey3\"], \"https://trace.agent.datadoghq.eu\": [\"apikey4\"]}");
+            jail.set_env("DD_APM_FILTER_TAGS_REQUIRE", "env:production service:api");
+            jail.set_env("DD_APM_FILTER_TAGS_REJECT", "debug:true env:test");
+            jail.set_env(
+                "DD_APM_FILTER_TAGS_REGEX_REQUIRE",
+                "env:^test.*$ debug:^true$",
+            );
+            jail.set_env(
+                "DD_APM_FILTER_TAGS_REGEX_REJECT",
+                "env:^test.*$ debug:^true$",
+            );
+
+            // Metrics
+            jail.set_env("DD_METRICS_CONFIG_COMPRESSION_LEVEL", "3");
+            // Trace Propagation
+            jail.set_env("DD_TRACE_PROPAGATION_STYLE", "datadog");
+            jail.set_env("DD_TRACE_PROPAGATION_STYLE_EXTRACT", "b3");
+            jail.set_env("DD_TRACE_PROPAGATION_EXTRACT_FIRST", "true");
+            jail.set_env("DD_TRACE_PROPAGATION_HTTP_BAGGAGE_ENABLED", "true");
+            jail.set_env("DD_TRACE_AWS_SERVICE_REPRESENTATION_ENABLED", "true");
+
+            // OTLP
+            jail.set_env("DD_OTLP_CONFIG_TRACES_ENABLED", "false");
+            jail.set_env("DD_OTLP_CONFIG_TRACES_SPAN_NAME_AS_RESOURCE_NAME", "true");
+            jail.set_env(
+                "DD_OTLP_CONFIG_TRACES_SPAN_NAME_REMAPPINGS",
+                "old-span:new-span",
+            );
+            jail.set_env("DD_OTLP_CONFIG_IGNORE_MISSING_DATADOG_FIELDS", "true");
+            jail.set_env(
+                "DD_OTLP_CONFIG_RECEIVER_PROTOCOLS_HTTP_ENDPOINT",
+                "http://localhost:4318",
+            );
+            jail.set_env(
+                "DD_OTLP_CONFIG_RECEIVER_PROTOCOLS_GRPC_ENDPOINT",
+                "http://localhost:4317",
+            );
+            jail.set_env("DD_OTLP_CONFIG_RECEIVER_PROTOCOLS_GRPC_TRANSPORT", "tcp");
+            jail.set_env(
+                "DD_OTLP_CONFIG_RECEIVER_PROTOCOLS_GRPC_MAX_RECV_MSG_SIZE_MIB",
+                "4",
+            );
+            jail.set_env("DD_OTLP_CONFIG_METRICS_ENABLED", "true");
+            jail.set_env("DD_OTLP_CONFIG_METRICS_RESOURCE_ATTRIBUTES_AS_TAGS", "true");
+            jail.set_env(
+                "DD_OTLP_CONFIG_METRICS_INSTRUMENTATION_SCOPE_METADATA_AS_TAGS",
+                "true",
+            );
+            jail.set_env("DD_OTLP_CONFIG_METRICS_TAG_CARDINALITY", "low");
+            jail.set_env("DD_OTLP_CONFIG_METRICS_DELTA_TTL", "3600");
+            jail.set_env("DD_OTLP_CONFIG_METRICS_HISTOGRAMS_MODE", "counters");
+            jail.set_env(
+                "DD_OTLP_CONFIG_METRICS_HISTOGRAMS_SEND_COUNT_SUM_METRICS",
+                "true",
+            );
+            jail.set_env(
+                "DD_OTLP_CONFIG_METRICS_HISTOGRAMS_SEND_AGGREGATION_METRICS",
+                "true",
+            );
+            jail.set_env(
+                "DD_OTLP_CONFIG_METRICS_SUMS_CUMULATIVE_MONOTONIC_MODE",
+                "to_delta",
+            );
+            jail.set_env(
+                "DD_OTLP_CONFIG_METRICS_SUMS_INITIAL_CUMULATIV_MONOTONIC_VALUE",
+                "auto",
+            );
+            jail.set_env("DD_OTLP_CONFIG_METRICS_SUMMARIES_MODE", "quantiles");
+            jail.set_env(
+                "DD_OTLP_CONFIG_TRACES_PROBABILISTIC_SAMPLER_SAMPLING_PERCENTAGE",
+                "50",
+            );
+            jail.set_env("DD_OTLP_CONFIG_LOGS_ENABLED", "true");
+
+            // DogStatsD
+            jail.set_env("DD_DOGSTATSD_SO_RCVBUF", "1048576");
+            jail.set_env("DD_DOGSTATSD_BUFFER_SIZE", "65507");
+            jail.set_env("DD_DOGSTATSD_QUEUE_SIZE", "2048");
+
+            // AWS Lambda
+            jail.set_env(
+                "DD_API_KEY_SECRET_ARN",
+                "arn:aws:secretsmanager:region:account:secret:datadog-api-key",
+            );
+            jail.set_env("DD_KMS_API_KEY", "test-kms-key");
+            jail.set_env("DD_SERVERLESS_LOGS_ENABLED", "false");
+            jail.set_env("DD_SERVERLESS_FLUSH_STRATEGY", "periodically,60000");
+            jail.set_env("DD_ENHANCED_METRICS", "false");
+            jail.set_env("DD_LAMBDA_PROC_ENHANCED_METRICS", "false");
+            jail.set_env("DD_CAPTURE_LAMBDA_PAYLOAD", "true");
+            jail.set_env("DD_CAPTURE_LAMBDA_PAYLOAD_MAX_DEPTH", "5");
+            jail.set_env("DD_COMPUTE_TRACE_STATS_ON_EXTENSION", "true");
+            jail.set_env("DD_SPAN_DEDUP_TIMEOUT", "5");
+            jail.set_env("DD_API_KEY_SECRET_RELOAD_INTERVAL", "10");
+            jail.set_env("DD_SERVERLESS_APPSEC_ENABLED", "true");
+            jail.set_env("DD_APPSEC_RULES", "/path/to/rules.json");
+            jail.set_env("DD_APPSEC_WAF_TIMEOUT", "1000000"); // Microseconds
+            jail.set_env("DD_API_SECURITY_ENABLED", "0"); // Boolean: "0" is expected to load as `false`
+            jail.set_env("DD_API_SECURITY_SAMPLE_DELAY", "60"); // Seconds
+
+            let mut config = Config::default();
+            let env_config_source = EnvConfigSource;
+            env_config_source
+                .load(&mut config)
+                .expect("Failed to load config");
+
+            let expected_config = Config {
+                site: "test-site".to_string(),
+                api_key: "test-api-key".to_string(),
+                log_level: LogLevel::Debug,
+                compression_level: 4,
+                flush_timeout: 42,
+                proxy_https: Some("https://proxy.example.com".to_string()),
+                proxy_no_proxy: vec!["localhost".to_string(), "127.0.0.1".to_string()],
+                http_protocol: Some("http1".to_string()),
+                tls_cert_file: Some("/opt/ca-cert.pem".to_string()),
+                skip_ssl_validation: true,
+                dd_url: "https://metrics.datadoghq.com".to_string(),
+                url: "https://app.datadoghq.com".to_string(),
+                additional_endpoints: HashMap::from([
+                    (
+                        "https://app.datadoghq.com".to_string(),
+                        vec!["apikey2".to_string(), "apikey3".to_string()],
+                    ),
+                    (
+                        "https://app.datadoghq.eu".to_string(),
+                        vec!["apikey4".to_string()],
+                    ),
+                ]),
+                env: Some("test-env".to_string()),
+                service: Some("test-service".to_string()),
+                version: Some("1.0.0".to_string()),
+                tags: HashMap::from([
+                    ("team".to_string(), "test-team".to_string()),
+                    ("project".to_string(), "test-project".to_string()),
+                ]),
+                logs_config_logs_dd_url: "https://logs.datadoghq.com".to_string(),
+                logs_config_processing_rules: Some(vec![ProcessingRule {
+                    kind: Kind::ExcludeAtMatch,
+                    name: "exclude".to_string(),
+                    pattern: "exclude".to_string(),
+                    replace_placeholder: None,
+                }]),
+                logs_config_use_compression: false,
+                logs_config_compression_level: 1,
+                logs_config_additional_endpoints: vec![LogsAdditionalEndpoint {
+                    api_key: "apikey2".to_string(),
+                    host: "agent-http-intake.logs.datadoghq.com".to_string(),
+                    port: 443,
+                    is_reliable: true,
+                }],
+                observability_pipelines_worker_logs_enabled: false,
+                observability_pipelines_worker_logs_url: String::default(),
+                service_mapping: HashMap::from([(
+                    "old-service".to_string(),
+                    "new-service".to_string(),
+                )]),
+                apm_dd_url: "https://apm.datadoghq.com".to_string(),
+                apm_replace_tags: Some(
+                    libdd_trace_obfuscation::replacer::parse_rules_from_string(
+                        r#"[{"name":"test-tag","pattern":"test-pattern","repl":"replacement"}]"#,
+                    )
+                    .expect("Failed to parse replace rules"),
+                ),
+                apm_config_obfuscation_http_remove_query_string: true,
+                apm_config_obfuscation_http_remove_paths_with_digits: true,
+                apm_config_compression_level: 2,
+                apm_features: vec![
+                    "enable_otlp_compute_top_level_by_span_kind".to_string(),
+                    "enable_stats_by_span_kind".to_string(),
+                ],
+                apm_additional_endpoints: HashMap::from([
+                    (
+                        "https://trace.agent.datadoghq.com".to_string(),
+                        vec!["apikey2".to_string(), "apikey3".to_string()],
+                    ),
+                    (
+                        "https://trace.agent.datadoghq.eu".to_string(),
+                        vec!["apikey4".to_string()],
+                    ),
+                ]),
+                apm_filter_tags_require: Some(vec![
+                    "env:production".to_string(),
+                    "service:api".to_string(),
+                ]),
+                apm_filter_tags_reject: Some(vec![
+                    "debug:true".to_string(),
+                    "env:test".to_string(),
+                ]),
+                apm_filter_tags_regex_require: Some(vec![
+                    "env:^test.*$".to_string(),
+                    "debug:^true$".to_string(),
+                ]),
+                apm_filter_tags_regex_reject: Some(vec![
+                    "env:^test.*$".to_string(),
+                    "debug:^true$".to_string(),
+                ]),
+                trace_propagation_style: vec![TracePropagationStyle::Datadog],
+                trace_propagation_style_extract: vec![TracePropagationStyle::B3],
+                trace_propagation_extract_first: true,
+                trace_propagation_http_baggage_enabled: true,
+                trace_aws_service_representation_enabled: true,
+                metrics_config_compression_level: 3,
+                otlp_config_traces_enabled: false,
+                otlp_config_traces_span_name_as_resource_name: true,
+                otlp_config_traces_span_name_remappings: HashMap::from([(
+                    "old-span".to_string(),
+                    "new-span".to_string(),
+                )]),
+                otlp_config_ignore_missing_datadog_fields: true,
+                otlp_config_receiver_protocols_http_endpoint: Some(
+                    "http://localhost:4318".to_string(),
+                ),
+                otlp_config_receiver_protocols_grpc_endpoint: Some(
+                    "http://localhost:4317".to_string(),
+                ),
+                otlp_config_receiver_protocols_grpc_transport: Some("tcp".to_string()),
+                otlp_config_receiver_protocols_grpc_max_recv_msg_size_mib: Some(4),
+                otlp_config_metrics_enabled: true,
+                otlp_config_metrics_resource_attributes_as_tags: true,
+                otlp_config_metrics_instrumentation_scope_metadata_as_tags: true,
+                otlp_config_metrics_tag_cardinality: Some("low".to_string()),
+                otlp_config_metrics_delta_ttl: Some(3600),
+                otlp_config_metrics_histograms_mode: Some("counters".to_string()),
+                otlp_config_metrics_histograms_send_count_sum_metrics: true,
+                otlp_config_metrics_histograms_send_aggregation_metrics: true,
+                otlp_config_metrics_sums_cumulative_monotonic_mode: Some("to_delta".to_string()),
+                otlp_config_metrics_sums_initial_cumulativ_monotonic_value: Some(
+                    "auto".to_string(),
+                ),
+                otlp_config_metrics_summaries_mode: Some("quantiles".to_string()),
+                otlp_config_traces_probabilistic_sampler_sampling_percentage: Some(50),
+                otlp_config_logs_enabled: true,
+                statsd_metric_namespace: None,
+                dogstatsd_so_rcvbuf: Some(1_048_576),
+                dogstatsd_buffer_size: Some(65507),
+                dogstatsd_queue_size: Some(2048),
+                api_key_secret_arn: "arn:aws:secretsmanager:region:account:secret:datadog-api-key"
+                    .to_string(),
+                kms_api_key: "test-kms-key".to_string(),
+                api_key_ssm_arn: String::default(),
+                serverless_logs_enabled: false,
+                serverless_flush_strategy: FlushStrategy::Periodically(PeriodicStrategy {
+                    interval: 60000,
+                }),
+                enhanced_metrics: false,
+                lambda_proc_enhanced_metrics: false,
+                capture_lambda_payload: true,
+                capture_lambda_payload_max_depth: 5,
+                compute_trace_stats_on_extension: true,
+                span_dedup_timeout: Some(Duration::from_secs(5)),
+                api_key_secret_reload_interval: Some(Duration::from_secs(10)),
+                serverless_appsec_enabled: true,
+                appsec_rules: Some("/path/to/rules.json".to_string()),
+                appsec_waf_timeout: Duration::from_secs(1),
+                api_security_enabled: false,
+                api_security_sample_delay: Duration::from_secs(60),
+            };
+
+            assert_eq!(config, expected_config);
+
+            Ok(())
+        });
+    }
+
+    /// `DD_LOGS_ENABLED=true` on its own turns `serverless_logs_enabled` on.
+    #[test]
+    fn test_dd_logs_enabled_true() {
+        figment::Jail::expect_with(|jail| {
+            jail.clear_env();
+            jail.set_env("DD_LOGS_ENABLED", "true");
+
+            let mut config = Config::default();
+            let env_config_source = EnvConfigSource;
+            env_config_source
+                .load(&mut config)
+                .expect("Failed to load config");
+
+            assert!(config.serverless_logs_enabled);
+            Ok(())
+        });
+    }
+
+    /// `DD_LOGS_ENABLED=false` on its own turns `serverless_logs_enabled` off.
+    #[test]
+    fn test_dd_logs_enabled_false() {
+        figment::Jail::expect_with(|jail| {
+            jail.clear_env();
+            jail.set_env("DD_LOGS_ENABLED", "false");
+
+            let mut config = Config::default();
+            let env_config_source = EnvConfigSource;
+            env_config_source
+                .load(&mut config)
+                .expect("Failed to load config");
+
+            assert!(!config.serverless_logs_enabled);
+            Ok(())
+        });
+    }
+
+    /// `DD_SERVERLESS_LOGS_ENABLED=true` on its own turns `serverless_logs_enabled` on.
+    #[test]
+    fn test_dd_serverless_logs_enabled_true() {
+        figment::Jail::expect_with(|jail| {
+            jail.clear_env();
+            jail.set_env("DD_SERVERLESS_LOGS_ENABLED", "true");
+
+            let mut config = Config::default();
+            let env_config_source = EnvConfigSource;
+            env_config_source
+                .load(&mut config)
+                .expect("Failed to load config");
+
+            assert!(config.serverless_logs_enabled);
+            Ok(())
+        });
+    }
+
+    /// `DD_SERVERLESS_LOGS_ENABLED=false` on its own turns `serverless_logs_enabled` off.
+    #[test]
+    fn test_dd_serverless_logs_enabled_false() {
+        figment::Jail::expect_with(|jail| {
+            jail.clear_env();
+            jail.set_env("DD_SERVERLESS_LOGS_ENABLED", "false");
+
+            let mut config = Config::default();
+            let env_config_source = EnvConfigSource;
+            env_config_source
+                .load(&mut config)
+                .expect("Failed to load config");
+
+            assert!(!config.serverless_logs_enabled);
+            Ok(())
+        });
+    }
+
+    /// Both variables set to `true` leave `serverless_logs_enabled` on.
+    #[test]
+    fn test_both_logs_enabled_true() {
+        figment::Jail::expect_with(|jail| {
+            jail.clear_env();
+            jail.set_env("DD_LOGS_ENABLED", "true");
+            jail.set_env("DD_SERVERLESS_LOGS_ENABLED", "true");
+
+            let mut config = Config::default();
+            let env_config_source = EnvConfigSource;
+            env_config_source
+                .load(&mut config)
+                .expect("Failed to load config");
+
+            assert!(config.serverless_logs_enabled);
+            Ok(())
+        });
+    }
+
+    /// Both variables set to `false` turn `serverless_logs_enabled` off.
+    #[test]
+    fn test_both_logs_enabled_false() {
+        figment::Jail::expect_with(|jail| {
+            jail.clear_env();
+            jail.set_env("DD_LOGS_ENABLED", "false");
+            jail.set_env("DD_SERVERLESS_LOGS_ENABLED", "false");
+
+            let mut config = Config::default();
+            let env_config_source = EnvConfigSource;
+            env_config_source
+                .load(&mut config)
+                .expect("Failed to load config");
+
+            assert!(!config.serverless_logs_enabled);
+            Ok(())
+        });
+    }
+
+    /// `DD_LOGS_ENABLED=true` wins over `DD_SERVERLESS_LOGS_ENABLED=false`.
+    #[test]
+    fn test_logs_enabled_true_serverless_logs_enabled_false() {
+        figment::Jail::expect_with(|jail| {
+            jail.clear_env();
+            jail.set_env("DD_LOGS_ENABLED", "true");
+            jail.set_env("DD_SERVERLESS_LOGS_ENABLED", "false");
+
+            let mut config = Config::default();
+            let env_config_source = EnvConfigSource;
+            env_config_source
+                .load(&mut config)
+                .expect("Failed to load config");
+
+            // OR logic: if either is true, logs are enabled
+            assert!(config.serverless_logs_enabled);
+            Ok(())
+        });
+    }
+
+    /// `DD_SERVERLESS_LOGS_ENABLED=true` wins over `DD_LOGS_ENABLED=false`.
+    #[test]
+    fn test_logs_enabled_false_serverless_logs_enabled_true() {
+        figment::Jail::expect_with(|jail| {
+            jail.clear_env();
+            jail.set_env("DD_LOGS_ENABLED", "false");
+            jail.set_env("DD_SERVERLESS_LOGS_ENABLED", "true");
+
+            let mut config = Config::default();
+            let env_config_source = EnvConfigSource;
+            env_config_source
+                .load(&mut config)
+                .expect("Failed to load config");
+
+            // OR logic: if either is true, logs are enabled
+            assert!(config.serverless_logs_enabled);
+            Ok(())
+        });
+    }
+
+    #[test]
+    fn test_neither_logs_enabled_set_uses_default() {
+        figment::Jail::expect_with(|jail| {
+            jail.clear_env();
+
+            let mut config =
Config::default(); + let env_config_source = EnvConfigSource; + env_config_source + .load(&mut config) + .expect("Failed to load config"); + + // Default value is true + assert!(config.serverless_logs_enabled); + Ok(()) + }); + } + + #[test] + fn test_dogstatsd_config_from_env() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.set_env("DD_DOGSTATSD_SO_RCVBUF", "1048576"); + jail.set_env("DD_DOGSTATSD_BUFFER_SIZE", "65507"); + jail.set_env("DD_DOGSTATSD_QUEUE_SIZE", "2048"); + + let mut config = Config::default(); + let env_config_source = EnvConfigSource; + env_config_source + .load(&mut config) + .expect("Failed to load config"); + + assert_eq!(config.dogstatsd_so_rcvbuf, Some(1_048_576)); + assert_eq!(config.dogstatsd_buffer_size, Some(65507)); + assert_eq!(config.dogstatsd_queue_size, Some(2048)); + Ok(()) + }); + } + + #[test] + fn test_dogstatsd_config_defaults_to_none() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + + let mut config = Config::default(); + let env_config_source = EnvConfigSource; + env_config_source + .load(&mut config) + .expect("Failed to load config"); + + assert_eq!(config.dogstatsd_so_rcvbuf, None); + assert_eq!(config.dogstatsd_buffer_size, None); + assert_eq!(config.dogstatsd_queue_size, None); + Ok(()) + }); + } +} diff --git a/crates/datadog-agent-config/flush_strategy.rs b/crates/datadog-agent-config/flush_strategy.rs new file mode 100644 index 00000000..0a09e822 --- /dev/null +++ b/crates/datadog-agent-config/flush_strategy.rs @@ -0,0 +1,168 @@ +use serde::{Deserialize, Deserializer}; +use tracing::debug; + +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct PeriodicStrategy { + pub interval: u64, +} + +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum FlushStrategy { + // Flush every 1s and at the end of the invocation + Default, + // User specifies the interval in milliseconds, will not block on the runtimeDone event + Periodically(PeriodicStrategy), + // Always flush at the end of the 
invocation + End, + // Flush both (1) at the end of the invocation and (2) periodically with the specified interval + EndPeriodically(PeriodicStrategy), + // Flush in a non-blocking, asynchronous manner, so the next invocation can start without waiting + // for the flush to complete + Continuously(PeriodicStrategy), +} + +impl FlushStrategy { + /// Returns the name of the flush strategy as a string slice. + #[must_use] + pub const fn name(&self) -> &'static str { + match self { + FlushStrategy::Default => "default", + FlushStrategy::End => "end", + FlushStrategy::Periodically(_) => "periodically", + FlushStrategy::EndPeriodically(_) => "end-periodically", + FlushStrategy::Continuously(_) => "continuously", + } + } +} + +// A restricted subset of `FlushStrategy`. The Default strategy is not allowed here; it is required to be +// translated into a concrete strategy. +#[allow(clippy::module_name_repetitions)] +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum ConcreteFlushStrategy { + Periodically(PeriodicStrategy), + End, + EndPeriodically(PeriodicStrategy), + Continuously(PeriodicStrategy), +} + +// Deserialize for FlushStrategy +// Flush Strategy can be either "end", "end,<interval>", "periodically,<interval>", or "continuously,<interval>"; anything else falls back to Default +impl<'de> Deserialize<'de> for FlushStrategy { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let value = String::deserialize(deserializer)?; + if value.as_str() == "end" { + Ok(FlushStrategy::End) + } else { + let mut split_value = value.as_str().split(','); + // "periodically,60000" + // "end,1000" + let strategy = split_value.next(); + let interval: Option = split_value.next().and_then(|v| v.parse().ok()); + + match (strategy, interval) { + (Some("periodically"), Some(interval)) => { + Ok(FlushStrategy::Periodically(PeriodicStrategy { interval })) + } + (Some("continuously"), Some(interval)) => { + Ok(FlushStrategy::Continuously(PeriodicStrategy { interval })) + } + (Some("end"), Some(interval)) => { + 
Ok(FlushStrategy::EndPeriodically(PeriodicStrategy { + interval, + })) + } + (Some(strategy), _) => { + debug!("Invalid flush interval: {}, using default", strategy); + Ok(FlushStrategy::Default) + } + _ => { + debug!("Invalid flush strategy: {}, using default", value); + Ok(FlushStrategy::Default) + } + } + } + } +} + +#[cfg(test)] +#[allow(clippy::unwrap_used)] +mod tests { + use super::*; + + #[test] + fn deserialize_end() { + let flush_strategy: FlushStrategy = serde_json::from_str("\"end\"").unwrap(); + assert_eq!(flush_strategy, FlushStrategy::End); + } + + #[test] + fn deserialize_periodically() { + let flush_strategy: FlushStrategy = serde_json::from_str("\"periodically,60000\"").unwrap(); + assert_eq!( + flush_strategy, + FlushStrategy::Periodically(PeriodicStrategy { interval: 60000 }) + ); + } + + #[test] + fn deserialize_end_periodically() { + let flush_strategy: FlushStrategy = serde_json::from_str("\"end,1000\"").unwrap(); + assert_eq!( + flush_strategy, + FlushStrategy::EndPeriodically(PeriodicStrategy { interval: 1000 }) + ); + } + + #[test] + fn deserialize_invalid() { + let flush_strategy: FlushStrategy = serde_json::from_str("\"invalid\"").unwrap(); + assert_eq!(flush_strategy, FlushStrategy::Default); + } + + #[test] + fn deserialize_invalid_interval() { + let flush_strategy: FlushStrategy = + serde_json::from_str("\"periodically,invalid\"").unwrap(); + assert_eq!(flush_strategy, FlushStrategy::Default); + } + + #[test] + fn deserialize_invalid_end_interval() { + let flush_strategy: FlushStrategy = serde_json::from_str("\"end,invalid\"").unwrap(); + assert_eq!(flush_strategy, FlushStrategy::Default); + } + + #[test] + fn test_flush_strategy_name_default() { + let strategy = FlushStrategy::Default; + assert_eq!(strategy.name(), "default"); + } + + #[test] + fn test_flush_strategy_name_end() { + let strategy = FlushStrategy::End; + assert_eq!(strategy.name(), "end"); + } + + #[test] + fn test_flush_strategy_name_periodically() { + let strategy = 
FlushStrategy::Periodically(PeriodicStrategy { interval: 1000 }); + assert_eq!(strategy.name(), "periodically"); + } + + #[test] + fn test_flush_strategy_name_end_periodically() { + let strategy = FlushStrategy::EndPeriodically(PeriodicStrategy { interval: 2000 }); + assert_eq!(strategy.name(), "end-periodically"); + } + + #[test] + fn test_flush_strategy_name_continuously() { + let strategy = FlushStrategy::Continuously(PeriodicStrategy { interval: 30000 }); + assert_eq!(strategy.name(), "continuously"); + } +} diff --git a/crates/datadog-agent-config/log_level.rs b/crates/datadog-agent-config/log_level.rs new file mode 100644 index 00000000..7443f3ca --- /dev/null +++ b/crates/datadog-agent-config/log_level.rs @@ -0,0 +1,85 @@ +use std::str::FromStr; + +use serde::{Deserialize, Deserializer}; +use serde_json::Value; +use tracing::error; + +#[derive(Clone, Copy, Debug, PartialEq, Default)] +pub enum LogLevel { + /// Designates very serious errors. + Error, + /// Designates hazardous situations. + #[default] + Warn, + /// Designates useful information. + Info, + /// Designates lower priority information. + Debug, + /// Designates very low priority, often extremely verbose, information. 
+ Trace, +} + +impl AsRef for LogLevel { + fn as_ref(&self) -> &str { + match self { + LogLevel::Error => "ERROR", + LogLevel::Warn => "WARN", + LogLevel::Info => "INFO", + LogLevel::Debug => "DEBUG", + LogLevel::Trace => "TRACE", + } + } +} + +impl LogLevel { + /// Construct a `log::LevelFilter` from a `LogLevel` + #[must_use] + pub fn as_level_filter(self) -> log::LevelFilter { + match self { + LogLevel::Error => log::LevelFilter::Error, + LogLevel::Warn => log::LevelFilter::Warn, + LogLevel::Info => log::LevelFilter::Info, + LogLevel::Debug => log::LevelFilter::Debug, + LogLevel::Trace => log::LevelFilter::Trace, + } + } +} + +impl FromStr for LogLevel { + type Err = String; + + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "error" => Ok(LogLevel::Error), + "warn" => Ok(LogLevel::Warn), + "info" => Ok(LogLevel::Info), + "debug" => Ok(LogLevel::Debug), + "trace" => Ok(LogLevel::Trace), + _ => Err(format!( + "Invalid log level: '{s}'. Valid levels are: error, warn, info, debug, trace", + )), + } + } +} + +impl<'de> Deserialize<'de> for LogLevel { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let value = Value::deserialize(deserializer)?; + + if let Value::String(s) = value { + match LogLevel::from_str(&s) { + Ok(level) => Ok(level), + Err(e) => { + error!("{}", e); + Ok(LogLevel::Warn) + } + } + } else { + error!("Expected a string for log level, got {:?}", value); + Ok(LogLevel::Warn) + } + } +} diff --git a/crates/datadog-agent-config/logs_additional_endpoints.rs b/crates/datadog-agent-config/logs_additional_endpoints.rs new file mode 100644 index 00000000..f3d18c15 --- /dev/null +++ b/crates/datadog-agent-config/logs_additional_endpoints.rs @@ -0,0 +1,72 @@ +use serde::{Deserialize, Deserializer}; +use serde_json::Value; +use tracing::error; + +#[derive(Debug, PartialEq, Clone, Deserialize)] +pub struct LogsAdditionalEndpoint { + pub api_key: String, + #[serde(rename = "Host")] + pub host: String, + 
#[serde(rename = "Port")] + pub port: u32, + pub is_reliable: bool, +} + +#[allow(clippy::module_name_repetitions)] +pub fn deserialize_logs_additional_endpoints<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let value = Value::deserialize(deserializer)?; + + match value { + Value::String(s) if !s.is_empty() => { + // For JSON format (string) in DD_LOGS_CONFIG_ADDITIONAL_ENDPOINTS; a string that fails to parse is logged and yields an empty list + Ok(serde_json::from_str(&s).unwrap_or_else(|err| { + error!("Failed to deserialize DD_LOGS_CONFIG_ADDITIONAL_ENDPOINTS: {err}"); + vec![] + })) + } + _ => Ok(Vec::new()), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn test_deserialize_logs_additional_endpoints_valid() { + let input = json!( + "[{\"api_key\": \"apiKey2\", \"Host\": \"agent-http-intake.logs.datadoghq.com\", \"Port\": 443, \"is_reliable\": true}]" + ); + + let result = deserialize_logs_additional_endpoints(input) + .expect("Failed to deserialize logs additional endpoints"); + let expected = vec![LogsAdditionalEndpoint { + api_key: "apiKey2".to_string(), + host: "agent-http-intake.logs.datadoghq.com".to_string(), + port: 443, + is_reliable: true, + }]; + + assert_eq!(result, expected); + } + + #[test] + fn test_deserialize_logs_additional_endpoints_invalid() { + // input missing "Port" field + let input = json!( + "[{\"api_key\": \"apiKey2\", \"Host\": \"agent-http-intake.logs.datadoghq.com\", \"is_reliable\": true}]" + ); + + let result = deserialize_logs_additional_endpoints(input) + .expect("Failed to deserialize logs additional endpoints"); + let expected = Vec::new(); // expect empty list due to invalid input + + assert_eq!(result, expected); + } +} diff --git a/crates/datadog-agent-config/mod.rs b/crates/datadog-agent-config/mod.rs new file mode 100644 index 00000000..1d27be6d --- /dev/null +++ b/crates/datadog-agent-config/mod.rs @@ -0,0 +1,1637 @@ +pub mod additional_endpoints; +pub mod apm_replace_rule; +pub mod env; +pub mod flush_strategy; 
+pub mod log_level; +pub mod logs_additional_endpoints; +pub mod processing_rule; +pub mod service_mapping; +pub mod trace_propagation_style; +pub mod yaml; + +use libdd_trace_obfuscation::replacer::ReplaceRule; +use libdd_trace_utils::config_utils::{trace_intake_url, trace_intake_url_prefixed}; + +use serde::{Deserialize, Deserializer}; +use serde_aux::prelude::deserialize_bool_from_anything; +use serde_json::Value; + +use std::path::Path; +use std::time::Duration; +use std::{collections::HashMap, fmt}; +use tracing::{debug, error}; + +use crate::{ + apm_replace_rule::deserialize_apm_replace_rules, + env::EnvConfigSource, + flush_strategy::FlushStrategy, + log_level::LogLevel, + logs_additional_endpoints::LogsAdditionalEndpoint, + processing_rule::{ProcessingRule, deserialize_processing_rules}, + trace_propagation_style::TracePropagationStyle, + yaml::YamlConfigSource, +}; + +/// Helper macro to merge Option fields to String fields +/// +/// Providing one field argument will merge the value from the source config field into the config +/// field. +/// +/// Providing two field arguments will merge the value from the source config field into the config +/// field if the value is not empty. +#[macro_export] +macro_rules! merge_string { + ($config:expr, $config_field:ident, $source:expr, $source_field:ident) => { + if let Some(value) = &$source.$source_field { + $config.$config_field.clone_from(value); + } + }; + ($config:expr, $source:expr, $field:ident) => { + if let Some(value) = &$source.$field { + $config.$field.clone_from(value); + } + }; +} + +/// Helper macro to merge Option fields where T implements Clone +/// +/// Providing one field argument will merge the value from the source config field into the config +/// field. +/// +/// Providing two field arguments will merge the value from the source config field into the config +/// field if the value is not empty. +#[macro_export] +macro_rules! 
merge_option { + ($config:expr, $config_field:ident, $source:expr, $source_field:ident) => { + if $source.$source_field.is_some() { + $config.$config_field.clone_from(&$source.$source_field); + } + }; + ($config:expr, $source:expr, $field:ident) => { + if $source.$field.is_some() { + $config.$field.clone_from(&$source.$field); + } + }; +} + +/// Helper macro to merge `Option<T>` fields to `T` fields when the `Option` is `Some` +/// +/// Providing one field argument will merge the source config field into the config field of the +/// same name. +/// +/// Providing two field arguments will merge the given source config field into the given config +/// field. In both forms the config field is only overwritten if the source value is `Some`. +#[macro_export] +macro_rules! merge_option_to_value { + ($config:expr, $config_field:ident, $source:expr, $source_field:ident) => { + if let Some(value) = &$source.$source_field { + $config.$config_field = value.clone(); + } + }; + ($config:expr, $source:expr, $field:ident) => { + if let Some(value) = &$source.$field { + $config.$field = value.clone(); + } + }; +} + +/// Helper macro to merge `Vec` fields when the source `Vec` is not empty +/// +/// Providing one field argument will merge the source config field into the config field of the +/// same name. +/// +/// Providing two field arguments will merge the given source config field into the given config +/// field. In both forms the config field is only overwritten if the source `Vec` is not empty. +#[macro_export] +macro_rules! merge_vec { + ($config:expr, $config_field:ident, $source:expr, $source_field:ident) => { + if !$source.$source_field.is_empty() { + $config.$config_field.clone_from(&$source.$source_field); + } + }; + ($config:expr, $source:expr, $field:ident) => { + if !$source.$field.is_empty() { + $config.$field.clone_from(&$source.$field); + } + }; +} + +// nit: these will replace one map with the other, not merge the maps together, right? 
+/// Helper macro to merge `HashMap` fields when `HashMap` is not empty +/// +/// Providing one field argument will merge the value from the source config field into the config +/// field. +/// +/// Providing two field arguments will merge the value from the source config field into the config +/// field if the value is not empty. +#[macro_export] +macro_rules! merge_hashmap { + ($config:expr, $config_field:ident, $source:expr, $source_field:ident) => { + if !$source.$source_field.is_empty() { + $config.$config_field.clone_from(&$source.$source_field); + } + }; + ($config:expr, $source:expr, $field:ident) => { + if !$source.$field.is_empty() { + $config.$field.clone_from(&$source.$field); + } + }; +} + +#[derive(Debug, PartialEq)] +#[allow(clippy::module_name_repetitions)] +pub enum ConfigError { + ParseError(String), + UnsupportedField(String), +} + +#[allow(clippy::module_name_repetitions)] +pub trait ConfigSource { + fn load(&self, config: &mut Config) -> Result<(), ConfigError>; +} + +#[derive(Default)] +#[allow(clippy::module_name_repetitions)] +pub struct ConfigBuilder { + sources: Vec>, + config: Config, +} + +#[allow(clippy::module_name_repetitions)] +impl ConfigBuilder { + #[must_use] + pub fn add_source(mut self, source: Box) -> Self { + self.sources.push(source); + self + } + + pub fn build(&mut self) -> Config { + let mut failed_sources = 0; + for source in &self.sources { + match source.load(&mut self.config) { + Ok(()) => (), + Err(e) => { + error!("Failed to load config: {:?}", e); + failed_sources += 1; + } + } + } + + if !self.sources.is_empty() && failed_sources == self.sources.len() { + debug!("All sources failed to load config, using default config."); + } + + if self.config.site.is_empty() { + self.config.site = "datadoghq.com".to_string(); + } + + // If `proxy_https` is not set, set it from `HTTPS_PROXY` environment variable + // if it exists + if let Ok(https_proxy) = std::env::var("HTTPS_PROXY") + && self.config.proxy_https.is_none() + { + 
self.config.proxy_https = Some(https_proxy); + } + + // If `proxy_https` is set, check if the site is in `NO_PROXY` environment variable + // or in the `proxy_no_proxy` config field. + if self.config.proxy_https.is_some() { + let site_in_no_proxy = std::env::var("NO_PROXY") + .is_ok_and(|no_proxy| no_proxy.contains(&self.config.site)) + || self + .config + .proxy_no_proxy + .iter() + .any(|no_proxy| no_proxy.contains(&self.config.site)); + if site_in_no_proxy { + self.config.proxy_https = None; + } + } + + // If extraction is not set, set it to the same as the propagation style + if self.config.trace_propagation_style_extract.is_empty() { + self.config + .trace_propagation_style_extract + .clone_from(&self.config.trace_propagation_style); + } + + // If Logs URL is not set, set it to the default + if self.config.logs_config_logs_dd_url.trim().is_empty() { + self.config.logs_config_logs_dd_url = build_fqdn_logs(self.config.site.clone()); + } else { + self.config.logs_config_logs_dd_url = + logs_intake_url(self.config.logs_config_logs_dd_url.as_str()); + } + + // If APM URL is not set, set it to the default + if self.config.apm_dd_url.is_empty() { + self.config.apm_dd_url = trace_intake_url(self.config.site.clone().as_str()); + } else { + // If APM URL is set, add the site to the URL + self.config.apm_dd_url = trace_intake_url_prefixed(self.config.apm_dd_url.as_str()); + } + + self.config.clone() + } +} + +#[derive(Debug, PartialEq, Clone)] +#[allow(clippy::module_name_repetitions)] +#[allow(clippy::struct_excessive_bools)] +pub struct Config { + pub site: String, + pub api_key: String, + pub log_level: LogLevel, + + // Timeout for the request to flush data to Datadog endpoint + pub flush_timeout: u64, + + // Global config of compression levels. 
+ // It would be overridden by the setup for the individual component + pub compression_level: i32, + + // Proxy + pub proxy_https: Option, + pub proxy_no_proxy: Vec, + pub http_protocol: Option, + pub tls_cert_file: Option, + pub skip_ssl_validation: bool, + + // Endpoints + pub dd_url: String, + pub url: String, + pub additional_endpoints: HashMap>, + + // Unified Service Tagging + pub env: Option, + pub service: Option, + pub version: Option, + pub tags: HashMap, + + // Logs + pub logs_config_logs_dd_url: String, + pub logs_config_processing_rules: Option>, + pub logs_config_use_compression: bool, + pub logs_config_compression_level: i32, + pub logs_config_additional_endpoints: Vec, + pub observability_pipelines_worker_logs_enabled: bool, + pub observability_pipelines_worker_logs_url: String, + + // APM + // + pub service_mapping: HashMap, + // + pub apm_dd_url: String, + pub apm_replace_tags: Option>, + pub apm_config_obfuscation_http_remove_query_string: bool, + pub apm_config_obfuscation_http_remove_paths_with_digits: bool, + pub apm_config_compression_level: i32, + pub apm_features: Vec, + pub apm_additional_endpoints: HashMap>, + pub apm_filter_tags_require: Option>, + pub apm_filter_tags_reject: Option>, + pub apm_filter_tags_regex_require: Option>, + pub apm_filter_tags_regex_reject: Option>, + // + // Trace Propagation + pub trace_propagation_style: Vec, + pub trace_propagation_style_extract: Vec, + pub trace_propagation_extract_first: bool, + pub trace_propagation_http_baggage_enabled: bool, + pub trace_aws_service_representation_enabled: bool, + + // Metrics + pub metrics_config_compression_level: i32, + pub statsd_metric_namespace: Option, + /// Size of the receive buffer for `DogStatsD` UDP packets, in bytes (`SO_RCVBUF`). + /// Increase to reduce packet loss under high-throughput metric bursts. + /// If None, uses the OS default. + pub dogstatsd_so_rcvbuf: Option, + /// Maximum size of a single read from any transport (UDP or named pipe), in bytes. 
+ /// Defaults to 8192. For UDP, the client must batch metrics into packets of + /// this size for the increase to take effect. + pub dogstatsd_buffer_size: Option, + /// Internal queue capacity between the socket reader and metric processor. + /// Defaults to 1024. Increase if the processor can't keep up with burst traffic. + pub dogstatsd_queue_size: Option, + + // OTLP + // + // - APM / Traces + pub otlp_config_traces_enabled: bool, + pub otlp_config_traces_span_name_as_resource_name: bool, + pub otlp_config_traces_span_name_remappings: HashMap, + pub otlp_config_ignore_missing_datadog_fields: bool, + // + // - Receiver / HTTP + pub otlp_config_receiver_protocols_http_endpoint: Option, + // - Unsupported Configuration + // + // - Receiver / GRPC + pub otlp_config_receiver_protocols_grpc_endpoint: Option, + pub otlp_config_receiver_protocols_grpc_transport: Option, + pub otlp_config_receiver_protocols_grpc_max_recv_msg_size_mib: Option, + // - Metrics + pub otlp_config_metrics_enabled: bool, + pub otlp_config_metrics_resource_attributes_as_tags: bool, + pub otlp_config_metrics_instrumentation_scope_metadata_as_tags: bool, + pub otlp_config_metrics_tag_cardinality: Option, + pub otlp_config_metrics_delta_ttl: Option, + pub otlp_config_metrics_histograms_mode: Option, + pub otlp_config_metrics_histograms_send_count_sum_metrics: bool, + pub otlp_config_metrics_histograms_send_aggregation_metrics: bool, + pub otlp_config_metrics_sums_cumulative_monotonic_mode: Option, + // nit: is the e in cumulative missing intentionally? 
+ pub otlp_config_metrics_sums_initial_cumulativ_monotonic_value: Option, + pub otlp_config_metrics_summaries_mode: Option, + // - Traces + pub otlp_config_traces_probabilistic_sampler_sampling_percentage: Option, + // - Logs + pub otlp_config_logs_enabled: bool, + + // AWS Lambda + pub api_key_secret_arn: String, + pub kms_api_key: String, + pub api_key_ssm_arn: String, + pub serverless_logs_enabled: bool, + pub serverless_flush_strategy: FlushStrategy, + pub enhanced_metrics: bool, + pub lambda_proc_enhanced_metrics: bool, + pub capture_lambda_payload: bool, + pub capture_lambda_payload_max_depth: u32, + pub compute_trace_stats_on_extension: bool, + pub span_dedup_timeout: Option, + pub api_key_secret_reload_interval: Option, + + pub serverless_appsec_enabled: bool, + pub appsec_rules: Option, + pub appsec_waf_timeout: Duration, + pub api_security_enabled: bool, + pub api_security_sample_delay: Duration, +} + +impl Default for Config { + fn default() -> Self { + Self { + site: String::default(), + api_key: String::default(), + log_level: LogLevel::default(), + flush_timeout: 30, + + // Proxy + proxy_https: None, + proxy_no_proxy: vec![], + http_protocol: None, + tls_cert_file: None, + skip_ssl_validation: false, + + // Endpoints + dd_url: String::default(), + url: String::default(), + additional_endpoints: HashMap::new(), + + // Unified Service Tagging + env: None, + service: None, + version: None, + tags: HashMap::new(), + + compression_level: 3, + + // Logs + logs_config_logs_dd_url: String::default(), + logs_config_processing_rules: None, + logs_config_use_compression: true, + logs_config_compression_level: 3, + logs_config_additional_endpoints: Vec::new(), + observability_pipelines_worker_logs_enabled: false, + observability_pipelines_worker_logs_url: String::default(), + + // APM + service_mapping: HashMap::new(), + apm_dd_url: String::default(), + apm_replace_tags: None, + apm_config_obfuscation_http_remove_query_string: false, + 
apm_config_obfuscation_http_remove_paths_with_digits: false, + apm_config_compression_level: 3, + apm_features: vec![], + apm_additional_endpoints: HashMap::new(), + apm_filter_tags_require: None, + apm_filter_tags_reject: None, + apm_filter_tags_regex_require: None, + apm_filter_tags_regex_reject: None, + trace_aws_service_representation_enabled: true, + trace_propagation_style: vec![ + TracePropagationStyle::Datadog, + TracePropagationStyle::TraceContext, + ], + trace_propagation_style_extract: vec![], + trace_propagation_extract_first: false, + trace_propagation_http_baggage_enabled: false, + + // Metrics + metrics_config_compression_level: 3, + statsd_metric_namespace: None, + + // DogStatsD + // Defaults to None, which uses the OS default. + dogstatsd_so_rcvbuf: None, + // Defaults to 8192 internally. + dogstatsd_buffer_size: None, + // Defaults to 1024 internally. + dogstatsd_queue_size: None, + + // OTLP + otlp_config_traces_enabled: true, + otlp_config_traces_span_name_as_resource_name: false, + otlp_config_traces_span_name_remappings: HashMap::new(), + otlp_config_ignore_missing_datadog_fields: false, + otlp_config_receiver_protocols_http_endpoint: None, + otlp_config_receiver_protocols_grpc_endpoint: None, + otlp_config_receiver_protocols_grpc_transport: None, + otlp_config_receiver_protocols_grpc_max_recv_msg_size_mib: None, + otlp_config_metrics_enabled: false, // TODO(duncanista): Go Agent default is to true + otlp_config_metrics_resource_attributes_as_tags: false, + otlp_config_metrics_instrumentation_scope_metadata_as_tags: false, + otlp_config_metrics_tag_cardinality: None, + otlp_config_metrics_delta_ttl: None, + otlp_config_metrics_histograms_mode: None, + otlp_config_metrics_histograms_send_count_sum_metrics: false, + otlp_config_metrics_histograms_send_aggregation_metrics: false, + otlp_config_metrics_sums_cumulative_monotonic_mode: None, + otlp_config_metrics_sums_initial_cumulativ_monotonic_value: None, + otlp_config_metrics_summaries_mode: 
None, + otlp_config_traces_probabilistic_sampler_sampling_percentage: None, + otlp_config_logs_enabled: false, + + // AWS Lambda + api_key_secret_arn: String::default(), + kms_api_key: String::default(), + api_key_ssm_arn: String::default(), + serverless_logs_enabled: true, + serverless_flush_strategy: FlushStrategy::Default, + enhanced_metrics: true, + lambda_proc_enhanced_metrics: true, + capture_lambda_payload: false, + capture_lambda_payload_max_depth: 10, + compute_trace_stats_on_extension: false, + span_dedup_timeout: None, + api_key_secret_reload_interval: None, + + serverless_appsec_enabled: false, + appsec_rules: None, + appsec_waf_timeout: Duration::from_millis(5), + api_security_enabled: true, + api_security_sample_delay: Duration::from_secs(30), + } + } +} + +#[allow(clippy::module_name_repetitions)] +#[inline] +#[must_use] +pub fn get_config(config_directory: &Path) -> Config { + let path: std::path::PathBuf = config_directory.join("datadog.yaml"); + ConfigBuilder::default() + .add_source(Box::new(YamlConfigSource { path })) + .add_source(Box::new(EnvConfigSource)) + .build() +} + +#[inline] +#[must_use] +fn build_fqdn_logs(site: String) -> String { + format!("https://http-intake.logs.{site}") +} + +#[inline] +#[must_use] +fn logs_intake_url(url: &str) -> String { + let url = url.trim(); + if url.is_empty() { + return url.to_string(); + } + if url.starts_with("https://") || url.starts_with("http://") { + return url.to_string(); + } + format!("https://{url}") +} + +pub fn deserialize_optional_string<'de, D>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + match Value::deserialize(deserializer)? 
{ + Value::String(s) => Ok(Some(s)), + other => { + error!( + "Failed to parse value, expected a string, got: {}, ignoring", + other + ); + Ok(None) + } + } +} + +pub fn deserialize_string_or_int<'de, D>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let value = Value::deserialize(deserializer)?; + match value { + Value::String(s) => { + if s.trim().is_empty() { + Ok(None) + } else { + Ok(Some(s)) + } + } + Value::Number(n) => Ok(Some(n.to_string())), + _ => { + error!("Failed to parse value, expected a string or an integer, ignoring"); + Ok(None) + } + } +} + +pub fn deserialize_optional_bool_from_anything<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + // First try to deserialize as Option<_> to handle null/missing values + let opt: Option = Option::deserialize(deserializer)?; + + match opt { + None => Ok(None), + Some(value) => match deserialize_bool_from_anything(value) { + Ok(bool_result) => Ok(Some(bool_result)), + Err(e) => { + error!("Failed to parse bool value: {}, ignoring", e); + Ok(None) + } + }, + } +} + +/// Parse a single "key:value" string into a (key, value) tuple +/// Returns None if the string is invalid (e.g., missing colon, empty key/value) +fn parse_key_value_tag(tag: &str) -> Option<(String, String)> { + let parts: Vec<&str> = tag.splitn(2, ':').collect(); + if parts.len() == 2 && !parts[0].is_empty() && !parts[1].is_empty() { + Some((parts[0].to_string(), parts[1].to_string())) + } else { + error!( + "Failed to parse tag '{}', expected format 'key:value', ignoring", + tag + ); + None + } +} + +pub fn deserialize_key_value_pairs<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + struct KeyValueVisitor; + + impl serde::de::Visitor<'_> for KeyValueVisitor { + type Value = HashMap; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a string in format 'key1:value1,key2:value2' or 'key1:value1'") + } 
+ + fn visit_str(self, value: &str) -> Result + where + E: serde::de::Error, + { + let mut map = HashMap::new(); + for tag in value.split(&[',', ' ']) { + if tag.is_empty() { + continue; + } + if let Some((key, val)) = parse_key_value_tag(tag) { + map.insert(key, val); + } + } + + Ok(map) + } + + fn visit_u64(self, value: u64) -> Result + where + E: serde::de::Error, + { + error!( + "Failed to parse tags: expected string in format 'key:value', got number {}, ignoring", + value + ); + Ok(HashMap::new()) + } + + fn visit_i64(self, value: i64) -> Result + where + E: serde::de::Error, + { + error!( + "Failed to parse tags: expected string in format 'key:value', got number {}, ignoring", + value + ); + Ok(HashMap::new()) + } + + fn visit_f64(self, value: f64) -> Result + where + E: serde::de::Error, + { + error!( + "Failed to parse tags: expected string in format 'key:value', got number {}, ignoring", + value + ); + Ok(HashMap::new()) + } + + fn visit_bool(self, value: bool) -> Result + where + E: serde::de::Error, + { + error!( + "Failed to parse tags: expected string in format 'key:value', got boolean {}, ignoring", + value + ); + Ok(HashMap::new()) + } + } + + deserializer.deserialize_any(KeyValueVisitor) +} + +pub fn deserialize_array_from_comma_separated_string<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let s: String = String::deserialize(deserializer)?; + Ok(s.split(',') + .map(|feature| feature.trim().to_string()) + .filter(|feature| !feature.is_empty()) + .collect()) +} + +pub fn deserialize_key_value_pair_array_to_hashmap<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let array: Vec = Vec::deserialize(deserializer)?; + let mut map = HashMap::new(); + for s in array { + if let Some((key, val)) = parse_key_value_tag(&s) { + map.insert(key, val); + } + } + Ok(map) +} + +/// Deserialize APM filter tags from space-separated "key:value" pairs, also support key-only tags +pub fn 
deserialize_apm_filter_tags<'de, D>(deserializer: D) -> Result>, D::Error> +where + D: Deserializer<'de>, +{ + let opt: Option = Option::deserialize(deserializer)?; + + match opt { + None => Ok(None), + Some(s) if s.trim().is_empty() => Ok(None), + Some(s) => { + let tags: Vec = s + .split_whitespace() + .filter_map(|pair| { + let parts: Vec<&str> = pair.splitn(2, ':').collect(); + if parts.len() == 2 { + let key = parts[0].trim(); + let value = parts[1].trim(); + if key.is_empty() { + None + } else if value.is_empty() { + Some(key.to_string()) + } else { + Some(format!("{key}:{value}")) + } + } else if parts.len() == 1 { + let key = parts[0].trim(); + if key.is_empty() { + None + } else { + Some(key.to_string()) + } + } else { + None + } + }) + .collect(); + + if tags.is_empty() { + Ok(None) + } else { + Ok(Some(tags)) + } + } + } +} + +pub fn deserialize_option_lossless<'de, D, T>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, + T: Deserialize<'de>, +{ + match Option::::deserialize(deserializer) { + Ok(value) => Ok(value), + Err(e) => { + error!("Failed to deserialize optional value: {}, ignoring", e); + Ok(None) + } + } +} + +pub fn deserialize_optional_duration_from_microseconds<'de, D: Deserializer<'de>>( + deserializer: D, +) -> Result, D::Error> { + Ok(Option::::deserialize(deserializer)?.map(Duration::from_micros)) +} + +pub fn deserialize_optional_duration_from_seconds<'de, D: Deserializer<'de>>( + deserializer: D, +) -> Result, D::Error> { + struct DurationVisitor; + impl serde::de::Visitor<'_> for DurationVisitor { + type Value = Option; + fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "a duration in seconds (integer or float)") + } + fn visit_u64(self, v: u64) -> Result { + Ok(Some(Duration::from_secs(v))) + } + fn visit_i64(self, v: i64) -> Result { + if v < 0 { + error!("Failed to parse duration: negative durations are not allowed, ignoring"); + return Ok(None); + } + 
self.visit_u64(u64::try_from(v).expect("positive i64 to u64 conversion never fails")) + } + fn visit_f64(self, v: f64) -> Result { + if v < 0f64 { + error!("Failed to parse duration: negative durations are not allowed, ignoring"); + return Ok(None); + } + Ok(Some(Duration::from_secs_f64(v))) + } + } + deserializer.deserialize_any(DurationVisitor) +} + +// Like deserialize_optional_duration_from_seconds(), but return None if the value is 0 +pub fn deserialize_optional_duration_from_seconds_ignore_zero<'de, D: Deserializer<'de>>( + deserializer: D, +) -> Result, D::Error> { + let duration: Option = deserialize_optional_duration_from_seconds(deserializer)?; + if duration.is_some_and(|d| d.as_secs() == 0) { + return Ok(None); + } + Ok(duration) +} + +#[cfg_attr(coverage_nightly, coverage(off))] // Test modules skew coverage metrics +#[cfg(test)] +pub mod tests { + use libdd_trace_obfuscation::replacer::parse_rules_from_string; + + use super::*; + + use crate::{ + flush_strategy::{FlushStrategy, PeriodicStrategy}, + log_level::LogLevel, + processing_rule::ProcessingRule, + trace_propagation_style::TracePropagationStyle, + }; + + #[test] + fn test_default_logs_intake_url() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + + let config = get_config(Path::new("")); + assert_eq!( + config.logs_config_logs_dd_url, + "https://http-intake.logs.datadoghq.com".to_string() + ); + Ok(()) + }); + } + + #[test] + fn test_support_pci_logs_intake_url() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.set_env( + "DD_LOGS_CONFIG_LOGS_DD_URL", + "agent-http-intake-pci.logs.datadoghq.com:443", + ); + + let config = get_config(Path::new("")); + assert_eq!( + config.logs_config_logs_dd_url, + "https://agent-http-intake-pci.logs.datadoghq.com:443".to_string() + ); + Ok(()) + }); + } + + #[test] + fn test_logs_intake_url_adds_prefix() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.set_env( + "DD_LOGS_CONFIG_LOGS_DD_URL", + 
"dr-test-failover-http-intake.logs.datadoghq.com:443", + ); + + let config = get_config(Path::new("")); + // ensure host:port URL is prefixed with https:// + assert_eq!( + config.logs_config_logs_dd_url, + "https://dr-test-failover-http-intake.logs.datadoghq.com:443".to_string() + ); + Ok(()) + }); + } + + #[test] + fn test_prefixed_logs_intake_url() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.set_env( + "DD_LOGS_CONFIG_LOGS_DD_URL", + "https://custom-intake.logs.datadoghq.com:443", + ); + + let config = get_config(Path::new("")); + assert_eq!( + config.logs_config_logs_dd_url, + "https://custom-intake.logs.datadoghq.com:443".to_string() + ); + Ok(()) + }); + } + + #[test] + fn test_support_pci_traces_intake_url() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.set_env("DD_APM_DD_URL", "https://trace-pci.agent.datadoghq.com"); + + let config = get_config(Path::new("")); + assert_eq!( + config.apm_dd_url, + "https://trace-pci.agent.datadoghq.com/api/v0.2/traces".to_string() + ); + Ok(()) + }); + } + + #[test] + fn test_support_dd_dd_url() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.set_env("DD_DD_URL", "custom_proxy:3128"); + + let config = get_config(Path::new("")); + assert_eq!(config.dd_url, "custom_proxy:3128".to_string()); + Ok(()) + }); + } + + #[test] + fn test_support_dd_url() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.set_env("DD_URL", "custom_proxy:3128"); + + let config = get_config(Path::new("")); + assert_eq!(config.url, "custom_proxy:3128".to_string()); + Ok(()) + }); + } + + #[test] + fn test_dd_dd_url_default() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + + let config = get_config(Path::new("")); + assert_eq!(config.dd_url, String::new()); + Ok(()) + }); + } + + #[test] + fn test_precedence() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.create_file( + "datadog.yaml", + r" + site: datadoghq.eu, + ", + )?; + 
/// `DD_PROXY_HTTPS` set in the environment is exposed as `proxy_https`.
#[test]
fn test_proxy_config() {
    figment::Jail::expect_with(|jail| {
        jail.clear_env();
        jail.set_env("DD_PROXY_HTTPS", "my-proxy:3128");

        let proxy_https = get_config(Path::new("")).proxy_https;
        assert_eq!(proxy_https.as_deref(), Some("my-proxy:3128"));
        Ok(())
    });
}
"my-proxy:3128"); + jail.set_env( + "NO_PROXY", + "127.0.0.1,localhost,172.16.0.0/12,us-east-1.amazonaws.com,datadoghq.eu", + ); + let config = get_config(Path::new("")); + assert_eq!(config.proxy_https, None); + Ok(()) + }); + } + + #[test] + fn test_proxy_yaml() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.create_file( + "datadog.yaml", + r" + proxy: + https: my-proxy:3128 + ", + )?; + + let config = get_config(Path::new("")); + assert_eq!(config.proxy_https, Some("my-proxy:3128".to_string())); + Ok(()) + }); + } + + #[test] + fn test_no_proxy_yaml() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.create_file( + "datadog.yaml", + r" + proxy: + https: my-proxy:3128 + no_proxy: + - datadoghq.com + ", + )?; + + let config = get_config(Path::new("")); + assert_eq!(config.proxy_https, None); + // Assertion to ensure config.site runs before proxy + // because we chenck that noproxy contains the site + assert_eq!(config.site, "datadoghq.com"); + Ok(()) + }); + } + + #[test] + fn test_parse_flush_strategy_end() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.set_env("DD_SERVERLESS_FLUSH_STRATEGY", "end"); + let config = get_config(Path::new("")); + assert_eq!(config.serverless_flush_strategy, FlushStrategy::End); + Ok(()) + }); + } + + #[test] + fn test_parse_flush_strategy_periodically() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.set_env("DD_SERVERLESS_FLUSH_STRATEGY", "periodically,100000"); + let config = get_config(Path::new("")); + assert_eq!( + config.serverless_flush_strategy, + FlushStrategy::Periodically(PeriodicStrategy { interval: 100_000 }) + ); + Ok(()) + }); + } + + #[test] + fn test_parse_flush_strategy_invalid() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.set_env("DD_SERVERLESS_FLUSH_STRATEGY", "invalid_strategy"); + let config = get_config(Path::new("")); + assert_eq!(config.serverless_flush_strategy, FlushStrategy::Default); + Ok(()) + }); + } + + 
/// A `periodically` strategy with a non-numeric interval falls back to the
/// default flush strategy.
#[test]
fn test_parse_flush_strategy_invalid_periodic() {
    figment::Jail::expect_with(|jail| {
        jail.clear_env();
        jail.set_env(
            "DD_SERVERLESS_FLUSH_STRATEGY",
            "periodically,invalid_interval",
        );

        let strategy = get_config(Path::new("")).serverless_flush_strategy;
        assert_eq!(strategy, FlushStrategy::Default);
        Ok(())
    });
}
Some(vec![ProcessingRule { + kind: processing_rule::Kind::ExcludeAtMatch, + name: "exclude".to_string(), + pattern: "exclude".to_string(), + replace_placeholder: None + }]), + ); + Ok(()) + }); + } + + #[test] + fn test_parse_apm_replace_tags_from_yaml() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.create_file( + "datadog.yaml", + r" + site: datadoghq.com + apm_config: + replace_tags: + - name: '*' + pattern: 'foo' + repl: 'REDACTED' + ", + )?; + let config = get_config(Path::new("")); + let rule = parse_rules_from_string( + r#"[ + {"name": "*", "pattern": "foo", "repl": "REDACTED"} + ]"#, + ) + .expect("can't parse rules"); + assert_eq!(config.apm_replace_tags, Some(rule),); + Ok(()) + }); + } + + #[test] + fn test_apm_tags_env_overrides_yaml() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.set_env( + "DD_APM_REPLACE_TAGS", + r#"[{"name":"*","pattern":"foo","repl":"REDACTED-ENV"}]"#, + ); + jail.create_file( + "datadog.yaml", + r" + site: datadoghq.com + apm_config: + replace_tags: + - name: '*' + pattern: 'foo' + repl: 'REDACTED-YAML' + ", + )?; + let config = get_config(Path::new("")); + let rule = parse_rules_from_string( + r#"[ + {"name": "*", "pattern": "foo", "repl": "REDACTED-ENV"} + ]"#, + ) + .expect("can't parse rules"); + assert_eq!(config.apm_replace_tags, Some(rule),); + Ok(()) + }); + } + + #[test] + fn test_parse_apm_http_obfuscation_from_yaml() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.create_file( + "datadog.yaml", + r" + site: datadoghq.com + apm_config: + obfuscation: + http: + remove_query_string: true + remove_paths_with_digits: true + ", + )?; + let config = get_config(Path::new("")); + assert!(config.apm_config_obfuscation_http_remove_query_string,); + assert!(config.apm_config_obfuscation_http_remove_paths_with_digits,); + Ok(()) + }); + } + #[test] + fn test_parse_trace_propagation_style() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.set_env( + 
"DD_TRACE_PROPAGATION_STYLE", + "datadog,tracecontext,b3,b3multi", + ); + let config = get_config(Path::new("")); + + let expected_styles = vec![ + TracePropagationStyle::Datadog, + TracePropagationStyle::TraceContext, + TracePropagationStyle::B3, + TracePropagationStyle::B3Multi, + ]; + assert_eq!(config.trace_propagation_style, expected_styles); + assert_eq!(config.trace_propagation_style_extract, expected_styles); + Ok(()) + }); + } + + #[test] + fn test_parse_trace_propagation_style_extract() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.set_env("DD_TRACE_PROPAGATION_STYLE_EXTRACT", "datadog"); + let config = get_config(Path::new("")); + + assert_eq!( + config.trace_propagation_style, + vec![ + TracePropagationStyle::Datadog, + TracePropagationStyle::TraceContext, + ] + ); + assert_eq!( + config.trace_propagation_style_extract, + vec![TracePropagationStyle::Datadog] + ); + Ok(()) + }); + } + + #[test] + fn test_bad_tags() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.set_env("DD_TAGS", 123); + let config = get_config(Path::new("")); + assert_eq!(config.tags, HashMap::new()); + Ok(()) + }); + } + + #[test] + fn test_tags_comma_separated() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.set_env("DD_TAGS", "team:serverless,env:prod,version:1.0"); + let config = get_config(Path::new("")); + assert_eq!(config.tags.get("team"), Some(&"serverless".to_string())); + assert_eq!(config.tags.get("env"), Some(&"prod".to_string())); + assert_eq!(config.tags.get("version"), Some(&"1.0".to_string())); + assert_eq!(config.tags.len(), 3); + Ok(()) + }); + } + + #[test] + fn test_tags_space_separated() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.set_env("DD_TAGS", "team:serverless env:prod version:1.0"); + let config = get_config(Path::new("")); + assert_eq!(config.tags.get("team"), Some(&"serverless".to_string())); + assert_eq!(config.tags.get("env"), Some(&"prod".to_string())); + 
/// Boolean settings accept `true`/`TRUE` as well as `1`/`0` from the
/// environment.
#[test]
fn test_parse_bool_from_anything() {
    figment::Jail::expect_with(|jail| {
        jail.clear_env();
        let env_vars = [
            ("DD_SERVERLESS_LOGS_ENABLED", "true"),
            ("DD_ENHANCED_METRICS", "1"),
            ("DD_LOGS_CONFIG_USE_COMPRESSION", "TRUE"),
            ("DD_CAPTURE_LAMBDA_PAYLOAD", "0"),
        ];
        for (name, raw) in env_vars {
            jail.set_env(name, raw);
        }

        let cfg = get_config(Path::new(""));
        assert!(cfg.serverless_logs_enabled);
        assert!(cfg.enhanced_metrics);
        assert!(cfg.logs_config_use_compression);
        assert!(!cfg.capture_lambda_payload);
        Ok(())
    });
}
/// Microsecond durations: a missing field is `None`, a non-negative integer
/// parses, and negative or fractional numbers are rejected.
#[test]
fn test_parse_duration_from_microseconds() {
    #[derive(Deserialize, Debug, PartialEq, Eq)]
    struct Value {
        #[serde(default)]
        #[serde(deserialize_with = "deserialize_optional_duration_from_microseconds")]
        duration: Option<Duration>,
    }

    let parse = |json: &str| serde_json::from_str::<Value>(json);

    // Inputs that must deserialize successfully.
    assert_eq!(
        parse("{}").expect("failed to parse JSON"),
        Value { duration: None }
    );
    assert_eq!(
        parse(r#"{"duration":1000000}"#).expect("failed to parse JSON"),
        Value {
            duration: Some(Duration::from_secs(1))
        }
    );

    // Inputs that must be rejected.
    for rejected in [
        r#"{"duration":-1}"#,
        r#"{"duration":-1.5}"#,
        r#"{"duration":1.5}"#,
    ] {
        parse(rejected).expect_err("should have failed parsing");
    }
}
/// The ignore-zero variant keeps a non-zero duration and maps `0` to `None`.
#[test]
fn test_parse_duration_from_seconds_ignore_zero() {
    #[derive(Deserialize, Debug, PartialEq, Eq)]
    struct Value {
        #[serde(default)]
        #[serde(deserialize_with = "deserialize_optional_duration_from_seconds_ignore_zero")]
        duration: Option<Duration>,
    }

    let cases = [
        (r#"{"duration":1}"#, Some(Duration::from_secs(1))),
        (r#"{"duration":0}"#, None),
    ];
    for (json, expected) in cases {
        let parsed: Value = serde_json::from_str(json).expect("failed to parse JSON");
        assert_eq!(parsed, Value { duration: expected });
    }
}
"https://gitlab.ddbuild.io/DataDog/serverless-e2e-tests.git".to_string(), + ); + expected.insert("env".to_string(), "prod".to_string()); + assert_eq!(result.tags, expected); + } + + #[test] + fn test_deserialize_key_value_pair_array_with_urls() { + #[derive(Deserialize, Debug, PartialEq)] + struct TestStruct { + #[serde(deserialize_with = "deserialize_key_value_pair_array_to_hashmap")] + tags: HashMap, + } + + let result = serde_json::from_str::( + r#"{"tags": ["git.repository_url:https://gitlab.ddbuild.io/DataDog/serverless-e2e-tests.git", "env:prod", "version:1.2.3"]}"# + ) + .expect("failed to parse JSON"); + let mut expected = HashMap::new(); + expected.insert( + "git.repository_url".to_string(), + "https://gitlab.ddbuild.io/DataDog/serverless-e2e-tests.git".to_string(), + ); + expected.insert("env".to_string(), "prod".to_string()); + expected.insert("version".to_string(), "1.2.3".to_string()); + assert_eq!(result.tags, expected); + } + + #[test] + fn test_deserialize_key_value_pair_array_ignores_invalid() { + #[derive(Deserialize, Debug, PartialEq)] + struct TestStruct { + #[serde(deserialize_with = "deserialize_key_value_pair_array_to_hashmap")] + tags: HashMap, + } + + let result = serde_json::from_str::( + r#"{"tags": ["valid:tag", "invalid_no_colon", "another:good:value:with:colons"]}"#, + ) + .expect("failed to parse JSON"); + let mut expected = HashMap::new(); + expected.insert("valid".to_string(), "tag".to_string()); + expected.insert("another".to_string(), "good:value:with:colons".to_string()); + assert_eq!(result.tags, expected); + } + + #[test] + fn test_deserialize_key_value_pair_array_empty() { + #[derive(Deserialize, Debug, PartialEq)] + struct TestStruct { + #[serde(deserialize_with = "deserialize_key_value_pair_array_to_hashmap")] + tags: HashMap, + } + + let result = + serde_json::from_str::(r#"{"tags": []}"#).expect("failed to parse JSON"); + assert_eq!(result.tags, HashMap::new()); + } +} diff --git 
a/crates/datadog-agent-config/processing_rule.rs b/crates/datadog-agent-config/processing_rule.rs new file mode 100644 index 00000000..cae8a5ad --- /dev/null +++ b/crates/datadog-agent-config/processing_rule.rs @@ -0,0 +1,56 @@ +use serde::{Deserialize, Deserializer}; +use serde_json::Value as JsonValue; + +#[derive(Clone, Copy, Debug, PartialEq, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum Kind { + ExcludeAtMatch, + IncludeAtMatch, + MaskSequences, +} + +#[derive(Clone, Debug, PartialEq, Deserialize)] +pub struct ProcessingRule { + #[serde(rename = "type")] + pub kind: Kind, + pub name: String, + pub pattern: String, + pub replace_placeholder: Option, +} + +pub fn deserialize_processing_rules<'de, D>( + deserializer: D, +) -> Result>, D::Error> +where + D: Deserializer<'de>, +{ + // Deserialize the JSON value using serde_json::Value + let value: JsonValue = Deserialize::deserialize(deserializer)?; + + match value { + JsonValue::String(s) => match serde_json::from_str(&s) { + Ok(values) => Ok(Some(values)), + Err(e) => { + tracing::error!("Failed to parse processing rules: {}, ignoring", e); + Ok(None) + } + }, + JsonValue::Array(a) => { + let mut values = Vec::new(); + for v in a { + match serde_json::from_value(v.clone()) { + Ok(rule) => values.push(rule), + Err(e) => { + tracing::error!("Failed to parse processing rule: {}, ignoring", e); + } + } + } + if values.is_empty() { + Ok(None) + } else { + Ok(Some(values)) + } + } + _ => Ok(None), + } +} diff --git a/crates/datadog-agent-config/service_mapping.rs b/crates/datadog-agent-config/service_mapping.rs new file mode 100644 index 00000000..5b133989 --- /dev/null +++ b/crates/datadog-agent-config/service_mapping.rs @@ -0,0 +1,32 @@ +use std::collections::HashMap; + +use serde::{Deserialize, Deserializer}; + +#[allow(clippy::module_name_repetitions)] +pub fn deserialize_service_mapping<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let s: String = 
String::deserialize(deserializer)?; + + let map = s + .split(',') + .filter_map(|pair| { + let mut split = pair.split(':'); + + let service = split.next(); + let to_map = split.next(); + + if let (Some(service), Some(to_map)) = (service, to_map) { + Some((service.trim().to_string(), to_map.trim().to_string())) + } else { + tracing::error!("Failed to parse service mapping '{}', expected format 'service:mapped_service', ignoring", pair.trim()); + None + } + }) + .collect(); + + Ok(map) +} diff --git a/crates/datadog-agent-config/trace_propagation_style.rs b/crates/datadog-agent-config/trace_propagation_style.rs new file mode 100644 index 00000000..65971bfa --- /dev/null +++ b/crates/datadog-agent-config/trace_propagation_style.rs @@ -0,0 +1,66 @@ +use std::{fmt::Display, str::FromStr}; + +use serde::{Deserialize, Deserializer}; +use tracing::error; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum TracePropagationStyle { + Datadog, + B3Multi, + B3, + TraceContext, + None, +} + +impl FromStr for TracePropagationStyle { + type Err = String; + + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "datadog" => Ok(TracePropagationStyle::Datadog), + "b3multi" => Ok(TracePropagationStyle::B3Multi), + "b3" => Ok(TracePropagationStyle::B3), + "tracecontext" => Ok(TracePropagationStyle::TraceContext), + "none" => Ok(TracePropagationStyle::None), + _ => { + error!("Trace propagation style is invalid: {:?}, using None", s); + Ok(TracePropagationStyle::None) + } + } + } +} + +impl Display for TracePropagationStyle { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let style = match self { + TracePropagationStyle::Datadog => "datadog", + TracePropagationStyle::B3Multi => "b3multi", + TracePropagationStyle::B3 => "b3", + TracePropagationStyle::TraceContext => "tracecontext", + TracePropagationStyle::None => "none", + }; + write!(f, "{style}") + } +} + +#[allow(clippy::module_name_repetitions)] +pub fn 
deserialize_trace_propagation_style<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let s: String = String::deserialize(deserializer)?; + + Ok(s.split(',') + .filter_map( + |style| match TracePropagationStyle::from_str(style.trim()) { + Ok(parsed_style) => Some(parsed_style), + Err(e) => { + tracing::error!("Failed to parse trace propagation style: {}, ignoring", e); + None + } + }, + ) + .collect()) +} diff --git a/crates/datadog-agent-config/yaml.rs b/crates/datadog-agent-config/yaml.rs new file mode 100644 index 00000000..0a7a52cd --- /dev/null +++ b/crates/datadog-agent-config/yaml.rs @@ -0,0 +1,1092 @@ +use std::time::Duration; +use std::{collections::HashMap, path::PathBuf}; + +use crate::{ + Config, ConfigError, ConfigSource, ProcessingRule, + additional_endpoints::deserialize_additional_endpoints, + deserialize_apm_replace_rules, deserialize_key_value_pair_array_to_hashmap, + deserialize_option_lossless, deserialize_optional_bool_from_anything, + deserialize_optional_duration_from_microseconds, deserialize_optional_duration_from_seconds, + deserialize_optional_duration_from_seconds_ignore_zero, deserialize_optional_string, + deserialize_processing_rules, deserialize_string_or_int, + flush_strategy::FlushStrategy, + log_level::LogLevel, + logs_additional_endpoints::LogsAdditionalEndpoint, + merge_hashmap, merge_option, merge_option_to_value, merge_string, merge_vec, + service_mapping::deserialize_service_mapping, + trace_propagation_style::{TracePropagationStyle, deserialize_trace_propagation_style}, +}; +use figment::{ + Figment, + providers::{Format, Yaml}, +}; +use libdd_trace_obfuscation::replacer::ReplaceRule; +use serde::Deserialize; + +/// `YamlConfig` is a struct that represents some of the fields in the `datadog.yaml` file. +/// +/// It is used to deserialize the `datadog.yaml` file into a struct that can be merged +/// with the `Config` struct. 
+#[derive(Debug, PartialEq, Deserialize, Clone, Default)] +#[serde(default)] +#[allow(clippy::module_name_repetitions)] +pub struct YamlConfig { + #[serde(deserialize_with = "deserialize_optional_string")] + pub site: Option, + #[serde(deserialize_with = "deserialize_optional_string")] + pub api_key: Option, + pub log_level: Option, + + #[serde(deserialize_with = "deserialize_option_lossless")] + pub flush_timeout: Option, + + #[serde(deserialize_with = "deserialize_option_lossless")] + pub compression_level: Option, + + // Proxy + pub proxy: ProxyConfig, + // nit: this should probably be in the endpoints section + #[serde(deserialize_with = "deserialize_optional_string")] + pub dd_url: Option, + #[serde(deserialize_with = "deserialize_optional_string")] + pub http_protocol: Option, + #[serde(deserialize_with = "deserialize_optional_string")] + pub tls_cert_file: Option, + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub skip_ssl_validation: Option, + + // Endpoints + #[serde(deserialize_with = "deserialize_additional_endpoints")] + /// Field used for Dual Shipping for Metrics + pub additional_endpoints: HashMap>, + + // Unified Service Tagging + #[serde(deserialize_with = "deserialize_string_or_int")] + pub env: Option, + #[serde(deserialize_with = "deserialize_string_or_int")] + pub service: Option, + #[serde(deserialize_with = "deserialize_string_or_int")] + pub version: Option, + #[serde(deserialize_with = "deserialize_key_value_pair_array_to_hashmap")] + pub tags: HashMap, + + // Logs + pub logs_config: LogsConfig, + + // APM + pub apm_config: ApmConfig, + #[serde(deserialize_with = "deserialize_service_mapping")] + pub service_mapping: HashMap, + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub trace_aws_service_representation_enabled: Option, + // Trace Propagation + #[serde(deserialize_with = "deserialize_trace_propagation_style")] + pub trace_propagation_style: Vec, + #[serde(deserialize_with = 
"deserialize_trace_propagation_style")] + pub trace_propagation_style_extract: Vec, + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub trace_propagation_extract_first: Option, + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub trace_propagation_http_baggage_enabled: Option, + + // Metrics + pub metrics_config: MetricsConfig, + + // DogStatsD + /// Size of the receive buffer for `DogStatsD` UDP packets, in bytes (`SO_RCVBUF`). + #[serde(deserialize_with = "deserialize_option_lossless")] + pub dogstatsd_so_rcvbuf: Option, + /// Maximum size of a single read from any transport (UDP or named pipe), in bytes. + #[serde(deserialize_with = "deserialize_option_lossless")] + pub dogstatsd_buffer_size: Option, + /// Internal queue capacity between the socket reader and metric processor. + #[serde(deserialize_with = "deserialize_option_lossless")] + pub dogstatsd_queue_size: Option, + + // OTLP + pub otlp_config: Option, + + // AWS Lambda + #[serde(deserialize_with = "deserialize_optional_string")] + pub api_key_secret_arn: Option, + #[serde(deserialize_with = "deserialize_optional_string")] + pub kms_api_key: Option, + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub serverless_logs_enabled: Option, + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub logs_enabled: Option, + pub serverless_flush_strategy: Option, + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub enhanced_metrics: Option, + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub lambda_proc_enhanced_metrics: Option, + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub capture_lambda_payload: Option, + #[serde(deserialize_with = "deserialize_option_lossless")] + pub capture_lambda_payload_max_depth: Option, + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub compute_trace_stats_on_extension: Option, + 
#[serde(deserialize_with = "deserialize_optional_duration_from_seconds_ignore_zero")] + pub api_key_secret_reload_interval: Option, + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub serverless_appsec_enabled: Option, + #[serde(deserialize_with = "deserialize_optional_string")] + pub appsec_rules: Option, + #[serde(deserialize_with = "deserialize_optional_duration_from_microseconds")] + pub appsec_waf_timeout: Option, + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub api_security_enabled: Option, + #[serde(deserialize_with = "deserialize_optional_duration_from_seconds")] + pub api_security_sample_delay: Option, +} + +/// Proxy Config +/// + +#[derive(Debug, PartialEq, Deserialize, Clone, Default)] +#[serde(default)] +#[allow(clippy::module_name_repetitions)] +pub struct ProxyConfig { + pub https: Option, + pub no_proxy: Option>, +} + +/// Logs Config +/// + +#[derive(Debug, PartialEq, Deserialize, Clone, Default)] +#[serde(default)] +#[allow(clippy::module_name_repetitions)] +pub struct LogsConfig { + pub logs_dd_url: Option, + #[serde(deserialize_with = "deserialize_processing_rules")] + pub processing_rules: Option>, + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub use_compression: Option, + #[serde(deserialize_with = "deserialize_option_lossless")] + pub compression_level: Option, + pub additional_endpoints: Vec, +} + +/// Metrics specific config +/// +#[derive(Debug, PartialEq, Deserialize, Clone, Copy, Default)] +#[serde(default)] +#[allow(clippy::module_name_repetitions)] +pub struct MetricsConfig { + #[serde(deserialize_with = "deserialize_option_lossless")] + pub compression_level: Option, +} + +/// APM Config +/// + +#[derive(Debug, PartialEq, Deserialize, Clone, Default)] +#[serde(default)] +#[allow(clippy::module_name_repetitions)] +pub struct ApmConfig { + pub apm_dd_url: Option, + #[serde(deserialize_with = "deserialize_apm_replace_rules")] + pub replace_tags: 
Option>, + pub obfuscation: Option, + #[serde(deserialize_with = "deserialize_option_lossless")] + pub compression_level: Option, + pub features: Vec, + #[serde(deserialize_with = "deserialize_additional_endpoints")] + pub additional_endpoints: HashMap>, +} + +impl ApmConfig { + #[must_use] + pub fn obfuscation_http_remove_query_string(&self) -> Option { + self.obfuscation + .as_ref() + .and_then(|obfuscation| obfuscation.http.remove_query_string) + } + + #[must_use] + pub fn obfuscation_http_remove_paths_with_digits(&self) -> Option { + self.obfuscation + .as_ref() + .and_then(|obfuscation| obfuscation.http.remove_paths_with_digits) + } +} + +#[derive(Debug, PartialEq, Deserialize, Clone, Copy, Default)] +#[serde(default)] +#[allow(clippy::module_name_repetitions)] +pub struct ApmObfuscation { + pub http: ApmHttpObfuscation, +} + +#[derive(Debug, PartialEq, Deserialize, Clone, Copy, Default)] +#[serde(default)] +#[allow(clippy::module_name_repetitions)] +pub struct ApmHttpObfuscation { + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub remove_query_string: Option, + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub remove_paths_with_digits: Option, +} + +/// OTLP Config +/// + +#[derive(Debug, PartialEq, Deserialize, Clone, Default)] +#[serde(default)] +#[allow(clippy::module_name_repetitions)] +pub struct OtlpConfig { + pub receiver: Option, + pub traces: Option, + + // NOT SUPPORTED + pub metrics: Option, + pub logs: Option, +} + +#[derive(Debug, PartialEq, Deserialize, Clone, Default)] +#[serde(default)] +#[allow(clippy::module_name_repetitions)] +pub struct OtlpReceiverConfig { + pub protocols: Option, +} + +#[derive(Debug, PartialEq, Deserialize, Clone, Default)] +#[serde(default)] +#[allow(clippy::module_name_repetitions)] +pub struct OtlpReceiverProtocolsConfig { + pub http: Option, + + // NOT SUPPORTED + pub grpc: Option, +} + +#[derive(Debug, PartialEq, Deserialize, Clone, Default)] +#[serde(default)] 
+#[allow(clippy::module_name_repetitions)] +pub struct OtlpReceiverHttpConfig { + pub endpoint: Option, +} + +#[derive(Debug, PartialEq, Deserialize, Clone, Default)] +#[serde(default)] +#[allow(clippy::module_name_repetitions)] +pub struct OtlpReceiverGrpcConfig { + pub endpoint: Option, + pub transport: Option, + #[serde(deserialize_with = "deserialize_option_lossless")] + pub max_recv_msg_size_mib: Option, +} + +#[derive(Debug, PartialEq, Deserialize, Clone, Default)] +#[serde(default)] +#[allow(clippy::module_name_repetitions)] +pub struct OtlpTracesConfig { + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub enabled: Option, + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub span_name_as_resource_name: Option, + pub span_name_remappings: HashMap, + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub ignore_missing_datadog_fields: Option, + + // NOT SUPPORTED + pub probabilistic_sampler: Option, +} + +#[derive(Debug, PartialEq, Clone, Deserialize, Default, Copy)] +pub struct OtlpTracesProbabilisticSampler { + #[serde(deserialize_with = "deserialize_option_lossless")] + pub sampling_percentage: Option, +} + +#[derive(Debug, PartialEq, Deserialize, Clone, Default)] +#[serde(default)] +pub struct OtlpMetricsConfig { + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub enabled: Option, + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub resource_attributes_as_tags: Option, + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub instrumentation_scope_metadata_as_tags: Option, + pub tag_cardinality: Option, + #[serde(deserialize_with = "deserialize_option_lossless")] + pub delta_ttl: Option, + pub histograms: Option, + pub sums: Option, + pub summaries: Option, +} + +#[derive(Debug, PartialEq, Clone, Deserialize, Default)] +#[serde(default)] +pub struct OtlpMetricsHistograms { + pub mode: Option, + 
#[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub send_count_sum_metrics: Option, + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub send_aggregation_metrics: Option, +} + +#[derive(Debug, PartialEq, Clone, Deserialize, Default)] +#[serde(default)] +pub struct OtlpMetricsSums { + pub cumulative_monotonic_mode: Option, + pub initial_cumulative_monotonic_value: Option, +} + +#[derive(Debug, PartialEq, Clone, Deserialize, Default)] +#[serde(default)] +pub struct OtlpMetricsSummaries { + pub mode: Option, +} + +#[derive(Debug, PartialEq, Clone, Deserialize, Default, Copy)] +#[serde(default)] +pub struct OtlpLogsConfig { + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub enabled: Option, +} + +impl OtlpConfig { + #[must_use] + pub fn receiver_protocols_http_endpoint(&self) -> Option { + self.receiver.as_ref().and_then(|receiver| { + receiver.protocols.as_ref().and_then(|protocols| { + protocols + .http + .as_ref() + .and_then(|http| http.endpoint.clone()) + }) + }) + } + + #[must_use] + pub fn receiver_protocols_grpc(&self) -> Option<&OtlpReceiverGrpcConfig> { + self.receiver.as_ref().and_then(|receiver| { + receiver + .protocols + .as_ref() + .and_then(|protocols| protocols.grpc.as_ref()) + }) + } + + #[must_use] + pub fn traces_enabled(&self) -> Option { + self.traces.as_ref().and_then(|traces| traces.enabled) + } + + #[must_use] + pub fn traces_ignore_missing_datadog_fields(&self) -> Option { + self.traces + .as_ref() + .and_then(|traces| traces.ignore_missing_datadog_fields) + } + + #[must_use] + pub fn traces_span_name_as_resource_name(&self) -> Option { + self.traces + .as_ref() + .and_then(|traces| traces.span_name_as_resource_name) + } + + #[must_use] + pub fn traces_span_name_remappings(&self) -> HashMap { + self.traces + .as_ref() + .map(|traces| traces.span_name_remappings.clone()) + .unwrap_or_default() + } + + #[must_use] + pub fn traces_probabilistic_sampler(&self) -> 
Option<&OtlpTracesProbabilisticSampler> { + self.traces + .as_ref() + .and_then(|traces| traces.probabilistic_sampler.as_ref()) + } + + #[must_use] + pub fn logs(&self) -> Option<&OtlpLogsConfig> { + self.logs.as_ref() + } +} + +#[allow(clippy::too_many_lines)] +fn merge_config(config: &mut Config, yaml_config: &YamlConfig) { + // Basic fields + merge_string!(config, yaml_config, site); + merge_string!(config, yaml_config, api_key); + merge_option_to_value!(config, yaml_config, log_level); + merge_option_to_value!(config, yaml_config, flush_timeout); + + // Unified Service Tagging + merge_option!(config, yaml_config, env); + merge_option!(config, yaml_config, service); + merge_option!(config, yaml_config, version); + merge_hashmap!(config, yaml_config, tags); + + merge_option_to_value!(config, yaml_config, compression_level); + // Proxy + merge_option!(config, proxy_https, yaml_config.proxy, https); + merge_option_to_value!(config, proxy_no_proxy, yaml_config.proxy, no_proxy); + merge_option!(config, yaml_config, http_protocol); + merge_option!(config, yaml_config, tls_cert_file); + merge_option_to_value!(config, yaml_config, skip_ssl_validation); + + // Endpoints + merge_hashmap!(config, yaml_config, additional_endpoints); + merge_string!(config, yaml_config, dd_url); + + // Logs + merge_string!( + config, + logs_config_logs_dd_url, + yaml_config.logs_config, + logs_dd_url + ); + merge_option!( + config, + logs_config_processing_rules, + yaml_config.logs_config, + processing_rules + ); + merge_option_to_value!( + config, + logs_config_use_compression, + yaml_config.logs_config, + use_compression + ); + merge_option_to_value!( + config, + logs_config_compression_level, + yaml_config, + compression_level + ); + merge_option_to_value!( + config, + logs_config_compression_level, + yaml_config.logs_config, + compression_level + ); + merge_vec!( + config, + logs_config_additional_endpoints, + yaml_config.logs_config, + additional_endpoints + ); + + 
merge_option_to_value!( + config, + metrics_config_compression_level, + yaml_config, + compression_level + ); + + merge_option_to_value!( + config, + metrics_config_compression_level, + yaml_config.metrics_config, + compression_level + ); + + // DogStatsD + merge_option!(config, yaml_config, dogstatsd_so_rcvbuf); + merge_option!(config, yaml_config, dogstatsd_buffer_size); + merge_option!(config, yaml_config, dogstatsd_queue_size); + + // APM + merge_hashmap!(config, yaml_config, service_mapping); + merge_string!(config, apm_dd_url, yaml_config.apm_config, apm_dd_url); + merge_option!( + config, + apm_replace_tags, + yaml_config.apm_config, + replace_tags + ); + merge_option_to_value!( + config, + apm_config_compression_level, + yaml_config, + compression_level + ); + merge_option_to_value!( + config, + apm_config_compression_level, + yaml_config.apm_config, + compression_level + ); + merge_hashmap!( + config, + apm_additional_endpoints, + yaml_config.apm_config, + additional_endpoints + ); + + // Not using the macro here because we need to call a method on the struct + if let Some(remove_query_string) = yaml_config + .apm_config + .obfuscation_http_remove_query_string() + { + config + .apm_config_obfuscation_http_remove_query_string + .clone_from(&remove_query_string); + } + if let Some(remove_paths_with_digits) = yaml_config + .apm_config + .obfuscation_http_remove_paths_with_digits() + { + config + .apm_config_obfuscation_http_remove_paths_with_digits + .clone_from(&remove_paths_with_digits); + } + + merge_vec!(config, apm_features, yaml_config.apm_config, features); + + // Trace Propagation + merge_vec!(config, yaml_config, trace_propagation_style); + merge_vec!(config, yaml_config, trace_propagation_style_extract); + merge_option_to_value!(config, yaml_config, trace_propagation_extract_first); + merge_option_to_value!(config, yaml_config, trace_propagation_http_baggage_enabled); + merge_option_to_value!( + config, + yaml_config, + 
trace_aws_service_representation_enabled + ); + + // OTLP + if let Some(otlp_config) = &yaml_config.otlp_config { + // Traces + + // Not using macros in some cases because we need to call a method on the struct + if let Some(traces_enabled) = otlp_config.traces_enabled() { + config + .otlp_config_traces_enabled + .clone_from(&traces_enabled); + } + if let Some(traces_span_name_as_resource_name) = + otlp_config.traces_span_name_as_resource_name() + { + config + .otlp_config_traces_span_name_as_resource_name + .clone_from(&traces_span_name_as_resource_name); + } + + let traces_span_name_remappings = otlp_config.traces_span_name_remappings(); + if !traces_span_name_remappings.is_empty() { + config + .otlp_config_traces_span_name_remappings + .clone_from(&traces_span_name_remappings); + } + if let Some(traces_ignore_missing_datadog_fields) = + otlp_config.traces_ignore_missing_datadog_fields() + { + config + .otlp_config_ignore_missing_datadog_fields + .clone_from(&traces_ignore_missing_datadog_fields); + } + + if let Some(probabilistic_sampler) = otlp_config.traces_probabilistic_sampler() { + merge_option!( + config, + otlp_config_traces_probabilistic_sampler_sampling_percentage, + probabilistic_sampler, + sampling_percentage + ); + } + + // Receiver + let receiver_protocols_http_endpoint = otlp_config.receiver_protocols_http_endpoint(); + if receiver_protocols_http_endpoint.is_some() { + config + .otlp_config_receiver_protocols_http_endpoint + .clone_from(&receiver_protocols_http_endpoint); + } + + if let Some(receiver_protocols_grpc) = otlp_config.receiver_protocols_grpc() { + merge_option!( + config, + otlp_config_receiver_protocols_grpc_endpoint, + receiver_protocols_grpc, + endpoint + ); + merge_option!( + config, + otlp_config_receiver_protocols_grpc_transport, + receiver_protocols_grpc, + transport + ); + merge_option!( + config, + otlp_config_receiver_protocols_grpc_max_recv_msg_size_mib, + receiver_protocols_grpc, + max_recv_msg_size_mib + ); + } + + // 
Metrics + if let Some(metrics) = &otlp_config.metrics { + merge_option_to_value!(config, otlp_config_metrics_enabled, metrics, enabled); + merge_option_to_value!( + config, + otlp_config_metrics_resource_attributes_as_tags, + metrics, + resource_attributes_as_tags + ); + merge_option_to_value!( + config, + otlp_config_metrics_instrumentation_scope_metadata_as_tags, + metrics, + instrumentation_scope_metadata_as_tags + ); + merge_option!( + config, + otlp_config_metrics_tag_cardinality, + metrics, + tag_cardinality + ); + merge_option!(config, otlp_config_metrics_delta_ttl, metrics, delta_ttl); + if let Some(histograms) = &metrics.histograms { + merge_option_to_value!( + config, + otlp_config_metrics_histograms_send_count_sum_metrics, + histograms, + send_count_sum_metrics + ); + merge_option_to_value!( + config, + otlp_config_metrics_histograms_send_aggregation_metrics, + histograms, + send_aggregation_metrics + ); + merge_option!( + config, + otlp_config_metrics_histograms_mode, + histograms, + mode + ); + } + if let Some(sums) = &metrics.sums { + merge_option!( + config, + otlp_config_metrics_sums_cumulative_monotonic_mode, + sums, + cumulative_monotonic_mode + ); + merge_option!( + config, + otlp_config_metrics_sums_initial_cumulativ_monotonic_value, + sums, + initial_cumulative_monotonic_value + ); + } + if let Some(summaries) = &metrics.summaries { + merge_option!(config, otlp_config_metrics_summaries_mode, summaries, mode); + } + } + + // Logs + if let Some(logs) = &otlp_config.logs { + merge_option_to_value!(config, otlp_config_logs_enabled, logs, enabled); + } + } + + // AWS Lambda + merge_string!(config, yaml_config, api_key_secret_arn); + merge_string!(config, yaml_config, kms_api_key); + + // Handle serverless_logs_enabled with OR logic: if either logs_enabled or serverless_logs_enabled is true, enable logs + if yaml_config.serverless_logs_enabled.is_some() || yaml_config.logs_enabled.is_some() { + config.serverless_logs_enabled = 
yaml_config.serverless_logs_enabled.unwrap_or(false) + || yaml_config.logs_enabled.unwrap_or(false); + } + + merge_option_to_value!(config, yaml_config, serverless_flush_strategy); + merge_option_to_value!(config, yaml_config, enhanced_metrics); + merge_option_to_value!(config, yaml_config, lambda_proc_enhanced_metrics); + merge_option_to_value!(config, yaml_config, capture_lambda_payload); + merge_option_to_value!(config, yaml_config, capture_lambda_payload_max_depth); + merge_option_to_value!(config, yaml_config, compute_trace_stats_on_extension); + merge_option!(config, yaml_config, api_key_secret_reload_interval); + merge_option_to_value!(config, yaml_config, serverless_appsec_enabled); + merge_option!(config, yaml_config, appsec_rules); + merge_option_to_value!(config, yaml_config, appsec_waf_timeout); + merge_option_to_value!(config, yaml_config, api_security_enabled); + merge_option_to_value!(config, yaml_config, api_security_sample_delay); +} + +#[derive(Debug, PartialEq, Clone)] +#[allow(clippy::module_name_repetitions)] +pub struct YamlConfigSource { + pub path: PathBuf, +} + +impl ConfigSource for YamlConfigSource { + fn load(&self, config: &mut Config) -> Result<(), ConfigError> { + let figment = Figment::new().merge(Yaml::file(self.path.clone())); + + match figment.extract::() { + Ok(yaml_config) => merge_config(config, &yaml_config), + Err(e) => { + return Err(ConfigError::ParseError(format!( + "Failed to parse config from yaml file: {e}, using default config." 
+ ))); + } + } + + Ok(()) + } +} + +#[cfg_attr(coverage_nightly, coverage(off))] // Test modules skew coverage metrics +#[cfg(test)] +mod tests { + use std::path::Path; + use std::time::Duration; + + use crate::{flush_strategy::PeriodicStrategy, processing_rule::Kind}; + + use super::*; + + #[test] + #[allow(clippy::too_many_lines)] + fn test_merge_config_overrides_with_yaml_file() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.create_file( + "datadog.yaml", + r#" +# Basic fields +site: "test-site" +api_key: "test-api-key" +log_level: "debug" +flush_timeout: 42 +compression_level: 4 +# Proxy +proxy: + https: "https://proxy.example.com" + no_proxy: ["localhost", "127.0.0.1"] +dd_url: "https://metrics.datadoghq.com" +http_protocol: "http1" +tls_cert_file: "/opt/ca-cert.pem" +skip_ssl_validation: true + +# Endpoints +additional_endpoints: + "https://app.datadoghq.com": + - apikey2 + - apikey3 + "https://app.datadoghq.eu": + - apikey4 + +# Unified Service Tagging +env: "test-env" +service: "test-service" +version: "1.0.0" +tags: + - "team:test-team" + - "project:test-project" + +# Logs +logs_config: + logs_dd_url: "https://logs.datadoghq.com" + processing_rules: + - name: "test-exclude" + type: "exclude_at_match" + pattern: "test-pattern" + use_compression: false + compression_level: 1 + additional_endpoints: + - api_key: "apikey2" + Host: "agent-http-intake.logs.datadoghq.com" + Port: 443 + is_reliable: true + +# APM +apm_config: + apm_dd_url: "https://apm.datadoghq.com" + replace_tags: [] + obfuscation: + http: + remove_query_string: true + remove_paths_with_digits: true + compression_level: 2 + features: + - "enable_otlp_compute_top_level_by_span_kind" + - "enable_stats_by_span_kind" + additional_endpoints: + "https://trace.agent.datadoghq.com": + - apikey2 + - apikey3 + "https://trace.agent.datadoghq.eu": + - apikey4 + +service_mapping: old-service:new-service + +# Trace Propagation +trace_propagation_style: "datadog" 
+trace_propagation_style_extract: "b3" +trace_propagation_extract_first: true +trace_propagation_http_baggage_enabled: true +trace_aws_service_representation_enabled: true + +metrics_config: + compression_level: 3 + +dogstatsd_so_rcvbuf: 1048576 +dogstatsd_buffer_size: 65507 +dogstatsd_queue_size: 2048 + +# OTLP +otlp_config: + receiver: + protocols: + http: + endpoint: "http://localhost:4318" + grpc: + endpoint: "http://localhost:4317" + transport: "tcp" + max_recv_msg_size_mib: 4 + traces: + enabled: false + span_name_as_resource_name: true + span_name_remappings: + "old-span": "new-span" + ignore_missing_datadog_fields: true + probabilistic_sampler: + sampling_percentage: 50 + metrics: + enabled: true + resource_attributes_as_tags: true + instrumentation_scope_metadata_as_tags: true + tag_cardinality: "low" + delta_ttl: 3600 + histograms: + mode: "counters" + send_count_sum_metrics: true + send_aggregation_metrics: true + sums: + cumulative_monotonic_mode: "to_delta" + initial_cumulative_monotonic_value: "auto" + summaries: + mode: "quantiles" + logs: + enabled: true + +# AWS Lambda +api_key_secret_arn: "arn:aws:secretsmanager:region:account:secret:datadog-api-key" +kms_api_key: "test-kms-key" +serverless_logs_enabled: false +serverless_flush_strategy: "periodically,60000" +enhanced_metrics: false +lambda_proc_enhanced_metrics: false +capture_lambda_payload: true +capture_lambda_payload_max_depth: 5 +compute_trace_stats_on_extension: true +api_key_secret_reload_interval: 0 +serverless_appsec_enabled: true +appsec_rules: "/path/to/rules.json" +appsec_waf_timeout: 1000000 # Microseconds +api_security_enabled: false +api_security_sample_delay: 60 # Seconds +"#, + )?; + + let mut config = Config::default(); + let yaml_config_source = YamlConfigSource { + path: Path::new("datadog.yaml").to_path_buf(), + }; + yaml_config_source + .load(&mut config) + .expect("Failed to load config"); + + let expected_config = Config { + site: "test-site".to_string(), + api_key: 
"test-api-key".to_string(), + log_level: LogLevel::Debug, + flush_timeout: 42, + compression_level: 4, + proxy_https: Some("https://proxy.example.com".to_string()), + proxy_no_proxy: vec!["localhost".to_string(), "127.0.0.1".to_string()], + http_protocol: Some("http1".to_string()), + tls_cert_file: Some("/opt/ca-cert.pem".to_string()), + skip_ssl_validation: true, + dd_url: "https://metrics.datadoghq.com".to_string(), + url: String::new(), // doesnt exist in yaml + additional_endpoints: HashMap::from([ + ( + "https://app.datadoghq.com".to_string(), + vec!["apikey2".to_string(), "apikey3".to_string()], + ), + ( + "https://app.datadoghq.eu".to_string(), + vec!["apikey4".to_string()], + ), + ]), + env: Some("test-env".to_string()), + service: Some("test-service".to_string()), + version: Some("1.0.0".to_string()), + tags: HashMap::from([ + ("team".to_string(), "test-team".to_string()), + ("project".to_string(), "test-project".to_string()), + ]), + logs_config_logs_dd_url: "https://logs.datadoghq.com".to_string(), + logs_config_processing_rules: Some(vec![ProcessingRule { + name: "test-exclude".to_string(), + pattern: "test-pattern".to_string(), + kind: Kind::ExcludeAtMatch, + replace_placeholder: None, + }]), + logs_config_use_compression: false, + logs_config_compression_level: 1, + logs_config_additional_endpoints: vec![LogsAdditionalEndpoint { + api_key: "apikey2".to_string(), + host: "agent-http-intake.logs.datadoghq.com".to_string(), + port: 443, + is_reliable: true, + }], + observability_pipelines_worker_logs_enabled: false, + observability_pipelines_worker_logs_url: String::default(), + service_mapping: HashMap::from([( + "old-service".to_string(), + "new-service".to_string(), + )]), + apm_dd_url: "https://apm.datadoghq.com".to_string(), + apm_replace_tags: Some(vec![]), + apm_config_obfuscation_http_remove_query_string: true, + apm_config_obfuscation_http_remove_paths_with_digits: true, + apm_config_compression_level: 2, + apm_features: vec![ + 
"enable_otlp_compute_top_level_by_span_kind".to_string(), + "enable_stats_by_span_kind".to_string(), + ], + apm_additional_endpoints: HashMap::from([ + ( + "https://trace.agent.datadoghq.com".to_string(), + vec!["apikey2".to_string(), "apikey3".to_string()], + ), + ( + "https://trace.agent.datadoghq.eu".to_string(), + vec!["apikey4".to_string()], + ), + ]), + trace_propagation_style: vec![TracePropagationStyle::Datadog], + trace_propagation_style_extract: vec![TracePropagationStyle::B3], + trace_propagation_extract_first: true, + trace_propagation_http_baggage_enabled: true, + trace_aws_service_representation_enabled: true, + metrics_config_compression_level: 3, + otlp_config_traces_enabled: false, + otlp_config_traces_span_name_as_resource_name: true, + otlp_config_traces_span_name_remappings: HashMap::from([( + "old-span".to_string(), + "new-span".to_string(), + )]), + otlp_config_ignore_missing_datadog_fields: true, + otlp_config_receiver_protocols_http_endpoint: Some( + "http://localhost:4318".to_string(), + ), + otlp_config_receiver_protocols_grpc_endpoint: Some( + "http://localhost:4317".to_string(), + ), + otlp_config_receiver_protocols_grpc_transport: Some("tcp".to_string()), + otlp_config_receiver_protocols_grpc_max_recv_msg_size_mib: Some(4), + otlp_config_metrics_enabled: true, + otlp_config_metrics_resource_attributes_as_tags: true, + otlp_config_metrics_instrumentation_scope_metadata_as_tags: true, + otlp_config_metrics_tag_cardinality: Some("low".to_string()), + otlp_config_metrics_delta_ttl: Some(3600), + otlp_config_metrics_histograms_mode: Some("counters".to_string()), + otlp_config_metrics_histograms_send_count_sum_metrics: true, + otlp_config_metrics_histograms_send_aggregation_metrics: true, + otlp_config_metrics_sums_cumulative_monotonic_mode: Some("to_delta".to_string()), + otlp_config_metrics_sums_initial_cumulativ_monotonic_value: Some( + "auto".to_string(), + ), + otlp_config_metrics_summaries_mode: Some("quantiles".to_string()), + 
otlp_config_traces_probabilistic_sampler_sampling_percentage: Some(50), + otlp_config_logs_enabled: true, + api_key_secret_arn: "arn:aws:secretsmanager:region:account:secret:datadog-api-key" + .to_string(), + kms_api_key: "test-kms-key".to_string(), + api_key_ssm_arn: String::default(), + serverless_logs_enabled: false, + serverless_flush_strategy: FlushStrategy::Periodically(PeriodicStrategy { + interval: 60000, + }), + enhanced_metrics: false, + lambda_proc_enhanced_metrics: false, + capture_lambda_payload: true, + capture_lambda_payload_max_depth: 5, + compute_trace_stats_on_extension: true, + span_dedup_timeout: None, + api_key_secret_reload_interval: None, + + serverless_appsec_enabled: true, + appsec_rules: Some("/path/to/rules.json".to_string()), + appsec_waf_timeout: Duration::from_secs(1), + api_security_enabled: false, + api_security_sample_delay: Duration::from_secs(60), + + apm_filter_tags_require: None, + apm_filter_tags_reject: None, + apm_filter_tags_regex_require: None, + apm_filter_tags_regex_reject: None, + statsd_metric_namespace: None, + dogstatsd_so_rcvbuf: Some(1_048_576), + dogstatsd_buffer_size: Some(65507), + dogstatsd_queue_size: Some(2048), + }; + + // Assert that + assert_eq!(config, expected_config); + + Ok(()) + }); + } + + #[test] + fn test_yaml_dogstatsd_config() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.create_file( + "datadog.yaml", + r" +dogstatsd_so_rcvbuf: 524288 +dogstatsd_buffer_size: 16384 +dogstatsd_queue_size: 512 +", + )?; + let mut config = Config::default(); + let yaml_config_source = YamlConfigSource { + path: Path::new("datadog.yaml").to_path_buf(), + }; + yaml_config_source + .load(&mut config) + .expect("Failed to load config"); + + assert_eq!(config.dogstatsd_so_rcvbuf, Some(524_288)); + assert_eq!(config.dogstatsd_buffer_size, Some(16384)); + assert_eq!(config.dogstatsd_queue_size, Some(512)); + Ok(()) + }); + } + + #[test] + fn test_yaml_dogstatsd_config_defaults_to_none() { + 
figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.create_file("datadog.yaml", "")?; + let mut config = Config::default(); + let yaml_config_source = YamlConfigSource { + path: Path::new("datadog.yaml").to_path_buf(), + }; + yaml_config_source + .load(&mut config) + .expect("Failed to load config"); + + assert_eq!(config.dogstatsd_so_rcvbuf, None); + assert_eq!(config.dogstatsd_buffer_size, None); + assert_eq!(config.dogstatsd_queue_size, None); + Ok(()) + }); + } +} diff --git a/crates/datadog-serverless-compat/src/main.rs b/crates/datadog-serverless-compat/src/main.rs index 6d764815..d50798f0 100644 --- a/crates/datadog-serverless-compat/src/main.rs +++ b/crates/datadog-serverless-compat/src/main.rs @@ -10,7 +10,7 @@ use std::{env, sync::Arc}; use tokio::{ sync::Mutex as TokioMutex, - time::{interval, Duration}, + time::{Duration, interval}, }; use tracing::{debug, error, info}; use tracing_subscriber::EnvFilter; @@ -36,7 +36,7 @@ use dogstatsd::{ util::parse_metric_namespace, }; -use dogstatsd::metric::{SortedTags, EMPTY_TAGS}; +use dogstatsd::metric::{EMPTY_TAGS, SortedTags}; use tokio_util::sync::CancellationToken; const DOGSTATSD_FLUSH_INTERVAL: u64 = 10; diff --git a/crates/datadog-trace-agent/Cargo.toml b/crates/datadog-trace-agent/Cargo.toml index aec60c93..4cc5ed73 100644 --- a/crates/datadog-trace-agent/Cargo.toml +++ b/crates/datadog-trace-agent/Cargo.toml @@ -38,6 +38,7 @@ bytes = "1.10.1" rmp-serde = "1.1.1" serial_test = "2.0.0" duplicate = "0.4.1" +temp-env = "0.3.6" tempfile = "3.3.0" libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "d52ee90209cb12a28bdda0114535c1a985a29d95", features = [ "test-utils", diff --git a/crates/datadog-trace-agent/src/config.rs b/crates/datadog-trace-agent/src/config.rs index 2d94b4f3..5a7b8a8c 100644 --- a/crates/datadog-trace-agent/src/config.rs +++ b/crates/datadog-trace-agent/src/config.rs @@ -260,54 +260,57 @@ mod tests { use duplicate::duplicate_item; use serial_test::serial; 
use std::collections::HashMap; - use std::env; use crate::config; #[test] #[serial] fn test_error_if_unable_to_identify_env() { - env::set_var("DD_API_KEY", "_not_a_real_key_"); - - let config = config::Config::new(); - assert!(config.is_err()); - assert_eq!( - config.unwrap_err().to_string(), - "Unable to identify environment. Shutting down Mini Agent." - ); - env::remove_var("DD_API_KEY"); + temp_env::with_vars([("DD_API_KEY", Some("_not_a_real_key_"))], || { + let config = config::Config::new(); + assert!(config.is_err()); + assert_eq!( + config.unwrap_err().to_string(), + "Unable to identify environment. Shutting down Mini Agent." + ); + }); } #[test] #[serial] fn test_error_if_no_api_key_env_var() { - env::remove_var("DD_API_KEY"); - let config = config::Config::new(); - assert!(config.is_err()); - assert_eq!( - config.unwrap_err().to_string(), - "DD_API_KEY environment variable is not set" - ); + temp_env::with_vars([("DD_API_KEY", None::<&str>)], || { + let config = config::Config::new(); + assert!(config.is_err()); + assert_eq!( + config.unwrap_err().to_string(), + "DD_API_KEY environment variable is not set" + ); + }); } #[test] #[serial] fn test_default_trace_and_trace_stats_urls() { - env::set_var("DD_API_KEY", "_not_a_real_key_"); - env::set_var("K_SERVICE", "function_name"); - let config_res = config::Config::new(); - assert!(config_res.is_ok()); - let config = config_res.unwrap(); - assert_eq!( - config.trace_intake.url, - "https://trace.agent.datadoghq.com/api/v0.2/traces" - ); - assert_eq!( - config.trace_stats_intake.url, - "https://trace.agent.datadoghq.com/api/v0.2/stats" + temp_env::with_vars( + [ + ("DD_API_KEY", Some("_not_a_real_key_")), + ("K_SERVICE", Some("function_name")), + ], + || { + let config_res = config::Config::new(); + assert!(config_res.is_ok()); + let config = config_res.unwrap(); + assert_eq!( + config.trace_intake.url, + "https://trace.agent.datadoghq.com/api/v0.2/traces" + ); + assert_eq!( + config.trace_stats_intake.url, + 
"https://trace.agent.datadoghq.com/api/v0.2/stats" + ); + }, ); - env::remove_var("DD_API_KEY"); - env::remove_var("K_SERVICE"); } #[duplicate_item( @@ -322,16 +325,19 @@ mod tests { #[test] #[serial] fn test_name() { - env::set_var("DD_API_KEY", "_not_a_real_key_"); - env::set_var("K_SERVICE", "function_name"); - env::set_var("DD_SITE", dd_site); - let config_res = config::Config::new(); - assert!(config_res.is_ok()); - let config = config_res.unwrap(); - assert_eq!(config.trace_intake.url, expected_url); - env::remove_var("DD_API_KEY"); - env::remove_var("DD_SITE"); - env::remove_var("K_SERVICE"); + temp_env::with_vars( + [ + ("DD_API_KEY", Some("_not_a_real_key_")), + ("K_SERVICE", Some("function_name")), + ("DD_SITE", Some(dd_site)), + ], + || { + let config_res = config::Config::new(); + assert!(config_res.is_ok()); + let config = config_res.unwrap(); + assert_eq!(config.trace_intake.url, expected_url); + }, + ); } #[duplicate_item( @@ -346,193 +352,267 @@ mod tests { #[test] #[serial] fn test_name() { - env::set_var("DD_API_KEY", "_not_a_real_key_"); - env::set_var("K_SERVICE", "function_name"); - env::set_var("DD_SITE", dd_site); - let config_res = config::Config::new(); - assert!(config_res.is_ok()); - let config = config_res.unwrap(); - assert_eq!(config.trace_stats_intake.url, expected_url); - env::remove_var("DD_API_KEY"); - env::remove_var("DD_SITE"); - env::remove_var("K_SERVICE"); + temp_env::with_vars( + [ + ("DD_API_KEY", Some("_not_a_real_key_")), + ("K_SERVICE", Some("function_name")), + ("DD_SITE", Some(dd_site)), + ], + || { + let config_res = config::Config::new(); + assert!(config_res.is_ok()); + let config = config_res.unwrap(); + assert_eq!(config.trace_stats_intake.url, expected_url); + }, + ); } #[test] #[serial] fn test_set_custom_trace_and_trace_stats_intake_url() { - env::set_var("DD_API_KEY", "_not_a_real_key_"); - env::set_var("K_SERVICE", "function_name"); - env::set_var("DD_APM_DD_URL", "http://127.0.0.1:3333"); - let config_res = 
config::Config::new(); - assert!(config_res.is_ok()); - let config = config_res.unwrap(); - assert_eq!( - config.trace_intake.url, - "http://127.0.0.1:3333/api/v0.2/traces" - ); - assert_eq!( - config.trace_stats_intake.url, - "http://127.0.0.1:3333/api/v0.2/stats" + temp_env::with_vars( + [ + ("DD_API_KEY", Some("_not_a_real_key_")), + ("K_SERVICE", Some("function_name")), + ("DD_APM_DD_URL", Some("http://127.0.0.1:3333")), + ], + || { + let config_res = config::Config::new(); + assert!(config_res.is_ok()); + let config = config_res.unwrap(); + assert_eq!( + config.trace_intake.url, + "http://127.0.0.1:3333/api/v0.2/traces" + ); + assert_eq!( + config.trace_stats_intake.url, + "http://127.0.0.1:3333/api/v0.2/stats" + ); + }, ); - env::remove_var("DD_API_KEY"); - env::remove_var("DD_APM_DD_URL"); - env::remove_var("K_SERVICE"); } #[test] #[serial] #[cfg(any(all(windows, feature = "windows-pipes"), test))] fn test_apm_windows_pipe_name() { - env::set_var("DD_API_KEY", "_not_a_real_key_"); - env::set_var("ASCSVCRT_SPRING__APPLICATION__NAME", "test-spring-app"); - env::set_var("DD_APM_WINDOWS_PIPE_NAME", r"test_pipe"); - let config_res = config::Config::new(); - assert!(config_res.is_ok()); - let config = config_res.unwrap(); - assert_eq!( - config.dd_apm_windows_pipe_name, - Some(r"\\.\pipe\test_pipe".to_string()) + temp_env::with_vars( + [ + ("DD_API_KEY", Some("_not_a_real_key_")), + ( + "ASCSVCRT_SPRING__APPLICATION__NAME", + Some("test-spring-app"), + ), + ("DD_APM_WINDOWS_PIPE_NAME", Some(r"test_pipe")), + ], + || { + let config_res = config::Config::new(); + assert!(config_res.is_ok()); + let config = config_res.unwrap(); + assert_eq!( + config.dd_apm_windows_pipe_name, + Some(r"\\.\pipe\test_pipe".to_string()) + ); + + // Port should be overridden to 0 when pipe is set + assert_eq!(config.dd_apm_receiver_port, 0); + }, ); - - // Port should be overridden to 0 when pipe is set - assert_eq!(config.dd_apm_receiver_port, 0); - env::remove_var("DD_API_KEY"); - 
env::remove_var("ASCSVCRT_SPRING__APPLICATION__NAME"); - env::remove_var("DD_APM_WINDOWS_PIPE_NAME"); } #[test] #[serial] #[cfg(any(all(windows, feature = "windows-pipes"), test))] fn test_dogstatsd_windows_pipe_name() { - env::set_var("DD_API_KEY", "_not_a_real_key_"); - env::set_var("ASCSVCRT_SPRING__APPLICATION__NAME", "test-spring-app"); - env::set_var("DD_DOGSTATSD_WINDOWS_PIPE_NAME", r"test_pipe"); - let config_res = config::Config::new(); - assert!(config_res.is_ok()); - let config = config_res.unwrap(); - assert_eq!( - config.dd_dogstatsd_windows_pipe_name, - Some(r"\\.\pipe\test_pipe".to_string()) + temp_env::with_vars( + [ + ("DD_API_KEY", Some("_not_a_real_key_")), + ( + "ASCSVCRT_SPRING__APPLICATION__NAME", + Some("test-spring-app"), + ), + ("DD_DOGSTATSD_WINDOWS_PIPE_NAME", Some(r"test_pipe")), + ], + || { + let config_res = config::Config::new(); + assert!(config_res.is_ok()); + let config = config_res.unwrap(); + assert_eq!( + config.dd_dogstatsd_windows_pipe_name, + Some(r"\\.\pipe\test_pipe".to_string()) + ); + + // Port should be overridden to 0 when pipe is set + assert_eq!(config.dd_dogstatsd_port, 0); + }, ); - - // Port should be overridden to 0 when pipe is set - assert_eq!(config.dd_dogstatsd_port, 0); - env::remove_var("DD_API_KEY"); - env::remove_var("ASCSVCRT_SPRING__APPLICATION__NAME"); - env::remove_var("DD_DOGSTATSD_WINDOWS_PIPE_NAME"); } #[test] #[serial] fn test_default_dogstatsd_port() { - env::set_var("DD_API_KEY", "_not_a_real_key_"); - env::set_var("ASCSVCRT_SPRING__APPLICATION__NAME", "test-spring-app"); - let config_res = config::Config::new(); - assert!(config_res.is_ok()); - let config = config_res.unwrap(); - assert_eq!(config.dd_dogstatsd_port, 8125); - env::remove_var("DD_API_KEY"); - env::remove_var("ASCSVCRT_SPRING__APPLICATION__NAME"); + temp_env::with_vars( + [ + ("DD_API_KEY", Some("_not_a_real_key_")), + ( + "ASCSVCRT_SPRING__APPLICATION__NAME", + Some("test-spring-app"), + ), + ], + || { + let config_res = 
config::Config::new(); + assert!(config_res.is_ok()); + let config = config_res.unwrap(); + assert_eq!(config.dd_dogstatsd_port, 8125); + }, + ); } #[test] #[serial] fn test_custom_dogstatsd_port() { - env::set_var("DD_API_KEY", "_not_a_real_key_"); - env::set_var("ASCSVCRT_SPRING__APPLICATION__NAME", "test-spring-app"); - env::set_var("DD_DOGSTATSD_PORT", "18125"); - let config_res = config::Config::new(); - println!("{:?}", config_res); - assert!(config_res.is_ok()); - let config = config_res.unwrap(); - assert_eq!(config.dd_dogstatsd_port, 18125); - env::remove_var("DD_API_KEY"); - env::remove_var("ASCSVCRT_SPRING__APPLICATION__NAME"); - env::remove_var("DD_DOGSTATSD_PORT"); + temp_env::with_vars( + [ + ("DD_API_KEY", Some("_not_a_real_key_")), + ( + "ASCSVCRT_SPRING__APPLICATION__NAME", + Some("test-spring-app"), + ), + ("DD_DOGSTATSD_PORT", Some("18125")), + ], + || { + let config_res = config::Config::new(); + println!("{:?}", config_res); + assert!(config_res.is_ok()); + let config = config_res.unwrap(); + assert_eq!(config.dd_dogstatsd_port, 18125); + }, + ); } #[test] #[serial] fn test_default_apm_receiver_port() { - env::set_var("DD_API_KEY", "_not_a_real_key_"); - env::set_var("ASCSVCRT_SPRING__APPLICATION__NAME", "test-spring-app"); - let config_res = config::Config::new(); - assert!(config_res.is_ok()); - let config = config_res.unwrap(); - assert_eq!(config.dd_apm_receiver_port, 8126); - #[cfg(any(all(windows, feature = "windows-pipes"), test))] - assert_eq!(config.dd_apm_windows_pipe_name, None); - env::remove_var("DD_API_KEY"); - env::remove_var("ASCSVCRT_SPRING__APPLICATION__NAME"); + temp_env::with_vars( + [ + ("DD_API_KEY", Some("_not_a_real_key_")), + ( + "ASCSVCRT_SPRING__APPLICATION__NAME", + Some("test-spring-app"), + ), + ], + || { + let config_res = config::Config::new(); + assert!(config_res.is_ok()); + let config = config_res.unwrap(); + assert_eq!(config.dd_apm_receiver_port, 8126); + #[cfg(any(all(windows, feature = "windows-pipes"), 
test))] + assert_eq!(config.dd_apm_windows_pipe_name, None); + }, + ); } #[test] #[serial] fn test_custom_apm_receiver_port() { - env::set_var("DD_API_KEY", "_not_a_real_key_"); - env::set_var("ASCSVCRT_SPRING__APPLICATION__NAME", "test-spring-app"); - env::set_var("DD_APM_RECEIVER_PORT", "18126"); - let config_res = config::Config::new(); - assert!(config_res.is_ok()); - let config = config_res.unwrap(); - assert_eq!(config.dd_apm_receiver_port, 18126); - env::remove_var("DD_API_KEY"); - env::remove_var("ASCSVCRT_SPRING__APPLICATION__NAME"); - env::remove_var("DD_APM_RECEIVER_PORT"); + temp_env::with_vars( + [ + ("DD_API_KEY", Some("_not_a_real_key_")), + ( + "ASCSVCRT_SPRING__APPLICATION__NAME", + Some("test-spring-app"), + ), + ("DD_APM_RECEIVER_PORT", Some("18126")), + ], + || { + let config_res = config::Config::new(); + assert!(config_res.is_ok()); + let config = config_res.unwrap(); + assert_eq!(config.dd_apm_receiver_port, 18126); + }, + ); } - fn test_config_with_dd_tags(dd_tags: &str) -> config::Config { - env::set_var("DD_API_KEY", "_not_a_real_key_"); - env::set_var("ASCSVCRT_SPRING__APPLICATION__NAME", "test-spring-app"); - env::set_var("DD_TAGS", dd_tags); + /// Call from within temp_env::with_vars that set DD_API_KEY, ASCSVCRT_SPRING__APPLICATION__NAME, and DD_TAGS. 
+ fn test_config_with_dd_tags() -> config::Config { let config_res = config::Config::new(); assert!(config_res.is_ok()); - let config = config_res.unwrap(); - env::remove_var("DD_API_KEY"); - env::remove_var("ASCSVCRT_SPRING__APPLICATION__NAME"); - env::remove_var("DD_TAGS"); - config + config_res.unwrap() } #[test] #[serial] fn test_dd_tags_comma_separated() { - let config = test_config_with_dd_tags("some:tag,another:thing,invalid:thing:here"); - let expected_tags = HashMap::from([ - ("some".to_string(), "tag".to_string()), - ("another".to_string(), "thing".to_string()), - ]); - assert_eq!(config.tags.tags(), &expected_tags); - assert_eq!(config.tags.function_tags(), Some("another:thing,some:tag")); + temp_env::with_vars( + [ + ("DD_API_KEY", Some("_not_a_real_key_")), + ( + "ASCSVCRT_SPRING__APPLICATION__NAME", + Some("test-spring-app"), + ), + ("DD_TAGS", Some("some:tag,another:thing,invalid:thing:here")), + ], + || { + let config = test_config_with_dd_tags(); + let expected_tags = HashMap::from([ + ("some".to_string(), "tag".to_string()), + ("another".to_string(), "thing".to_string()), + ]); + assert_eq!(config.tags.tags(), &expected_tags); + assert_eq!(config.tags.function_tags(), Some("another:thing,some:tag")); + }, + ); } #[test] #[serial] fn test_dd_tags_space_separated() { - let config = test_config_with_dd_tags("some:tag another:thing invalid:thing:here"); - let expected_tags = HashMap::from([ - ("some".to_string(), "tag".to_string()), - ("another".to_string(), "thing".to_string()), - ]); - assert_eq!(config.tags.tags(), &expected_tags); - assert_eq!(config.tags.function_tags(), Some("another:thing,some:tag")); + temp_env::with_vars( + [ + ("DD_API_KEY", Some("_not_a_real_key_")), + ( + "ASCSVCRT_SPRING__APPLICATION__NAME", + Some("test-spring-app"), + ), + ("DD_TAGS", Some("some:tag another:thing invalid:thing:here")), + ], + || { + let config = test_config_with_dd_tags(); + let expected_tags = HashMap::from([ + ("some".to_string(), "tag".to_string()), 
+ ("another".to_string(), "thing".to_string()), + ]); + assert_eq!(config.tags.tags(), &expected_tags); + assert_eq!(config.tags.function_tags(), Some("another:thing,some:tag")); + }, + ); } #[test] #[serial] fn test_dd_tags_mixed_separators() { - let config = test_config_with_dd_tags("some:tag,another:thing extra:value"); - let expected_tags = HashMap::from([ - ("some".to_string(), "tag".to_string()), - ("another".to_string(), "thing".to_string()), - ("extra".to_string(), "value".to_string()), - ]); - assert_eq!(config.tags.tags(), &expected_tags); - assert_eq!( - config.tags.function_tags(), - Some("another:thing,extra:value,some:tag") + temp_env::with_vars( + [ + ("DD_API_KEY", Some("_not_a_real_key_")), + ( + "ASCSVCRT_SPRING__APPLICATION__NAME", + Some("test-spring-app"), + ), + ("DD_TAGS", Some("some:tag,another:thing extra:value")), + ], + || { + let config = test_config_with_dd_tags(); + let expected_tags = HashMap::from([ + ("some".to_string(), "tag".to_string()), + ("another".to_string(), "thing".to_string()), + ("extra".to_string(), "value".to_string()), + ]); + assert_eq!(config.tags.tags(), &expected_tags); + assert_eq!( + config.tags.function_tags(), + Some("another:thing,extra:value,some:tag") + ); + }, ); } @@ -540,24 +620,72 @@ mod tests { #[serial] fn test_dd_tags_no_valid_tags() { // Test with only invalid tags - let config = test_config_with_dd_tags("invalid:thing:here,also-bad"); - assert_eq!(config.tags.tags(), &HashMap::new()); - assert_eq!(config.tags.function_tags(), None); + temp_env::with_vars( + [ + ("DD_API_KEY", Some("_not_a_real_key_")), + ( + "ASCSVCRT_SPRING__APPLICATION__NAME", + Some("test-spring-app"), + ), + ("DD_TAGS", Some("invalid:thing:here,also-bad")), + ], + || { + let config = test_config_with_dd_tags(); + assert_eq!(config.tags.tags(), &HashMap::new()); + assert_eq!(config.tags.function_tags(), None); + }, + ); // Test with empty string - let config = test_config_with_dd_tags(""); - assert_eq!(config.tags.tags(), 
&HashMap::new()); - assert_eq!(config.tags.function_tags(), None); + temp_env::with_vars( + [ + ("DD_API_KEY", Some("_not_a_real_key_")), + ( + "ASCSVCRT_SPRING__APPLICATION__NAME", + Some("test-spring-app"), + ), + ("DD_TAGS", Some("")), + ], + || { + let config = test_config_with_dd_tags(); + assert_eq!(config.tags.tags(), &HashMap::new()); + assert_eq!(config.tags.function_tags(), None); + }, + ); // Test with just whitespace - let config = test_config_with_dd_tags(" "); - assert_eq!(config.tags.tags(), &HashMap::new()); - assert_eq!(config.tags.function_tags(), None); + temp_env::with_vars( + [ + ("DD_API_KEY", Some("_not_a_real_key_")), + ( + "ASCSVCRT_SPRING__APPLICATION__NAME", + Some("test-spring-app"), + ), + ("DD_TAGS", Some(" ")), + ], + || { + let config = test_config_with_dd_tags(); + assert_eq!(config.tags.tags(), &HashMap::new()); + assert_eq!(config.tags.function_tags(), None); + }, + ); // Test with just commas and spaces - let config = test_config_with_dd_tags(" , , "); - assert_eq!(config.tags.tags(), &HashMap::new()); - assert_eq!(config.tags.function_tags(), None); + temp_env::with_vars( + [ + ("DD_API_KEY", Some("_not_a_real_key_")), + ( + "ASCSVCRT_SPRING__APPLICATION__NAME", + Some("test-spring-app"), + ), + ("DD_TAGS", Some(" , , ")), + ], + || { + let config = test_config_with_dd_tags(); + assert_eq!(config.tags.tags(), &HashMap::new()); + assert_eq!(config.tags.function_tags(), None); + }, + ); } } diff --git a/crates/datadog-trace-agent/src/env_verifier.rs b/crates/datadog-trace-agent/src/env_verifier.rs index 74bed2cb..29fcc5c7 100644 --- a/crates/datadog-trace-agent/src/env_verifier.rs +++ b/crates/datadog-trace-agent/src/env_verifier.rs @@ -110,12 +110,16 @@ impl ServerlessEnvVerifier { metadata } Err(err) => { - error!("The Mini Agent can only be run in Google Cloud Functions & Azure Functions. Verification has failed, shutting down now. 
Error: {err}"); + error!( + "The Mini Agent can only be run in Google Cloud Functions & Azure Functions. Verification has failed, shutting down now. Error: {err}" + ); process::exit(1); } }, Err(_) => { - error!("Google Metadata request timeout of {verify_env_timeout} ms exceeded. Using default values."); + error!( + "Google Metadata request timeout of {verify_env_timeout} ms exceeded. Using default values." + ); GCPMetadata::default() } }; @@ -262,7 +266,9 @@ async fn verify_azure_environment_or_exit(os: &str) { debug!("Successfully verified Azure Function Environment."); } Err(e) => { - error!("The Mini Agent can only be run in Google Cloud Functions & Azure Functions. Verification has failed, shutting down now. Error: {e}"); + error!( + "The Mini Agent can only be run in Google Cloud Functions & Azure Functions. Verification has failed, shutting down now. Error: {e}" + ); process::exit(1); } } @@ -354,19 +360,19 @@ async fn ensure_azure_function_environment( #[cfg(test)] mod tests { use async_trait::async_trait; - use hyper::{body::Bytes, Response, StatusCode}; + use hyper::{Response, StatusCode, body::Bytes}; use libdd_common::hyper_migration; use libdd_trace_utils::trace_utils; use serde_json::json; use serial_test::serial; - use std::{env, fs, path::Path, time::Duration}; + use std::{fs, path::Path, time::Duration}; use crate::env_verifier::{ - ensure_azure_function_environment, ensure_gcp_function_environment, - get_region_from_gcp_region_string, is_azure_flex_without_resource_group, - AzureVerificationClient, AzureVerificationClientWrapper, GCPInstance, GCPMetadata, - GCPProject, GoogleMetadataClient, AZURE_FUNCTION_JSON_NAME, AZURE_HOST_JSON_NAME, - DD_AZURE_RESOURCE_GROUP, WEBSITE_SKU, + AZURE_FUNCTION_JSON_NAME, AZURE_HOST_JSON_NAME, AzureVerificationClient, + AzureVerificationClientWrapper, DD_AZURE_RESOURCE_GROUP, GCPInstance, GCPMetadata, + GCPProject, GoogleMetadataClient, WEBSITE_SKU, ensure_azure_function_environment, + 
ensure_gcp_function_environment, get_region_from_gcp_region_string, + is_azure_flex_without_resource_group, }; use super::{EnvVerifier, ServerlessEnvVerifier}; @@ -642,28 +648,36 @@ mod tests { #[test] #[serial] fn test_is_azure_flex_without_resource_group_true() { - env::remove_var(DD_AZURE_RESOURCE_GROUP); - env::set_var(WEBSITE_SKU, "FlexConsumption"); - assert!(is_azure_flex_without_resource_group()); - env::remove_var(WEBSITE_SKU); + temp_env::with_vars( + [ + (DD_AZURE_RESOURCE_GROUP, None::<&str>), + (WEBSITE_SKU, Some("FlexConsumption")), + ], + || assert!(is_azure_flex_without_resource_group()), + ); } #[test] #[serial] fn test_is_azure_flex_without_resource_group_false_resource_group_set() { - env::set_var(DD_AZURE_RESOURCE_GROUP, "test-resource-group"); - env::set_var(WEBSITE_SKU, "FlexConsumption"); - assert!(!is_azure_flex_without_resource_group()); - env::remove_var(DD_AZURE_RESOURCE_GROUP); - env::remove_var(WEBSITE_SKU); + temp_env::with_vars( + [ + (DD_AZURE_RESOURCE_GROUP, Some("test-resource-group")), + (WEBSITE_SKU, Some("FlexConsumption")), + ], + || assert!(!is_azure_flex_without_resource_group()), + ); } #[test] #[serial] fn test_is_azure_flex_without_resource_group_false_not_flex() { - env::remove_var(DD_AZURE_RESOURCE_GROUP); - env::set_var(WEBSITE_SKU, "ElasticPremium"); - assert!(!is_azure_flex_without_resource_group()); - env::remove_var(WEBSITE_SKU); + temp_env::with_vars( + [ + (DD_AZURE_RESOURCE_GROUP, None::<&str>), + (WEBSITE_SKU, Some("ElasticPremium")), + ], + || assert!(!is_azure_flex_without_resource_group()), + ); } } diff --git a/crates/datadog-trace-agent/src/http_utils.rs b/crates/datadog-trace-agent/src/http_utils.rs index c330cf20..74bc5103 100644 --- a/crates/datadog-trace-agent/src/http_utils.rs +++ b/crates/datadog-trace-agent/src/http_utils.rs @@ -4,9 +4,8 @@ use core::time::Duration; use datadog_fips::reqwest_adapter::create_reqwest_client_builder; use hyper::{ - header, + Response, StatusCode, header, http::{self, 
HeaderMap}, - Response, StatusCode, }; use libdd_common::hyper_migration; use serde_json::json; @@ -130,9 +129,9 @@ pub fn build_client( #[cfg(test)] mod tests { use http_body_util::BodyExt; - use hyper::header; use hyper::HeaderMap; use hyper::StatusCode; + use hyper::header; use libdd_common::hyper_migration; use super::verify_request_content_length; diff --git a/crates/datadog-trace-agent/src/mini_agent.rs b/crates/datadog-trace-agent/src/mini_agent.rs index 6af32b12..855290c7 100644 --- a/crates/datadog-trace-agent/src/mini_agent.rs +++ b/crates/datadog-trace-agent/src/mini_agent.rs @@ -3,7 +3,7 @@ use http_body_util::BodyExt; use hyper::service::service_fn; -use hyper::{http, Method, Response, StatusCode}; +use hyper::{Method, Response, StatusCode, http}; use libdd_common::hyper_migration; use serde_json::json; use std::io; diff --git a/crates/datadog-trace-agent/src/stats_flusher.rs b/crates/datadog-trace-agent/src/stats_flusher.rs index 573bbeb0..6c6e5805 100644 --- a/crates/datadog-trace-agent/src/stats_flusher.rs +++ b/crates/datadog-trace-agent/src/stats_flusher.rs @@ -3,7 +3,7 @@ use async_trait::async_trait; use std::{sync::Arc, time}; -use tokio::sync::{mpsc::Receiver, Mutex}; +use tokio::sync::{Mutex, mpsc::Receiver}; use tracing::{debug, error}; use libdd_trace_protobuf::pb; diff --git a/crates/datadog-trace-agent/src/stats_processor.rs b/crates/datadog-trace-agent/src/stats_processor.rs index d15be854..889e5f2c 100644 --- a/crates/datadog-trace-agent/src/stats_processor.rs +++ b/crates/datadog-trace-agent/src/stats_processor.rs @@ -5,7 +5,7 @@ use std::sync::Arc; use std::time::UNIX_EPOCH; use async_trait::async_trait; -use hyper::{http, StatusCode}; +use hyper::{StatusCode, http}; use libdd_common::hyper_migration; use tokio::sync::mpsc::Sender; use tracing::debug; diff --git a/crates/datadog-trace-agent/src/trace_flusher.rs b/crates/datadog-trace-agent/src/trace_flusher.rs index 03b78224..cf2619e0 100644 --- 
a/crates/datadog-trace-agent/src/trace_flusher.rs +++ b/crates/datadog-trace-agent/src/trace_flusher.rs @@ -3,10 +3,10 @@ use async_trait::async_trait; use std::{error::Error, sync::Arc, time}; -use tokio::sync::{mpsc::Receiver, Mutex}; +use tokio::sync::{Mutex, mpsc::Receiver}; use tracing::{debug, error}; -use libdd_common::{hyper_migration, GenericHttpClient}; +use libdd_common::{GenericHttpClient, hyper_migration}; use libdd_trace_utils::trace_utils; use libdd_trace_utils::trace_utils::SendData; diff --git a/crates/datadog-trace-agent/src/trace_processor.rs b/crates/datadog-trace-agent/src/trace_processor.rs index 41f6e9e8..16851371 100644 --- a/crates/datadog-trace-agent/src/trace_processor.rs +++ b/crates/datadog-trace-agent/src/trace_processor.rs @@ -4,7 +4,7 @@ use std::sync::Arc; use async_trait::async_trait; -use hyper::{http, StatusCode}; +use hyper::{StatusCode, http}; use libdd_common::hyper_migration; use tokio::sync::mpsc::Sender; use tracing::debug; @@ -123,19 +123,19 @@ impl TraceProcessor for ServerlessTraceProcessor { return log_and_create_traces_success_http_response( &format!("Error processing trace chunks: {err}"), StatusCode::INTERNAL_SERVER_ERROR, - ) + ); } }; // Add function_tags to payload if we can - if let Some(function_tags) = config.tags.function_tags() { - if let TracerPayloadCollection::V07(ref mut tracer_payloads) = payload { - for tracer_payload in tracer_payloads { - tracer_payload.tags.insert( - TRACER_PAYLOAD_FUNCTION_TAGS_TAG_KEY.to_string(), - function_tags.to_string(), - ); - } + if let Some(function_tags) = config.tags.function_tags() + && let TracerPayloadCollection::V07(ref mut tracer_payloads) = payload + { + for tracer_payload in tracer_payloads { + tracer_payload.tags.insert( + TRACER_PAYLOAD_FUNCTION_TAGS_TAG_KEY.to_string(), + function_tags.to_string(), + ); } } @@ -168,9 +168,9 @@ mod tests { use crate::{ config::{Config, Tags}, - trace_processor::{self, TraceProcessor, TRACER_PAYLOAD_FUNCTION_TAGS_TAG_KEY}, + 
trace_processor::{self, TRACER_PAYLOAD_FUNCTION_TAGS_TAG_KEY, TraceProcessor}, }; - use libdd_common::{hyper_migration, Endpoint}; + use libdd_common::{Endpoint, hyper_migration}; use libdd_trace_protobuf::pb; use libdd_trace_utils::test_utils::{create_test_gcp_json_span, create_test_gcp_span}; use libdd_trace_utils::trace_utils::MiniAgentMetadata; diff --git a/crates/datadog-trace-agent/tests/common/mock_server.rs b/crates/datadog-trace-agent/tests/common/mock_server.rs index 5e0b485d..b78b96c0 100644 --- a/crates/datadog-trace-agent/tests/common/mock_server.rs +++ b/crates/datadog-trace-agent/tests/common/mock_server.rs @@ -4,7 +4,7 @@ //! Simple mock HTTP server for testing flushers use http_body_util::BodyExt; -use hyper::{body::Incoming, Request, Response}; +use hyper::{Request, Response, body::Incoming}; use hyper_util::rt::TokioIo; use libdd_common::hyper_migration; use std::net::SocketAddr; diff --git a/crates/datadog-trace-agent/tests/integration_test.rs b/crates/datadog-trace-agent/tests/integration_test.rs index a240b812..bf28d4f8 100644 --- a/crates/datadog-trace-agent/tests/integration_test.rs +++ b/crates/datadog-trace-agent/tests/integration_test.rs @@ -7,7 +7,7 @@ use common::helpers::{create_test_trace_payload, send_tcp_request}; use common::mock_server::MockServer; use common::mocks::{MockEnvVerifier, MockStatsFlusher, MockStatsProcessor, MockTraceFlusher}; use datadog_trace_agent::{ - config::{test_helpers::create_tcp_test_config, Config}, + config::{Config, test_helpers::create_tcp_test_config}, mini_agent::MiniAgent, proxy_flusher::ProxyFlusher, trace_flusher::TraceFlusher, diff --git a/crates/dogstatsd/src/aggregator/core.rs b/crates/dogstatsd/src/aggregator/core.rs index 15799e59..62b53a0e 100644 --- a/crates/dogstatsd/src/aggregator/core.rs +++ b/crates/dogstatsd/src/aggregator/core.rs @@ -153,7 +153,10 @@ impl Aggregator { || (this_batch_size + next_chunk_size >= self.max_batch_bytes_sketch_metric) { if this_batch_size == 0 { - warn!("Only 
one distribution exceeds max batch size, adding it anyway: {:?} with {}", sketch.metric, next_chunk_size); + warn!( + "Only one distribution exceeds max batch size, adding it anyway: {:?} with {}", + sketch.metric, next_chunk_size + ); } else { batched_payloads.push(sketch_payload); sketch_payload = SketchPayload::new(); @@ -218,7 +221,10 @@ impl Aggregator { >= self.max_batch_bytes_single_metric) { if this_batch_size == 0 { - warn!("Only one metric exceeds max batch size, adding it anyway: {:?} with {}", metric.metric, serialized_metric_size); + warn!( + "Only one metric exceeds max batch size, adding it anyway: {:?} with {}", + metric.metric, serialized_metric_size + ); } else { batched_payloads.push(series_payload); series_payload = Series { @@ -320,7 +326,7 @@ fn build_metric(entry: &Metric, mut base_tag_vec: SortedTags) -> Option { if series_failed.is_empty() && sketches_failed.is_empty() { - debug!("Successfully flushed {n_series} series and {n_distributions} distributions"); + debug!( + "Successfully flushed {n_series} series and {n_distributions} distributions" + ); None // Return None to indicate success } else if series_had_error || sketches_had_error { // Only return the metrics if there was an actual shipping error - error!("Failed to flush some metrics due to shipping errors: {} series and {} sketches", - series_failed.len(), sketches_failed.len()); + error!( + "Failed to flush some metrics due to shipping errors: {} series and {} sketches", + series_failed.len(), + sketches_failed.len() + ); // Return the failed metrics for potential retry Some((series_failed, sketches_failed)) } else { diff --git a/crates/dogstatsd/src/metric.rs b/crates/dogstatsd/src/metric.rs index 72fe630e..7698e163 100644 --- a/crates/dogstatsd/src/metric.rs +++ b/crates/dogstatsd/src/metric.rs @@ -406,7 +406,7 @@ mod tests { use proptest::{collection, option, strategy::Strategy, string::string_regex}; use ustr::Ustr; - use crate::metric::{id, parse, timestamp_to_bucket, 
MetricValue, SortedTags}; + use crate::metric::{MetricValue, SortedTags, id, parse, timestamp_to_bucket}; use super::ParseError; @@ -629,15 +629,19 @@ mod tests { #[test] fn parse_tag_no_value() { - let result = parse("datadog.tracer.flush_triggered:1|c|#lang:go,lang_version:go1.22.10,_dd.origin:lambda,runtime-id:d66f501c-d09b-4d0d-970f-515235c4eb56,v1.65.1,service:aws.lambda,reason:scheduled"); + let result = parse( + "datadog.tracer.flush_triggered:1|c|#lang:go,lang_version:go1.22.10,_dd.origin:lambda,runtime-id:d66f501c-d09b-4d0d-970f-515235c4eb56,v1.65.1,service:aws.lambda,reason:scheduled", + ); assert!(result.is_ok()); - assert!(result - .unwrap() - .tags - .unwrap() - .values - .iter() - .any(|(k, v)| k == "v1.65.1" && v.is_empty())); + assert!( + result + .unwrap() + .tags + .unwrap() + .values + .iter() + .any(|(k, v)| k == "v1.65.1" && v.is_empty()) + ); } #[test] diff --git a/crates/dogstatsd/src/util.rs b/crates/dogstatsd/src/util.rs index c0137383..8a16ef0b 100644 --- a/crates/dogstatsd/src/util.rs +++ b/crates/dogstatsd/src/util.rs @@ -58,7 +58,8 @@ pub fn parse_metric_namespace(namespace: &str) -> Option { { tracing::error!( "DD_STATSD_METRIC_NAMESPACE contains invalid character '{}' in '{}'. Only ASCII alphanumerics, underscores, and periods are allowed. 
Ignoring namespace.", - invalid_char, trimmed + invalid_char, + trimmed ); return None; } diff --git a/crates/dogstatsd/tests/integration_test.rs b/crates/dogstatsd/tests/integration_test.rs index 49a6f1c2..3155ef87 100644 --- a/crates/dogstatsd/tests/integration_test.rs +++ b/crates/dogstatsd/tests/integration_test.rs @@ -15,7 +15,7 @@ use mockito::Server; use std::sync::Arc; use tokio::{ net::UdpSocket, - time::{sleep, timeout, Duration}, + time::{Duration, sleep, timeout}, }; use tokio_util::sync::CancellationToken; use zstd::zstd_safe::CompressionLevel; @@ -133,7 +133,7 @@ async fn start_dogstatsd_on_port( #[tokio::test] async fn test_send_with_retry_immediate_failure() { use dogstatsd::datadog::{DdApi, DdDdUrl, RetryStrategy}; - use dogstatsd::metric::{parse, SortedTags}; + use dogstatsd::metric::{SortedTags, parse}; let mut server = Server::new_async().await; let mock = server @@ -182,7 +182,7 @@ async fn test_send_with_retry_immediate_failure() { #[tokio::test] async fn test_send_with_retry_linear_backoff_success() { use dogstatsd::datadog::{DdApi, DdDdUrl, RetryStrategy}; - use dogstatsd::metric::{parse, SortedTags}; + use dogstatsd::metric::{SortedTags, parse}; let mut server = Server::new_async().await; let mock = server @@ -246,7 +246,7 @@ async fn test_send_with_retry_linear_backoff_success() { async fn test_send_with_retry_immediate_failure_after_one_attempt() { use dogstatsd::datadog::{DdApi, DdDdUrl, RetryStrategy}; use dogstatsd::flusher::ShippingError; - use dogstatsd::metric::{parse, SortedTags}; + use dogstatsd::metric::{SortedTags, parse}; let mut server = Server::new_async().await; let mock = server