From bc0a1f7cb275a966f672911638ce611dbbf06157 Mon Sep 17 00:00:00 2001 From: jevansnyc Date: Tue, 3 Feb 2026 07:39:11 +1000 Subject: [PATCH 1/5] Add DataDome bot protection integration --- crates/common/src/integrations/datadome.rs | 788 ++++++++++++++++++ crates/common/src/integrations/mod.rs | 2 + .../js/lib/src/integrations/datadome/index.ts | 23 + .../src/integrations/datadome/script_guard.ts | 185 ++++ .../datadome/script_guard.test.ts | 523 ++++++++++++ docs/.vitepress/config.mts | 6 + docs/guide/integrations/datadome.md | 170 ++++ trusted-server.toml | 13 + 8 files changed, 1710 insertions(+) create mode 100644 crates/common/src/integrations/datadome.rs create mode 100644 crates/js/lib/src/integrations/datadome/index.ts create mode 100644 crates/js/lib/src/integrations/datadome/script_guard.ts create mode 100644 crates/js/lib/test/integrations/datadome/script_guard.test.ts create mode 100644 docs/guide/integrations/datadome.md diff --git a/crates/common/src/integrations/datadome.rs b/crates/common/src/integrations/datadome.rs new file mode 100644 index 00000000..479bf1be --- /dev/null +++ b/crates/common/src/integrations/datadome.rs @@ -0,0 +1,788 @@ +//! `DataDome` integration for bot protection and security. +//! +//! This module provides transparent proxying for `DataDome`'s JavaScript tag and signal +//! collection API, enabling first-party bot protection while maintaining the permissionless +//! Trusted Server approach (no DNS/CNAME changes required). +//! +//! ## Endpoints +//! +//! - `GET /integrations/datadome/tags.js` - Proxies the `DataDome` SDK script +//! - `GET`/`POST /integrations/datadome/js/*` - Proxies signal collection API calls +//! +//! ## Script Rewriting +//! +//! The integration rewrites the `tags.js` script to replace hardcoded `DataDome` API +//! endpoints with first-party paths through Trusted Server. This ensures all browser +//! requests go through the publisher's domain rather than directly to `DataDome`. 
+ +use std::sync::Arc; + +use async_trait::async_trait; +use error_stack::{Report, ResultExt}; +use fastly::http::{header, Method, StatusCode}; +use fastly::{Request, Response}; +use regex::Regex; +use serde::Deserialize; +use validator::Validate; + +use crate::backend::ensure_backend_from_url; +use crate::error::TrustedServerError; +use crate::integrations::{ + AttributeRewriteAction, IntegrationAttributeContext, IntegrationAttributeRewriter, + IntegrationEndpoint, IntegrationProxy, IntegrationRegistration, +}; +use crate::settings::{IntegrationConfig, Settings}; + +const DATADOME_INTEGRATION_ID: &str = "datadome"; + +/// Configuration for `DataDome` integration. +#[derive(Debug, Clone, Deserialize, Validate)] +pub struct DataDomeConfig { + /// Enable/disable the integration + #[serde(default = "default_enabled")] + pub enabled: bool, + + /// `DataDome` JavaScript key (client-side key from `DataDome` dashboard) + /// If provided, Trusted Server can inject the config script automatically + #[serde(default)] + pub js_key: Option, + + /// Base URL for `DataDome` SDK script (default: ) + /// Used for fetching and serving tags.js + #[serde(default = "default_sdk_origin")] + #[validate(url)] + pub sdk_origin: String, + + /// Base URL for `DataDome` signal collection API (default: ) + /// Used for proxying /js/* API requests + #[serde(default = "default_api_origin")] + #[validate(url)] + pub api_origin: String, + + /// Cache TTL for tags.js in seconds (default: 3600 = 1 hour) + #[serde(default = "default_cache_ttl")] + #[validate(range(min = 60, max = 86400))] + pub cache_ttl_seconds: u32, + + /// Whether to rewrite `DataDome` script URLs in HTML to first-party paths + #[serde(default = "default_rewrite_sdk")] + pub rewrite_sdk: bool, +} + +fn default_enabled() -> bool { + false +} + +fn default_sdk_origin() -> String { + "https://js.datadome.co".to_string() +} + +fn default_api_origin() -> String { + "https://api-js.datadome.co".to_string() +} + +fn default_cache_ttl() 
-> u32 { + 3600 +} + +fn default_rewrite_sdk() -> bool { + true +} + +impl Default for DataDomeConfig { + fn default() -> Self { + Self { + enabled: default_enabled(), + js_key: None, + sdk_origin: default_sdk_origin(), + api_origin: default_api_origin(), + cache_ttl_seconds: default_cache_ttl(), + rewrite_sdk: default_rewrite_sdk(), + } + } +} + +impl IntegrationConfig for DataDomeConfig { + fn is_enabled(&self) -> bool { + self.enabled + } +} + +/// `DataDome` integration implementation. +pub struct DataDomeIntegration { + config: DataDomeConfig, +} + +impl DataDomeIntegration { + fn new(config: DataDomeConfig) -> Arc { + Arc::new(Self { config }) + } + + fn error(message: impl Into) -> TrustedServerError { + TrustedServerError::Integration { + integration: DATADOME_INTEGRATION_ID.to_string(), + message: message.into(), + } + } + + /// Rewrite `DataDome` API URLs in the tags.js script to use first-party paths. + /// + /// `DataDome`'s script contains hardcoded references like: + /// - `js.datadome.co/tags.js` for SDK script + /// - `api-js.datadome.co/js/` for signal collection API + /// - `js.datadome.co` as bare domain references + /// + /// We rewrite these to root-relative paths like `/integrations/datadome/...` so all traffic + /// flows through Trusted Server. Root-relative paths work correctly regardless of the + /// current page path. + /// + /// Uses regex to handle all URL variants: + /// - Absolute URLs: `https://js.datadome.co/path` or `https://api-js.datadome.co/path` + /// - Protocol-relative: `//js.datadome.co/path` or `//api-js.datadome.co/path` + /// - Bare domain: `js.datadome.co/path` or `api-js.datadome.co/path` + /// - All quote styles: `"..."` and `'...'` + fn rewrite_script_content(&self, content: &str) -> String { + // Pattern breakdown: + // (['"]) - Capture group 1: opening quote (single or double) + // (https?:)? - Capture group 2: optional protocol (http: or https:) + // (//)? 
- Capture group 3: optional protocol-relative slashes + // (api-)? - Capture group 4: optional "api-" prefix for api-js.datadome.co + // js\.datadome\.co - Literal domain we're rewriting + // (/[^'"]*)? - Capture group 5: optional path (everything until closing quote) + // (['"]) - Capture group 6: closing quote + // + // This handles: + // - "https://js.datadome.co/tags.js" + // - "https://api-js.datadome.co/js/check" + // - '//js.datadome.co/js/check' + // - "api-js.datadome.co/js/check" + // - "js.datadome.co" + let pattern = Regex::new(r#"(['"])(https?:)?(//)?(api-)?js\.datadome\.co(/[^'"]*)?(['"])"#) + .expect("DataDome URL rewrite regex should compile"); + + pattern + .replace_all(content, |caps: ®ex::Captures| { + let open_quote = &caps[1]; + let path = caps.get(5).map_or("", |m| m.as_str()); + let close_quote = &caps[6]; + + // Rewrite to root-relative first-party paths + // The path already includes the leading slash if present + if path.is_empty() { + // Bare domain reference: "js.datadome.co" or "api-js.datadome.co" + format!("{}/integrations/datadome{}", open_quote, close_quote) + } else { + // Domain with path: "js.datadome.co/js/check" or "api-js.datadome.co/js/check" + format!( + "{}/integrations/datadome{}{}", + open_quote, path, close_quote + ) + } + }) + .into_owned() + } + + /// Build target URL for proxying SDK requests to `DataDome` (js.datadome.co). + fn build_sdk_url(&self, path: &str, query: Option<&str>) -> String { + let base = self.config.sdk_origin.trim_end_matches('/'); + match query { + Some(q) => format!("{}{}?{}", base, path, q), + None => format!("{}{}", base, path), + } + } + + /// Build target URL for proxying API requests to `DataDome` (api-js.datadome.co). 
+ fn build_api_url(&self, path: &str, query: Option<&str>) -> String { + let base = self.config.api_origin.trim_end_matches('/'); + match query { + Some(q) => format!("{}{}?{}", base, path, q), + None => format!("{}{}", base, path), + } + } + + /// Extract the host from a URL for use in the Host header. + fn extract_host(url: &str) -> &str { + url.trim_start_matches("https://") + .trim_start_matches("http://") + .split('/') + .next() + .unwrap_or("api-js.datadome.co") + } + + /// Handle the /tags.js endpoint - fetch and rewrite the `DataDome` SDK. + async fn handle_tags_js( + &self, + _settings: &Settings, + req: Request, + ) -> Result> { + let target_url = self.build_sdk_url("/tags.js", req.get_query_str()); + + log::info!("[datadome] Fetching tags.js from {}", target_url); + + let backend = + ensure_backend_from_url(&target_url).change_context(Self::error("Invalid SDK URL"))?; + + let sdk_host = Self::extract_host(&self.config.sdk_origin); + + let mut backend_req = Request::new(Method::GET, &target_url); + backend_req.set_header(header::HOST, sdk_host); + backend_req.set_header(header::ACCEPT, "application/javascript, */*"); + + // Copy relevant headers from original request + if let Some(ua) = req.get_header(header::USER_AGENT) { + backend_req.set_header(header::USER_AGENT, ua); + } + + let mut backend_resp = backend_req + .send(&backend) + .change_context(Self::error("Failed to fetch tags.js from DataDome"))?; + + if backend_resp.get_status() != StatusCode::OK { + log::warn!( + "[datadome] tags.js fetch returned status {}", + backend_resp.get_status() + ); + return Ok(backend_resp); + } + + // Read and rewrite the script content + let body = backend_resp.take_body_str(); + let rewritten = self.rewrite_script_content(&body); + + // Build response with caching headers + let mut response = Response::new(); + response.set_status(StatusCode::OK); + response.set_header( + header::CONTENT_TYPE, + "application/javascript; charset=utf-8", + ); + response.set_header( + 
header::CACHE_CONTROL, + format!("public, max-age={}", self.config.cache_ttl_seconds), + ); + + // Copy CORS headers if present + if let Some(cors) = backend_resp.get_header(header::ACCESS_CONTROL_ALLOW_ORIGIN) { + response.set_header(header::ACCESS_CONTROL_ALLOW_ORIGIN, cors); + } + + response.set_body(rewritten); + Ok(response) + } + + /// Handle the /js/* signal collection endpoint - proxy pass-through to api-js.datadome.co. + async fn handle_js_api( + &self, + _settings: &Settings, + req: Request, + ) -> Result> { + let original_path = req.get_path(); + + // Strip our prefix to get the DataDome path + let datadome_path = original_path + .strip_prefix("/integrations/datadome") + .unwrap_or(original_path); + + // Use api_origin (api-js.datadome.co) for signal collection requests + let target_url = self.build_api_url(datadome_path, req.get_query_str()); + let api_host = Self::extract_host(&self.config.api_origin); + + log::info!( + "[datadome] Proxying signal request to {} (method: {}, host: {})", + target_url, + req.get_method(), + api_host + ); + + let backend = + ensure_backend_from_url(&target_url).change_context(Self::error("Invalid API URL"))?; + + let mut backend_req = Request::new(req.get_method().clone(), &target_url); + backend_req.set_header(header::HOST, api_host); + + // Copy relevant headers + let headers_to_copy = [ + header::USER_AGENT, + header::ACCEPT, + header::ACCEPT_LANGUAGE, + header::ACCEPT_ENCODING, + header::CONTENT_TYPE, + header::CONTENT_LENGTH, + header::ORIGIN, + header::REFERER, + ]; + + for h in &headers_to_copy { + if let Some(value) = req.get_header(h) { + backend_req.set_header(h, value); + } + } + + // Copy body for POST/PUT requests + if req.get_method() == Method::POST || req.get_method() == Method::PUT { + let body = req.into_body(); + backend_req.set_body(body); + } + + let backend_resp = backend_req + .send(&backend) + .change_context(Self::error("Failed to proxy signal request to DataDome"))?; + + log::info!( + "[datadome] 
Signal request returned status {}", + backend_resp.get_status() + ); + + Ok(backend_resp) + } + + /// Extract the path portion after the `DataDome` domain from a URL. + /// + /// Returns the path (including leading slash) or `/tags.js` as default. + fn extract_datadome_path(url: &str) -> &str { + url.split_once("js.datadome.co") + .and_then(|(_, after)| { + if after.starts_with('/') { + Some(after) + } else { + None + } + }) + .unwrap_or("/tags.js") + } +} + +#[async_trait(?Send)] +impl IntegrationProxy for DataDomeIntegration { + fn integration_name(&self) -> &'static str { + DATADOME_INTEGRATION_ID + } + + fn routes(&self) -> Vec { + vec![ + // SDK script endpoint + self.get("/tags.js"), + // Signal collection API - all methods + // Need both exact /js/ and wildcard /js/* since matchit's {*rest} requires content + self.get("/js/"), + self.get("/js/*"), + self.post("/js/"), + self.post("/js/*"), + ] + } + + async fn handle( + &self, + settings: &Settings, + req: Request, + ) -> Result> { + let path = req.get_path(); + + if path == "/integrations/datadome/tags.js" { + self.handle_tags_js(settings, req).await + } else if path.starts_with("/integrations/datadome/js/") { + self.handle_js_api(settings, req).await + } else { + Err(Report::new(Self::error(format!( + "Unknown DataDome route: {}", + path + )))) + } + } +} + +impl IntegrationAttributeRewriter for DataDomeIntegration { + fn integration_id(&self) -> &'static str { + DATADOME_INTEGRATION_ID + } + + fn handles_attribute(&self, attribute: &str) -> bool { + self.config.rewrite_sdk && matches!(attribute, "src" | "href") + } + + fn rewrite( + &self, + _attr_name: &str, + attr_value: &str, + ctx: &IntegrationAttributeContext<'_>, + ) -> AttributeRewriteAction { + // Check if this is a DataDome script URL + let is_datadome = + attr_value.contains("js.datadome.co") || attr_value.contains("datadome.co/tags.js"); + + if !is_datadome { + return AttributeRewriteAction::Keep; + } + + let path = 
Self::extract_datadome_path(attr_value); + let new_url = format!( + "{}://{}/integrations/datadome{}", + ctx.request_scheme, ctx.request_host, path + ); + + log::info!( + "[datadome] Rewriting script src from {} to {}", + attr_value, + new_url + ); + + AttributeRewriteAction::Replace(new_url) + } +} + +fn build(settings: &Settings) -> Option> { + let config = match settings.integration_config::(DATADOME_INTEGRATION_ID) { + Ok(Some(config)) => config, + Ok(None) => { + log::debug!("[datadome] Integration disabled or not configured"); + return None; + } + Err(err) => { + log::error!("[datadome] Failed to load integration config: {err:?}"); + return None; + } + }; + + log::info!( + "[datadome] Registering integration (sdk_origin: {}, rewrite_sdk: {})", + config.sdk_origin, + config.rewrite_sdk + ); + + Some(DataDomeIntegration::new(config)) +} + +/// Register the `DataDome` integration with Trusted Server. +#[must_use] +pub fn register(settings: &Settings) -> Option { + let integration = build(settings)?; + + Some( + IntegrationRegistration::builder(DATADOME_INTEGRATION_ID) + .with_proxy(integration.clone()) + .with_attribute_rewriter(integration) + .build(), + ) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn test_config() -> DataDomeConfig { + DataDomeConfig { + enabled: true, + js_key: Some("test-key".to_string()), + sdk_origin: "https://js.datadome.co".to_string(), + api_origin: "https://api-js.datadome.co".to_string(), + cache_ttl_seconds: 3600, + rewrite_sdk: true, + } + } + + #[test] + fn rewrite_script_content() { + let integration = DataDomeIntegration::new(test_config()); + + let original = r#" + var endpoint = "js.datadome.co/js/"; + var endpoint2 = "https://js.datadome.co/js/endpoint"; + var host = "js.datadome.co"; + "#; + + let rewritten = integration.rewrite_script_content(original); + + // All URLs should be rewritten to root-relative /integrations/datadome/... 
+ assert!( + rewritten.contains("\"/integrations/datadome/js/\""), + "Bare domain with path should be rewritten to root-relative. Got: {}", + rewritten + ); + assert!( + rewritten.contains("\"/integrations/datadome/js/endpoint\""), + "Absolute URL should be rewritten to root-relative. Got: {}", + rewritten + ); + assert!( + rewritten.contains("\"/integrations/datadome\""), + "Bare domain should be rewritten to root-relative. Got: {}", + rewritten + ); + // Original domain should not appear + assert!( + !rewritten.contains("js.datadome.co"), + "Original domain should be replaced. Got: {}", + rewritten + ); + } + + #[test] + fn rewrite_script_content_all_url_formats() { + let integration = DataDomeIntegration::new(test_config()); + + // Test all URL format variations + let original = r#" + var a = "js.datadome.co/js/check"; + var b = 'js.datadome.co/js/check'; + var c = "//js.datadome.co/js/check"; + var d = '//js.datadome.co/js/check'; + var e = "https://js.datadome.co/js/check"; + var f = 'https://js.datadome.co/js/check'; + var g = "http://js.datadome.co/js/check"; + var h = "js.datadome.co"; + var i = 'js.datadome.co'; + "#; + + let rewritten = integration.rewrite_script_content(original); + + // Check each format is rewritten correctly to root-relative paths + assert!(rewritten.contains(r#"var a = "/integrations/datadome/js/check""#)); + assert!(rewritten.contains(r#"var b = '/integrations/datadome/js/check'"#)); + assert!(rewritten.contains(r#"var c = "/integrations/datadome/js/check""#)); + assert!(rewritten.contains(r#"var d = '/integrations/datadome/js/check'"#)); + assert!(rewritten.contains(r#"var e = "/integrations/datadome/js/check""#)); + assert!(rewritten.contains(r#"var f = '/integrations/datadome/js/check'"#)); + assert!(rewritten.contains(r#"var g = "/integrations/datadome/js/check""#)); + assert!(rewritten.contains(r#"var h = "/integrations/datadome""#)); + assert!(rewritten.contains(r#"var i = '/integrations/datadome'"#)); + + // No original 
domain should remain + assert!(!rewritten.contains("js.datadome.co")); + } + + #[test] + fn rewrite_script_content_preserves_non_datadome_urls() { + let integration = DataDomeIntegration::new(test_config()); + + let original = r#" + var other = "https://example.com/some/path"; + var datadome = "https://js.datadome.co/js/check"; + var text = "This mentions js.datadome.co in text"; + "#; + + let rewritten = integration.rewrite_script_content(original); + + // Non-DataDome URLs should be preserved + assert!(rewritten.contains(r#""https://example.com/some/path""#)); + // DataDome URL should be rewritten to root-relative path + assert!(rewritten.contains(r#""/integrations/datadome/js/check""#)); + // Plain text mention (not in quotes as URL) should be preserved + // The regex only matches quoted strings, so inline text is untouched + assert!(rewritten.contains("mentions js.datadome.co in text")); + } + + #[test] + fn rewrite_script_content_api_js_subdomain() { + let integration = DataDomeIntegration::new(test_config()); + + // Test api-js.datadome.co URLs (signal collection API) + let original = r#" + var apiEndpoint = "https://api-js.datadome.co/js/"; + var apiCheck = "api-js.datadome.co/js/check"; + var apiProtocolRelative = "//api-js.datadome.co/js/signal"; + var sdkUrl = "https://js.datadome.co/tags.js"; + "#; + + let rewritten = integration.rewrite_script_content(original); + + // api-js.datadome.co URLs should be rewritten to root-relative paths + assert!( + rewritten.contains(r#""/integrations/datadome/js/""#), + "Absolute api-js URL should be rewritten. Got: {}", + rewritten + ); + assert!( + rewritten.contains(r#""/integrations/datadome/js/check""#), + "Bare api-js URL should be rewritten. Got: {}", + rewritten + ); + assert!( + rewritten.contains(r#""/integrations/datadome/js/signal""#), + "Protocol-relative api-js URL should be rewritten. 
Got: {}", + rewritten + ); + // js.datadome.co should also be rewritten + assert!( + rewritten.contains(r#""/integrations/datadome/tags.js""#), + "SDK URL should be rewritten. Got: {}", + rewritten + ); + + // No original DataDome domains should remain + assert!( + !rewritten.contains("api-js.datadome.co"), + "api-js.datadome.co should be replaced. Got: {}", + rewritten + ); + assert!( + !rewritten.contains("js.datadome.co"), + "js.datadome.co should be replaced. Got: {}", + rewritten + ); + } + + #[test] + fn build_sdk_url() { + let integration = DataDomeIntegration::new(test_config()); + + assert_eq!( + integration.build_sdk_url("/tags.js", None), + "https://js.datadome.co/tags.js" + ); + + assert_eq!( + integration.build_sdk_url("/tags.js", Some("key=abc")), + "https://js.datadome.co/tags.js?key=abc" + ); + } + + #[test] + fn build_api_url() { + let integration = DataDomeIntegration::new(test_config()); + + assert_eq!( + integration.build_api_url("/js/check", None), + "https://api-js.datadome.co/js/check" + ); + + assert_eq!( + integration.build_api_url("/js/check", Some("foo=bar")), + "https://api-js.datadome.co/js/check?foo=bar" + ); + } + + #[test] + fn extract_host() { + assert_eq!( + DataDomeIntegration::extract_host("https://api-js.datadome.co"), + "api-js.datadome.co" + ); + assert_eq!( + DataDomeIntegration::extract_host("https://js.datadome.co/path"), + "js.datadome.co" + ); + assert_eq!( + DataDomeIntegration::extract_host("http://example.com:8080/path"), + "example.com:8080" + ); + } + + #[test] + fn extract_datadome_path() { + assert_eq!( + DataDomeIntegration::extract_datadome_path("https://js.datadome.co/tags.js"), + "/tags.js" + ); + assert_eq!( + DataDomeIntegration::extract_datadome_path("//js.datadome.co/js/check"), + "/js/check" + ); + assert_eq!( + DataDomeIntegration::extract_datadome_path("js.datadome.co/js/signal"), + "/js/signal" + ); + // Bare domain without path should default to /tags.js + assert_eq!( + 
DataDomeIntegration::extract_datadome_path("https://js.datadome.co"), + "/tags.js" + ); + // api-js subdomain + assert_eq!( + DataDomeIntegration::extract_datadome_path("https://api-js.datadome.co/js/"), + "/js/" + ); + } + + #[test] + fn attribute_rewriter_matches_datadome() { + let integration = DataDomeIntegration::new(test_config()); + + // Should handle both src and href attributes + assert!(integration.handles_attribute("src")); + assert!(integration.handles_attribute("href")); + assert!(!integration.handles_attribute("data-src")); + + let ctx = IntegrationAttributeContext { + attribute_name: "src", + request_host: "publisher.com", + request_scheme: "https", + origin_host: "origin.publisher.com", + }; + + // Should rewrite DataDome URLs in src + let action = integration.rewrite("src", "https://js.datadome.co/tags.js", &ctx); + match action { + AttributeRewriteAction::Replace(new_url) => { + assert_eq!( + new_url, + "https://publisher.com/integrations/datadome/tags.js" + ); + } + _ => panic!("Expected Replace action"), + } + + // Should rewrite DataDome URLs in href (for link preload/prefetch) + let action = integration.rewrite("href", "https://js.datadome.co/tags.js", &ctx); + match action { + AttributeRewriteAction::Replace(new_url) => { + assert_eq!( + new_url, + "https://publisher.com/integrations/datadome/tags.js" + ); + } + _ => panic!("Expected Replace action for href"), + } + + // Should not rewrite other URLs + let action = integration.rewrite("src", "https://example.com/script.js", &ctx); + assert!(matches!(action, AttributeRewriteAction::Keep)); + } + + #[test] + fn attribute_rewriter_preserves_path() { + let integration = DataDomeIntegration::new(test_config()); + + let ctx = IntegrationAttributeContext { + attribute_name: "src", + request_host: "publisher.com", + request_scheme: "https", + origin_host: "origin.publisher.com", + }; + + // Should preserve /js/... 
paths for signal collection API + let action = integration.rewrite("src", "https://js.datadome.co/js/check", &ctx); + match action { + AttributeRewriteAction::Replace(new_url) => { + assert_eq!( + new_url, + "https://publisher.com/integrations/datadome/js/check" + ); + } + _ => panic!("Expected Replace action"), + } + + // Should handle protocol-relative URLs + let action = integration.rewrite("href", "//js.datadome.co/js/signal", &ctx); + match action { + AttributeRewriteAction::Replace(new_url) => { + assert_eq!( + new_url, + "https://publisher.com/integrations/datadome/js/signal" + ); + } + _ => panic!("Expected Replace action for protocol-relative URL"), + } + + // Bare domain without path should default to /tags.js + let action = integration.rewrite("src", "https://js.datadome.co", &ctx); + match action { + AttributeRewriteAction::Replace(new_url) => { + assert_eq!( + new_url, + "https://publisher.com/integrations/datadome/tags.js" + ); + } + _ => panic!("Expected Replace action for bare domain"), + } + } +} diff --git a/crates/common/src/integrations/mod.rs b/crates/common/src/integrations/mod.rs index af1b5ea1..83041811 100644 --- a/crates/common/src/integrations/mod.rs +++ b/crates/common/src/integrations/mod.rs @@ -4,6 +4,7 @@ use crate::settings::Settings; pub mod adserver_mock; pub mod aps; +pub mod datadome; pub mod didomi; pub mod lockr; pub mod nextjs; @@ -30,5 +31,6 @@ pub(crate) fn builders() -> &'static [IntegrationBuilder] { permutive::register, lockr::register, didomi::register, + datadome::register, ] } diff --git a/crates/js/lib/src/integrations/datadome/index.ts b/crates/js/lib/src/integrations/datadome/index.ts new file mode 100644 index 00000000..b7dacdeb --- /dev/null +++ b/crates/js/lib/src/integrations/datadome/index.ts @@ -0,0 +1,23 @@ +import { log } from '../../core/log'; + +import { installDataDomeGuard } from './script_guard'; + +/** + * DataDome integration for tsjs + * + * Installs a script guard to intercept dynamically inserted 
DataDome SDK + * scripts and rewrites them to use the first-party proxy endpoint. + * + * The guard intercepts: + * - Script elements with src containing js.datadome.co + * - Link preload elements for DataDome scripts + * + * URLs are rewritten to preserve the original path: + * - https://js.datadome.co/tags.js -> /integrations/datadome/tags.js + * - https://js.datadome.co/js/check -> /integrations/datadome/js/check + */ + +if (typeof window !== 'undefined') { + installDataDomeGuard(); + log.info('DataDome integration initialized'); +} diff --git a/crates/js/lib/src/integrations/datadome/script_guard.ts b/crates/js/lib/src/integrations/datadome/script_guard.ts new file mode 100644 index 00000000..ed5cd55a --- /dev/null +++ b/crates/js/lib/src/integrations/datadome/script_guard.ts @@ -0,0 +1,185 @@ +import { log } from '../../core/log'; + +/** + * DataDome SDK Script Interception Guard + * + * Intercepts any dynamically inserted script tag that loads the DataDome SDK + * and rewrites it to use the first-party domain proxy endpoint. This works + * across all frameworks (Next.js, Nuxt, Gatsby, vanilla JS, etc.) and catches + * scripts inserted via appendChild, insertBefore, or any other dynamic DOM + * manipulation. + * + * Unlike Lockr/Permutive guards that use a fixed proxy path, the DataDome guard + * preserves the original path from the DataDome URL (e.g., /tags.js, /js/check) + * in the rewritten first-party URL. + */ + +let installed = false; +const GUARD_NAME = 'DataDome'; + +/** + * Check if a URL is a DataDome SDK URL. + * Matches URLs where js.datadome.co is the host (not just a substring) + */ +function isDataDomeSdkUrl(url: string): boolean { + if (!url) return false; + + const lower = url.toLowerCase(); + + // Must match js.datadome.co as a domain, not as part of a filename + // Valid patterns: + // - https://js.datadome.co/... + // - //js.datadome.co/... + // - js.datadome.co/... 
(bare domain) + // Invalid: + // - https://cdn.example.com/js.datadome.co.js (domain is not js.datadome.co) + return ( + lower.includes('://js.datadome.co/') || + (lower.includes('://js.datadome.co') && lower.endsWith('js.datadome.co')) || + lower.startsWith('//js.datadome.co/') || + (lower.startsWith('//js.datadome.co') && lower === '//js.datadome.co') || + lower.startsWith('js.datadome.co/') || + lower === 'js.datadome.co' + ); +} + +/** + * Extract the path from a DataDome URL to preserve it in the rewrite. + * e.g., "https://js.datadome.co/tags.js" -> "/tags.js" + * "https://js.datadome.co/js/check" -> "/js/check" + */ +function extractDataDomePath(url: string): string { + try { + // Handle protocol-relative URLs + let normalizedUrl = url; + if (url.startsWith('//')) { + normalizedUrl = 'https:' + url; + } else if (!url.startsWith('http')) { + normalizedUrl = 'https://' + url; + } + + const parsed = new URL(normalizedUrl); + // Return pathname + search (query string) if present + return parsed.pathname + parsed.search; + } catch { + // Fallback: try to extract path after js.datadome.co + const match = url.match(/js\.datadome\.co(\/[^'"]*)?/i); + return match?.[1] || '/tags.js'; + } +} + +/** + * Build a first-party URL from the current page origin and the DataDome path. + */ +function rewriteDataDomeUrl(originalUrl: string): string { + const protocol = window.location.protocol === 'https:' ? 'https' : 'http'; + const host = window.location.host; + const path = extractDataDomePath(originalUrl); + + return `${protocol}://${host}/integrations/datadome${path}`; +} + +/** + * Check and rewrite a node if it's a DataDome script or preload link. 
+ */ +function rewriteIfDataDome(node: Node): void { + if (!node || !(node instanceof HTMLElement)) { + return; + } + + // Script elements + if (node.tagName === 'SCRIPT') { + const script = node as HTMLScriptElement; + const src = script.src || script.getAttribute('src'); + if (src && isDataDomeSdkUrl(src)) { + const rewritten = rewriteDataDomeUrl(src); + log.info(`${GUARD_NAME} guard: rewriting dynamically inserted SDK script`, { + original: src, + rewritten, + }); + script.src = rewritten; + script.setAttribute('src', rewritten); + } + return; + } + + // Link preload/prefetch elements + if (node.tagName === 'LINK') { + const link = node as HTMLLinkElement; + const rel = link.getAttribute('rel'); + // Handle both preload and prefetch links for scripts + if ((rel !== 'preload' && rel !== 'prefetch') || link.getAttribute('as') !== 'script') { + return; + } + const href = link.href || link.getAttribute('href'); + if (href && isDataDomeSdkUrl(href)) { + const rewritten = rewriteDataDomeUrl(href); + log.info(`${GUARD_NAME} guard: rewriting SDK ${rel} link`, { + original: href, + rewritten, + }); + link.href = rewritten; + link.setAttribute('href', rewritten); + } + } +} + +/** + * Install the DataDome guard to intercept dynamic script loading. + * Patches Element.prototype.appendChild and insertBefore to catch + * ANY dynamically inserted DataDome SDK script elements and rewrite their URLs + * before insertion. Works across all frameworks and vanilla JavaScript. + * + * Unlike the base script guard, this preserves the original path from the + * DataDome URL (e.g., /tags.js, /js/check) in the rewritten URL. 
+ */ +export function installDataDomeGuard(): void { + if (installed) { + log.debug(`${GUARD_NAME} guard: already installed, skipping`); + return; + } + + if (typeof window === 'undefined' || typeof Element === 'undefined') { + log.debug(`${GUARD_NAME} guard: not in browser environment, skipping`); + return; + } + + log.info(`${GUARD_NAME} guard: installing DOM interception for SDK`); + + const originalAppendChild = Element.prototype.appendChild; + const originalInsertBefore = Element.prototype.insertBefore; + + Element.prototype.appendChild = function (this: Element, node: T): T { + rewriteIfDataDome(node); + return originalAppendChild.call(this, node) as T; + }; + + Element.prototype.insertBefore = function ( + this: Element, + node: T, + reference: Node | null + ): T { + rewriteIfDataDome(node); + return originalInsertBefore.call(this, node, reference) as T; + }; + + installed = true; + log.info(`${GUARD_NAME} guard: DOM interception installed successfully`); +} + +/** + * Check if the guard is currently installed. + */ +export function isGuardInstalled(): boolean { + return installed; +} + +/** + * Reset the guard installation state (primarily for testing). 
+ */ +export function resetGuardState(): void { + installed = false; +} + +// Export for testing +export { isDataDomeSdkUrl, extractDataDomePath, rewriteDataDomeUrl }; diff --git a/crates/js/lib/test/integrations/datadome/script_guard.test.ts b/crates/js/lib/test/integrations/datadome/script_guard.test.ts new file mode 100644 index 00000000..696f0977 --- /dev/null +++ b/crates/js/lib/test/integrations/datadome/script_guard.test.ts @@ -0,0 +1,523 @@ +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { + installDataDomeGuard, + isGuardInstalled, + resetGuardState, + isDataDomeSdkUrl, + extractDataDomePath, + rewriteDataDomeUrl, +} from '../../../src/integrations/datadome/script_guard'; + +describe('DataDome SDK Script Interception Guard', () => { + let originalAppendChild: typeof Element.prototype.appendChild; + let originalInsertBefore: typeof Element.prototype.insertBefore; + + beforeEach(() => { + // Store original methods + originalAppendChild = Element.prototype.appendChild; + originalInsertBefore = Element.prototype.insertBefore; + + // Reset guard state before each test + resetGuardState(); + }); + + afterEach(() => { + // Restore original methods + Element.prototype.appendChild = originalAppendChild; + Element.prototype.insertBefore = originalInsertBefore; + + // Reset guard state after each test + resetGuardState(); + }); + + describe('isDataDomeSdkUrl', () => { + it('should detect js.datadome.co URLs', () => { + expect(isDataDomeSdkUrl('https://js.datadome.co/tags.js')).toBe(true); + expect(isDataDomeSdkUrl('https://js.datadome.co/js/check')).toBe(true); + expect(isDataDomeSdkUrl('//js.datadome.co/tags.js')).toBe(true); + expect(isDataDomeSdkUrl('http://js.datadome.co/tags.js')).toBe(true); + }); + + it('should be case-insensitive', () => { + expect(isDataDomeSdkUrl('https://JS.DATADOME.CO/tags.js')).toBe(true); + expect(isDataDomeSdkUrl('https://Js.DataDome.Co/tags.js')).toBe(true); + }); + + it('should not match other datadome 
subdomains', () => { + expect(isDataDomeSdkUrl('https://api.datadome.co/check')).toBe(false); + expect(isDataDomeSdkUrl('https://datadome.co/tags.js')).toBe(false); + }); + + it('should not match non-datadome URLs', () => { + expect(isDataDomeSdkUrl('https://example.com/tags.js')).toBe(false); + expect(isDataDomeSdkUrl('https://cdn.example.com/js.datadome.co.js')).toBe(false); + }); + + it('should handle empty and null values', () => { + expect(isDataDomeSdkUrl('')).toBe(false); + expect(isDataDomeSdkUrl(null as unknown as string)).toBe(false); + expect(isDataDomeSdkUrl(undefined as unknown as string)).toBe(false); + }); + }); + + describe('extractDataDomePath', () => { + it('should extract path from absolute URLs', () => { + expect(extractDataDomePath('https://js.datadome.co/tags.js')).toBe('/tags.js'); + expect(extractDataDomePath('https://js.datadome.co/js/check')).toBe('/js/check'); + expect(extractDataDomePath('http://js.datadome.co/js/foo/bar')).toBe('/js/foo/bar'); + }); + + it('should extract path from protocol-relative URLs', () => { + expect(extractDataDomePath('//js.datadome.co/tags.js')).toBe('/tags.js'); + expect(extractDataDomePath('//js.datadome.co/js/check')).toBe('/js/check'); + }); + + it('should preserve query strings', () => { + expect(extractDataDomePath('https://js.datadome.co/tags.js?key=abc')).toBe( + '/tags.js?key=abc' + ); + expect(extractDataDomePath('https://js.datadome.co/js/check?foo=bar&baz=qux')).toBe( + '/js/check?foo=bar&baz=qux' + ); + }); + + it('should handle bare domain', () => { + expect(extractDataDomePath('https://js.datadome.co')).toBe('/'); + expect(extractDataDomePath('https://js.datadome.co/')).toBe('/'); + }); + }); + + describe('rewriteDataDomeUrl', () => { + it('should rewrite to first-party URL with path preserved', () => { + const rewritten = rewriteDataDomeUrl('https://js.datadome.co/tags.js'); + expect(rewritten).toContain('/integrations/datadome/tags.js'); + expect(rewritten).toContain(window.location.host); + 
}); + + it('should preserve the js/ path', () => { + const rewritten = rewriteDataDomeUrl('https://js.datadome.co/js/check'); + expect(rewritten).toContain('/integrations/datadome/js/check'); + }); + + it('should preserve query strings', () => { + const rewritten = rewriteDataDomeUrl('https://js.datadome.co/tags.js?key=abc'); + expect(rewritten).toContain('/integrations/datadome/tags.js?key=abc'); + }); + }); + + describe('installDataDomeGuard', () => { + it('should install the guard successfully', () => { + expect(isGuardInstalled()).toBe(false); + + installDataDomeGuard(); + + expect(isGuardInstalled()).toBe(true); + }); + + it('should not install twice', () => { + installDataDomeGuard(); + const firstInstall = Element.prototype.appendChild; + + installDataDomeGuard(); + const secondInstall = Element.prototype.appendChild; + + // Should be the same reference (no double patching) + expect(firstInstall).toBe(secondInstall); + }); + + it('should patch Element.prototype.appendChild', () => { + installDataDomeGuard(); + + expect(Element.prototype.appendChild).not.toBe(originalAppendChild); + }); + + it('should patch Element.prototype.insertBefore', () => { + installDataDomeGuard(); + + expect(Element.prototype.insertBefore).not.toBe(originalInsertBefore); + }); + }); + + describe('appendChild interception', () => { + it('should rewrite DataDome SDK URL', () => { + installDataDomeGuard(); + + const container = document.createElement('div'); + const script = document.createElement('script'); + script.src = 'https://js.datadome.co/tags.js'; + + container.appendChild(script); + + expect(script.src).toContain('/integrations/datadome/tags.js'); + expect(script.src).not.toContain('js.datadome.co'); + }); + + it('should preserve path when rewriting', () => { + installDataDomeGuard(); + + const container = document.createElement('div'); + const script = document.createElement('script'); + script.src = 'https://js.datadome.co/js/check'; + + container.appendChild(script); + + 
expect(script.src).toContain('/integrations/datadome/js/check'); + }); + + it('should use location.host for rewritten URL', () => { + installDataDomeGuard(); + + const container = document.createElement('div'); + const script = document.createElement('script'); + script.src = 'https://js.datadome.co/tags.js'; + + container.appendChild(script); + + expect(script.src).toContain(window.location.host); + expect(script.src).toMatch(/^https?:\/\//); + }); + + it('should not rewrite non-DataDome scripts', () => { + installDataDomeGuard(); + + const container = document.createElement('div'); + const script = document.createElement('script'); + script.src = 'https://example.com/some-script.js'; + + container.appendChild(script); + + expect(script.src).toBe('https://example.com/some-script.js'); + }); + + it('should handle scripts with setAttribute', () => { + installDataDomeGuard(); + + const container = document.createElement('div'); + const script = document.createElement('script'); + script.setAttribute('src', 'https://js.datadome.co/tags.js'); + + container.appendChild(script); + + expect(script.getAttribute('src')).toContain('/integrations/datadome/tags.js'); + }); + + it('should not affect non-script elements', () => { + installDataDomeGuard(); + + const container = document.createElement('div'); + const img = document.createElement('img'); + img.src = 'https://js.datadome.co/image.png'; + + container.appendChild(img); + + expect(img.src).toBe('https://js.datadome.co/image.png'); + }); + + it('should preserve other script attributes', () => { + installDataDomeGuard(); + + const container = document.createElement('div'); + const script = document.createElement('script'); + script.setAttribute('async', ''); + script.setAttribute('crossorigin', 'anonymous'); + script.setAttribute('id', 'datadome-sdk'); + script.src = 'https://js.datadome.co/tags.js'; + + container.appendChild(script); + + expect(script.getAttribute('async')).toBe(''); + 
expect(script.getAttribute('crossorigin')).toBe('anonymous'); + expect(script.getAttribute('id')).toBe('datadome-sdk'); + }); + }); + + describe('insertBefore interception', () => { + it('should rewrite DataDome SDK URL', () => { + installDataDomeGuard(); + + const container = document.createElement('div'); + const reference = document.createElement('div'); + container.appendChild(reference); + + const script = document.createElement('script'); + script.src = 'https://js.datadome.co/tags.js'; + + container.insertBefore(script, reference); + + expect(script.src).toContain('/integrations/datadome/tags.js'); + expect(script.src).not.toContain('js.datadome.co'); + }); + + it('should not rewrite non-DataDome scripts', () => { + installDataDomeGuard(); + + const container = document.createElement('div'); + const reference = document.createElement('div'); + container.appendChild(reference); + + const script = document.createElement('script'); + script.src = 'https://example.com/some-script.js'; + + container.insertBefore(script, reference); + + expect(script.src).toBe('https://example.com/some-script.js'); + }); + + it('should work with null reference node', () => { + installDataDomeGuard(); + + const container = document.createElement('div'); + const script = document.createElement('script'); + script.src = 'https://js.datadome.co/tags.js'; + + container.insertBefore(script, null); + + expect(script.src).toContain('/integrations/datadome/tags.js'); + }); + }); + + describe('link preload interception', () => { + it('should rewrite DataDome SDK preload link', () => { + installDataDomeGuard(); + + const container = document.createElement('div'); + const link = document.createElement('link'); + link.setAttribute('rel', 'preload'); + link.setAttribute('as', 'script'); + link.href = 'https://js.datadome.co/tags.js'; + + container.appendChild(link); + + expect(link.href).toContain('/integrations/datadome/tags.js'); + expect(link.href).not.toContain('js.datadome.co'); + }); + + 
it('should use location.host for rewritten preload URL', () => { + installDataDomeGuard(); + + const container = document.createElement('div'); + const link = document.createElement('link'); + link.setAttribute('rel', 'preload'); + link.setAttribute('as', 'script'); + link.href = 'https://js.datadome.co/tags.js'; + + container.appendChild(link); + + expect(link.href).toContain(window.location.host); + expect(link.href).toMatch(/^https?:\/\//); + }); + + it('should not rewrite preload links without as="script"', () => { + installDataDomeGuard(); + + const container = document.createElement('div'); + const link = document.createElement('link'); + link.setAttribute('rel', 'preload'); + link.setAttribute('as', 'style'); + link.href = 'https://js.datadome.co/tags.js'; + + container.appendChild(link); + + expect(link.href).toBe('https://js.datadome.co/tags.js'); + }); + + it('should not rewrite links without rel="preload"', () => { + installDataDomeGuard(); + + const container = document.createElement('div'); + const link = document.createElement('link'); + link.setAttribute('rel', 'stylesheet'); + link.setAttribute('as', 'script'); + link.href = 'https://js.datadome.co/tags.js'; + + container.appendChild(link); + + expect(link.href).toBe('https://js.datadome.co/tags.js'); + }); + + it('should not rewrite non-DataDome preload links', () => { + installDataDomeGuard(); + + const container = document.createElement('div'); + const link = document.createElement('link'); + link.setAttribute('rel', 'preload'); + link.setAttribute('as', 'script'); + link.href = 'https://example.com/other-script.js'; + + container.appendChild(link); + + expect(link.href).toBe('https://example.com/other-script.js'); + }); + + it('should work with insertBefore for preload links', () => { + installDataDomeGuard(); + + const container = document.createElement('div'); + const reference = document.createElement('div'); + container.appendChild(reference); + + const link = document.createElement('link'); 
+ link.setAttribute('rel', 'preload'); + link.setAttribute('as', 'script'); + link.href = 'https://js.datadome.co/tags.js'; + + container.insertBefore(link, reference); + + expect(link.href).toContain('/integrations/datadome/tags.js'); + }); + + it('should handle preload link with setAttribute', () => { + installDataDomeGuard(); + + const container = document.createElement('div'); + const link = document.createElement('link'); + link.setAttribute('rel', 'preload'); + link.setAttribute('as', 'script'); + link.setAttribute('href', 'https://js.datadome.co/tags.js'); + + container.appendChild(link); + + expect(link.getAttribute('href')).toContain('/integrations/datadome/tags.js'); + }); + + it('should preserve other link attributes', () => { + installDataDomeGuard(); + + const container = document.createElement('div'); + const link = document.createElement('link'); + link.setAttribute('rel', 'preload'); + link.setAttribute('as', 'script'); + link.setAttribute('crossorigin', 'anonymous'); + link.setAttribute('id', 'datadome-preload'); + link.href = 'https://js.datadome.co/tags.js'; + + container.appendChild(link); + + expect(link.getAttribute('rel')).toBe('preload'); + expect(link.getAttribute('as')).toBe('script'); + expect(link.getAttribute('crossorigin')).toBe('anonymous'); + expect(link.getAttribute('id')).toBe('datadome-preload'); + }); + }); + + describe('link prefetch interception', () => { + it('should rewrite DataDome SDK prefetch link', () => { + installDataDomeGuard(); + + const container = document.createElement('div'); + const link = document.createElement('link'); + link.setAttribute('rel', 'prefetch'); + link.setAttribute('as', 'script'); + link.href = 'https://js.datadome.co/tags.js'; + + container.appendChild(link); + + expect(link.href).toContain('/integrations/datadome/tags.js'); + expect(link.href).not.toContain('js.datadome.co'); + }); + + it('should not rewrite prefetch links without as="script"', () => { + installDataDomeGuard(); + + const 
container = document.createElement('div'); + const link = document.createElement('link'); + link.setAttribute('rel', 'prefetch'); + link.setAttribute('as', 'style'); + link.href = 'https://js.datadome.co/tags.js'; + + container.appendChild(link); + + expect(link.href).toBe('https://js.datadome.co/tags.js'); + }); + + it('should not rewrite non-DataDome prefetch links', () => { + installDataDomeGuard(); + + const container = document.createElement('div'); + const link = document.createElement('link'); + link.setAttribute('rel', 'prefetch'); + link.setAttribute('as', 'script'); + link.href = 'https://example.com/other-script.js'; + + container.appendChild(link); + + expect(link.href).toBe('https://example.com/other-script.js'); + }); + + it('should work with insertBefore for prefetch links', () => { + installDataDomeGuard(); + + const container = document.createElement('div'); + const reference = document.createElement('div'); + container.appendChild(reference); + + const link = document.createElement('link'); + link.setAttribute('rel', 'prefetch'); + link.setAttribute('as', 'script'); + link.href = 'https://js.datadome.co/tags.js'; + + container.insertBefore(link, reference); + + expect(link.href).toContain('/integrations/datadome/tags.js'); + }); + }); + + describe('integration scenarios', () => { + it('should handle multiple script insertions', () => { + installDataDomeGuard(); + + const container = document.createElement('div'); + + const script1 = document.createElement('script'); + script1.src = 'https://js.datadome.co/tags.js'; + + const script2 = document.createElement('script'); + script2.src = 'https://example.com/other.js'; + + container.appendChild(script1); + container.appendChild(script2); + + expect(script1.src).toContain('/integrations/datadome/tags.js'); + expect(script2.src).toBe('https://example.com/other.js'); + }); + + it('should handle both script and preload link together', () => { + installDataDomeGuard(); + + const container = 
document.createElement('div'); + + // Add preload link first (typical framework behavior) + const link = document.createElement('link'); + link.setAttribute('rel', 'preload'); + link.setAttribute('as', 'script'); + link.href = 'https://js.datadome.co/tags.js'; + + // Add script tag + const script = document.createElement('script'); + script.src = 'https://js.datadome.co/tags.js'; + + container.appendChild(link); + container.appendChild(script); + + expect(link.href).toContain('/integrations/datadome/tags.js'); + expect(script.src).toContain('/integrations/datadome/tags.js'); + expect(link.href).toBe(script.src); // Should be the same URL + }); + + it('should work with vanilla JavaScript script insertion pattern', () => { + installDataDomeGuard(); + + const container = document.createElement('div'); + const script = document.createElement('script'); + script.type = 'text/javascript'; + script.async = true; + script.src = 'https://js.datadome.co/tags.js'; + + container.appendChild(script); + + expect(script.src).toContain('/integrations/datadome/tags.js'); + expect(script.type).toBe('text/javascript'); + expect(script.async).toBe(true); + }); + }); +}); diff --git a/docs/.vitepress/config.mts b/docs/.vitepress/config.mts index b4a473e1..6b7e9396 100644 --- a/docs/.vitepress/config.mts +++ b/docs/.vitepress/config.mts @@ -162,6 +162,12 @@ export default withMermaid( text: 'Framework Support', items: [{ text: 'Next.js', link: '/guide/integrations/nextjs' }], }, + { + text: 'Security', + items: [ + { text: 'DataDome', link: '/guide/integrations/datadome' }, + ], + }, ], }, ], diff --git a/docs/guide/integrations/datadome.md b/docs/guide/integrations/datadome.md new file mode 100644 index 00000000..252f2816 --- /dev/null +++ b/docs/guide/integrations/datadome.md @@ -0,0 +1,170 @@ +# DataDome Integration + +DataDome provides bot protection and fraud prevention for websites. 
This integration enables first-party delivery of DataDome's JavaScript tag and signal collection through Trusted Server, eliminating the need for DNS/CNAME configuration. + +## Overview + +The DataDome integration: + +- Proxies `tags.js` SDK through your first-party domain +- Rewrites internal DataDome URLs to route through Trusted Server +- Proxies signal collection API (`/js/*`) through first-party context +- Automatically rewrites ` + +``` + +If `rewrite_sdk` is enabled, Trusted Server will automatically rewrite any existing DataDome script tags in your HTML: + +```html +<!-- Before --> +<script src="https://js.datadome.co/tags.js"></script> + +<!-- After --> +<script src="/integrations/datadome/tags.js"></script> +``` + +## Endpoints + +The integration exposes the following routes: + +| Method | Path | Description | +| ---------- | -------------------------------- | --------------------- | +| `GET` | `/integrations/datadome/tags.js` | DataDome SDK script | +| `GET/POST` | `/integrations/datadome/js/*` | Signal collection API | + +## How It Works + +```mermaid +sequenceDiagram + participant Browser + participant TS as Trusted Server + participant SDK as js.datadome.co + participant API as api-js.datadome.co + + Browser->>TS: GET /integrations/datadome/tags.js + TS->>SDK: GET /tags.js + SDK-->>TS: JavaScript SDK + Note over TS: Rewrite internal URLs + TS-->>Browser: Modified SDK (first-party URLs) + + Browser->>TS: POST /integrations/datadome/js/ + TS->>API: POST /js/ + API-->>TS: Response + TS-->>Browser: Response +``` + +### Request Flow + +1. **SDK Loading**: Browser requests `/integrations/datadome/tags.js` +2. **Proxy & Rewrite**: Trusted Server fetches from `js.datadome.co`, rewrites internal URLs to first-party paths +3. **Signal Collection**: SDK sends signals to `/integrations/datadome/js/` +4. 
**Transparent Proxy**: Trusted Server forwards to `api-js.datadome.co`, returns response + +## Environment Variables + +Override configuration via environment variables: + +```bash +TRUSTED_SERVER__INTEGRATIONS__DATADOME__ENABLED=true +TRUSTED_SERVER__INTEGRATIONS__DATADOME__SDK_ORIGIN=https://js.datadome.co +TRUSTED_SERVER__INTEGRATIONS__DATADOME__API_ORIGIN=https://api-js.datadome.co +TRUSTED_SERVER__INTEGRATIONS__DATADOME__CACHE_TTL_SECONDS=3600 +TRUSTED_SERVER__INTEGRATIONS__DATADOME__REWRITE_SDK=true +``` + +## Client-Side Script Guard + +For single-page applications (SPAs) and frameworks like Next.js that dynamically insert script tags, the integration includes a client-side guard. When the `datadome` module is included in your tsjs bundle, it automatically intercepts dynamically inserted DataDome scripts and rewrites them to use first-party paths. + +The guard handles: + +- `