From 754c84875ab0c43ddfd750a1a5ef19a07b6982a2 Mon Sep 17 00:00:00 2001 From: Alex Mikhalev Date: Tue, 17 Mar 2026 16:24:29 +0100 Subject: [PATCH] Phase A: Critical security fixes for PR #426 Security hardening for terraphim_rlm crate: 1. Created validation.rs module with: - validate_snapshot_name(): Prevents path traversal attacks - validate_code_input(): Enforces MAX_CODE_SIZE (1MB) limit - validate_session_id(): Validates UUID format - validate_recursion_depth(): Prevents stack overflow - Security constants: MAX_CODE_SIZE, MAX_INPUT_SIZE, MAX_RECURSION_DEPTH 2. Fixed race condition in firecracker.rs: - Changed snapshot counter from read-then-write to atomic write lock - Added validate_snapshot_name() call before snapshot creation - Prevents TOCTOU vulnerability where concurrent snapshots could exceed limit 3. Enhanced mcp_tools.rs: - Added MAX_CODE_SIZE validation for rlm_code tool - Added MAX_CODE_SIZE validation for rlm_bash tool - Returns proper MCP error format for validation failures Refs #426 --- .../terraphim_rlm/src/executor/firecracker.rs | 15 +- crates/terraphim_rlm/src/lib.rs | 1 + crates/terraphim_rlm/src/mcp_tools.rs | 16 + crates/terraphim_rlm/src/validation.rs | 377 ++++++++++++++++++ 4 files changed, 405 insertions(+), 4 deletions(-) create mode 100644 crates/terraphim_rlm/src/validation.rs diff --git a/crates/terraphim_rlm/src/executor/firecracker.rs b/crates/terraphim_rlm/src/executor/firecracker.rs index 638da7c43..111faf6c4 100644 --- a/crates/terraphim_rlm/src/executor/firecracker.rs +++ b/crates/terraphim_rlm/src/executor/firecracker.rs @@ -447,8 +447,13 @@ impl super::ExecutionEnvironment for FirecrackerExecutor { ) -> Result { log::info!("Creating snapshot '{}' for session {}", name, session_id); - // Check snapshot limit for this session - let count = *self.snapshot_counts.read().get(session_id).unwrap_or(&0); + // Validate snapshot name for security (path traversal prevention) + crate::validation::validate_snapshot_name(name)?; + + // Check snapshot limit for this session - use write lock for atomic check-and-increment + // to prevent race condition where multiple concurrent snapshots could exceed the limit + let mut snapshot_counts = self.snapshot_counts.write(); + let count = *snapshot_counts.get(session_id).unwrap_or(&0); if count >= self.config.max_snapshots_per_session { return Err(RlmError::MaxSnapshotsReached { max: self.config.max_snapshots_per_session, @@ -498,8 +503,10 @@ impl super::ExecutionEnvironment for FirecrackerExecutor { } }; - // Update tracking - *self.snapshot_counts.write().entry(*session_id).or_insert(0) += 1; + // Update tracking - use the existing write lock for atomic increment + *snapshot_counts.entry(*session_id).or_insert(0) += 1; + // Release the write lock by dropping it explicitly before await boundary + drop(snapshot_counts); let result = SnapshotId::new(name, *session_id); diff --git a/crates/terraphim_rlm/src/lib.rs b/crates/terraphim_rlm/src/lib.rs index de1943448..03a7165d9 100644 --- a/crates/terraphim_rlm/src/lib.rs +++ b/crates/terraphim_rlm/src/lib.rs @@ -70,6 +70,7 @@ pub mod logger; // Knowledge graph validation (Phase 5) #[cfg(feature = "kg-validation")] pub mod validator; +pub mod validation; // MCP tools (Phase 6) #[cfg(feature = "mcp")] diff --git a/crates/terraphim_rlm/src/mcp_tools.rs b/crates/terraphim_rlm/src/mcp_tools.rs index 65c067f01..a0393b5e6 100644 --- a/crates/terraphim_rlm/src/mcp_tools.rs +++ b/crates/terraphim_rlm/src/mcp_tools.rs @@ -328,6 +328,14 @@ impl RlmMcpService { .and_then(|v| v.as_str()) .ok_or_else(|| ErrorData::invalid_params("Missing 'code' parameter", None))?; + // Validate code size to prevent DoS via memory exhaustion + if let Err(e) = crate::validation::validate_code_input(code) { + return Err(ErrorData::invalid_params( + format!("Code validation failed: {}", e), + None, + )); + } + let session_id = self.resolve_session_id(&args).await?; // timeout_ms is available for future use when execution context supports it let _timeout_ms = args.get("timeout_ms").and_then(|v| v.as_u64()); @@ -371,6 +379,14 @@ impl RlmMcpService { .and_then(|v| v.as_str()) .ok_or_else(|| ErrorData::invalid_params("Missing 'command' parameter", None))?; + // Validate command size to prevent DoS via memory exhaustion + if let Err(e) = crate::validation::validate_code_input(command) { + return Err(ErrorData::invalid_params( + format!("Command validation failed: {}", e), + None, + )); + } + let session_id = self.resolve_session_id(&args).await?; // These are available for future use when execution context supports them let _timeout_ms = args.get("timeout_ms").and_then(|v| v.as_u64()); diff --git a/crates/terraphim_rlm/src/validation.rs b/crates/terraphim_rlm/src/validation.rs new file mode 100644 index 000000000..4fc53bad8 --- /dev/null +++ b/crates/terraphim_rlm/src/validation.rs @@ -0,0 +1,377 @@ +//! Input validation module for RLM security. +//! +//! This module provides security-focused validation functions for: +//! - Snapshot names (path traversal prevention) +//! - Code input size limits (DoS prevention) +//! - Session ID format validation + +use crate::error::{RlmError, RlmResult}; +use crate::types::SessionId; + +/// Maximum code size (1MB = 1,048,576 bytes) to prevent DoS via memory exhaustion. +pub const MAX_CODE_SIZE: usize = 1_048_576; + +/// Maximum input size for general inputs (10MB) to prevent memory exhaustion. +pub const MAX_INPUT_SIZE: usize = 10_485_760; + +/// Maximum recursion depth for nested operations. +pub const MAX_RECURSION_DEPTH: u32 = 50; + +/// Maximum snapshot name length. +pub const MAX_SNAPSHOT_NAME_LENGTH: usize = 256; + +/// Validates a snapshot name for security. +/// +/// # Security Considerations +/// +/// Rejects names that could be used for path traversal attacks: +/// - Contains `..` (parent directory reference) +/// - Contains `/` or `\` (path separators) +/// - Contains null bytes +/// - Empty names +/// - Names exceeding MAX_SNAPSHOT_NAME_LENGTH +/// +/// # Arguments +/// +/// * `name` - The snapshot name to validate +/// +/// # Returns +/// +/// * `Ok(())` if the name is valid +/// * `Err(RlmError)` if the name is invalid +/// +/// # Examples +/// +/// ``` +/// use terraphim_rlm::validation::validate_snapshot_name; +/// +/// assert!(validate_snapshot_name("valid-snapshot").is_ok()); +/// assert!(validate_snapshot_name("snapshot-v1.2.3").is_ok()); +/// assert!(validate_snapshot_name("../etc/passwd").is_err()); // Path traversal +/// assert!(validate_snapshot_name("snap/name").is_err()); // Path separator +/// ``` +pub fn validate_snapshot_name(name: &str) -> RlmResult<()> { + // Check for empty name + if name.is_empty() { + return Err(RlmError::ConfigError { + message: "Snapshot name cannot be empty".to_string(), + }); + } + + // Check maximum length + if name.len() > MAX_SNAPSHOT_NAME_LENGTH { + return Err(RlmError::ConfigError { + message: format!( + "Snapshot name too long: {} bytes (max {})", + name.len(), + MAX_SNAPSHOT_NAME_LENGTH + ), + }); + } + + // Check for path traversal patterns + if name.contains("..") { + return Err(RlmError::ConfigError { + message: format!("Snapshot name contains path traversal pattern: {}", name), + }); + } + + // Check for path separators + if name.contains('/') || name.contains('\\') { + return Err(RlmError::ConfigError { + message: format!("Snapshot name contains path separator: {}", name), + }); + } + + // Check for null bytes + if name.contains('\0') { + return Err(RlmError::ConfigError { + message: "Snapshot name contains null byte".to_string(), + }); + } + + Ok(()) +} + +/// Validates code input size to prevent DoS via memory exhaustion. +/// +/// # Security Considerations +/// +/// Enforces MAX_CODE_SIZE limit on code inputs to prevent: +/// - Memory exhaustion attacks +/// - Excessive VM startup time due to large code volumes +/// - Storage exhaustion from large snapshots +/// +/// # Arguments +/// +/// * `code` - The code input to validate +/// +/// # Returns +/// +/// * `Ok(())` if the code size is within limits +/// * `Err(RlmError)` if the code exceeds MAX_CODE_SIZE +/// +/// # Examples +/// +/// ``` +/// use terraphim_rlm::validation::{validate_code_input, MAX_CODE_SIZE}; +/// +/// let valid_code = "print('hello')"; +/// assert!(validate_code_input(valid_code).is_ok()); +/// +/// let huge_code = "x".repeat(MAX_CODE_SIZE + 1); +/// assert!(validate_code_input(&huge_code).is_err()); +/// ``` +pub fn validate_code_input(code: &str) -> RlmResult<()> { + let size = code.len(); + if size > MAX_CODE_SIZE { + return Err(RlmError::ConfigError { + message: format!( + "Code size {} bytes exceeds maximum of {} bytes", + size, MAX_CODE_SIZE + ), + }); + } + Ok(()) +} + +/// Validates general input size. +/// +/// Use this for non-code inputs that still need size limits. +/// +/// # Arguments +/// +/// * `input` - The input to validate +/// +/// # Returns +/// +/// * `Ok(())` if the input size is within limits +/// * `Err(RlmError)` if the input exceeds MAX_INPUT_SIZE +pub fn validate_input_size(input: &str) -> RlmResult<()> { + let size = input.len(); + if size > MAX_INPUT_SIZE { + return Err(RlmError::ConfigError { + message: format!( + "Input size {} bytes exceeds maximum of {} bytes", + size, MAX_INPUT_SIZE + ), + }); + } + Ok(()) +} + +/// Validates a session ID string format. +/// +/// # Security Considerations +/// +/// Ensures session IDs are valid UUIDs to prevent: +/// - Session fixation attacks with malformed IDs +/// - Injection of special characters into storage systems +/// - Information disclosure via error messages +/// +/// # Arguments +/// +/// * `session_id` - The session ID string to validate +/// +/// # Returns +/// +/// * `Ok(SessionId)` if the ID is a valid UUID +/// * `Err(RlmError)` if the ID format is invalid +/// +/// # Examples +/// +/// ``` +/// use terraphim_rlm::validation::validate_session_id; +/// +/// // Valid UUID +/// let result = validate_session_id("550e8400-e29b-41d4-a716-446655440000"); +/// assert!(result.is_ok()); +/// +/// // Invalid formats +/// assert!(validate_session_id("not-a-uuid").is_err()); +/// assert!(validate_session_id("").is_err()); +/// assert!(validate_session_id("../etc/passwd").is_err()); +/// ``` +pub fn validate_session_id(session_id: &str) -> RlmResult { + SessionId::from_string(session_id).map_err(|_| RlmError::InvalidSessionToken { + token: session_id.to_string(), + }) +} + +/// Validates recursion depth to prevent stack overflow. +/// +/// # Arguments +/// +/// * `depth` - Current recursion depth +/// +/// # Returns +/// +/// * `Ok(())` if depth is within limits +/// * `Err(RlmError)` if depth exceeds MAX_RECURSION_DEPTH +pub fn validate_recursion_depth(depth: u32) -> RlmResult<()> { + if depth > MAX_RECURSION_DEPTH { + return Err(RlmError::RecursionDepthExceeded { + depth, + max_depth: MAX_RECURSION_DEPTH, + }); + } + Ok(()) +} + +/// Combined validation for code execution requests. +/// +/// Validates both the session ID and code input in one call. +/// +/// # Arguments +/// +/// * `session_id` - The session ID string +/// * `code` - The code to execute +/// +/// # Returns +/// +/// * `Ok((SessionId, &str))` if both are valid +/// * `Err(RlmError)` if either validation fails +pub fn validate_execution_request<'a>( + session_id: &str, + code: &'a str, +) -> RlmResult<(SessionId, &'a str)> { + let sid = validate_session_id(session_id)?; + validate_code_input(code)?; + Ok((sid, code)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_validate_snapshot_name_valid() { + assert!(validate_snapshot_name("valid-snapshot").is_ok()); + assert!(validate_snapshot_name("snapshot-v1.2.3").is_ok()); + assert!(validate_snapshot_name("base").is_ok()); + assert!(validate_snapshot_name("a").is_ok()); + assert!(validate_snapshot_name("snapshot_with_underscores").is_ok()); + assert!(validate_snapshot_name("snapshot-with-dashes").is_ok()); + assert!(validate_snapshot_name("123numeric-start").is_ok()); + } + + #[test] + fn test_validate_snapshot_name_path_traversal() { + assert!(validate_snapshot_name("../etc/passwd").is_err()); + assert!(validate_snapshot_name("..\\windows\\system32").is_err()); + assert!(validate_snapshot_name("snapshot/../../../etc/passwd").is_err()); + assert!(validate_snapshot_name("..").is_err()); + assert!(validate_snapshot_name("...").is_err()); + } + + #[test] + fn test_validate_snapshot_name_path_separators() { + assert!(validate_snapshot_name("snap/name").is_err()); + assert!(validate_snapshot_name("snap\\name").is_err()); + assert!(validate_snapshot_name("/etc/passwd").is_err()); + assert!(validate_snapshot_name("C:\\Windows").is_err()); + } + + #[test] + fn test_validate_snapshot_name_null_bytes() { + assert!(validate_snapshot_name("snap\0name").is_err()); + assert!(validate_snapshot_name("\0").is_err()); + assert!(validate_snapshot_name("snapshot\0\0").is_err()); + } + + #[test] + fn test_validate_snapshot_name_empty() { + assert!(validate_snapshot_name("").is_err()); + } + + #[test] + fn test_validate_snapshot_name_too_long() { + let long_name = "a".repeat(MAX_SNAPSHOT_NAME_LENGTH + 1); + assert!(validate_snapshot_name(&long_name).is_err()); + } + + #[test] + fn test_validate_snapshot_name_max_length() { + let max_name = "a".repeat(MAX_SNAPSHOT_NAME_LENGTH); + assert!(validate_snapshot_name(&max_name).is_ok()); + } + + #[test] + fn test_validate_code_input_valid() { + assert!(validate_code_input("print('hello')").is_ok()); + assert!(validate_code_input("").is_ok()); + assert!(validate_code_input(&"x".repeat(MAX_CODE_SIZE)).is_ok()); + } + + #[test] + fn test_validate_code_input_too_large() { + let huge_code = "x".repeat(MAX_CODE_SIZE + 1); + assert!(validate_code_input(&huge_code).is_err()); + } + + #[test] + fn test_validate_input_size_valid() { + assert!(validate_input_size("small input").is_ok()); + assert!(validate_input_size(&"x".repeat(MAX_INPUT_SIZE)).is_ok()); + } + + #[test] + fn test_validate_input_size_too_large() { + let huge_input = "x".repeat(MAX_INPUT_SIZE + 1); + assert!(validate_input_size(&huge_input).is_err()); + } + + #[test] + fn test_validate_session_id_valid() { + let valid_uuid = "550e8400-e29b-41d4-a716-446655440000"; + assert!(validate_session_id(valid_uuid).is_ok()); + } + + #[test] + fn test_validate_session_id_invalid() { + assert!(validate_session_id("not-a-uuid").is_err()); + assert!(validate_session_id("").is_err()); + assert!(validate_session_id("../etc/passwd").is_err()); + assert!(validate_session_id("short").is_err()); + assert!(validate_session_id("550e8400-e29b-41d4-a716-44665544000").is_err()); // Too short + assert!(validate_session_id("550e8400-e29b-41d4-a716-4466554400000").is_err()); + // Too long + } + + #[test] + fn test_validate_recursion_depth_valid() { + assert!(validate_recursion_depth(0).is_ok()); + assert!(validate_recursion_depth(1).is_ok()); + assert!(validate_recursion_depth(MAX_RECURSION_DEPTH).is_ok()); + } + + #[test] + fn test_validate_recursion_depth_exceeded() { + assert!(validate_recursion_depth(MAX_RECURSION_DEPTH + 1).is_err()); + assert!(validate_recursion_depth(u32::MAX).is_err()); + } + + #[test] + fn test_validate_execution_request_valid() { + let session_id = "550e8400-e29b-41d4-a716-446655440000"; + let code = "print('hello')"; + let result = validate_execution_request(session_id, code); + assert!(result.is_ok()); + } + + #[test] + fn test_validate_execution_request_invalid_session() { + let session_id = "invalid-session"; + let code = "print('hello')"; + let result = validate_execution_request(session_id, code); + assert!(result.is_err()); + } + + #[test] + fn test_validate_execution_request_invalid_code() { + let session_id = "550e8400-e29b-41d4-a716-446655440000"; + let code = "x".repeat(MAX_CODE_SIZE + 1); + let result = validate_execution_request(session_id, &code); + assert!(result.is_err()); + } +}