From 2a8252cc2d26e36c4a7b9d83f3e15b5310b997b9 Mon Sep 17 00:00:00 2001 From: csd113 Date: Fri, 20 Mar 2026 18:12:40 -0700 Subject: [PATCH 1/4] bug fixes and performance improvement --- src/config/defaults.rs | 7 +- src/config/loader.rs | 31 +++++ src/config/mod.rs | 15 ++- src/console/dashboard.rs | 22 ++-- src/logging/mod.rs | 9 ++ src/main.rs | 10 +- src/runtime/events.rs | 28 +++-- src/runtime/lifecycle.rs | 126 +++++++++++++------ src/runtime/mod.rs | 14 +++ src/runtime/state.rs | 5 +- src/server/handler.rs | 254 ++++++++++++++++++++++++++++++++------- src/server/mod.rs | 88 ++++++++++++-- src/tor/mod.rs | 77 +++++++++--- 13 files changed, 539 insertions(+), 147 deletions(-) diff --git a/src/config/defaults.rs b/src/config/defaults.rs index dda25ab..cf7cc1d 100644 --- a/src/config/defaults.rs +++ b/src/config/defaults.rs @@ -27,6 +27,10 @@ auto_port_fallback = true # Open the system default browser at http://localhost: on startup. open_browser_on_start = false +# Maximum number of concurrent HTTP connections. Excess connections queue +# at the OS TCP backlog level rather than spawning unbounded tasks. +max_connections = 256 + # ─── [site] ─────────────────────────────────────────────────────────────────── [site] @@ -40,9 +44,6 @@ index_file = "index.html" # Return an HTML file listing for directory requests instead of index_file. enable_directory_listing = false -# Watch the site directory for changes and update console stats automatically. 
-auto_reload = false - # ─── [tor] ──────────────────────────────────────────────────────────────────── [tor] diff --git a/src/config/loader.rs b/src/config/loader.rs index 09c6631..734ee7d 100644 --- a/src/config/loader.rs +++ b/src/config/loader.rs @@ -36,6 +36,21 @@ fn validate(cfg: &Config) -> Result<()> { if cfg.site.index_file.contains(std::path::MAIN_SEPARATOR) { errors.push("[site] index_file must be a filename only, not a path".into()); } + { + let dir_path = std::path::Path::new(&cfg.site.directory); + if dir_path.is_absolute() { + errors.push("[site] directory must not be an absolute path".into()); + } + if dir_path + .components() + .any(|c| c == std::path::Component::ParentDir) + { + errors.push("[site] directory must not contain '..' components".into()); + } + if cfg.site.directory.contains(std::path::MAIN_SEPARATOR) { + errors.push("[site] directory must be a directory name only, not a path".into()); + } + } // [logging] let valid_levels = ["trace", "debug", "info", "warn", "error"]; @@ -46,6 +61,18 @@ fn validate(cfg: &Config) -> Result<()> { valid_levels.join(", ") )); } + { + let log_path = std::path::Path::new(&cfg.logging.file); + if log_path.is_absolute() { + errors.push("[logging] file must not be an absolute path".into()); + } + if log_path + .components() + .any(|c| c == std::path::Component::ParentDir) + { + errors.push("[logging] file must not contain '..' components".into()); + } + } // [console] if cfg.console.refresh_rate_ms < 100 { @@ -65,6 +92,10 @@ fn validate(cfg: &Config) -> Result<()> { cfg.identity.instance_name.len() )); } + // `char::is_control` covers U+001B (ESC), BEL (\x07), backspace (\x08), + // null (\x00), and all other C0/C1 control characters. This check prevents + // ANSI/VT escape-sequence injection through `instance_name` into the + // terminal dashboard, which renders the value directly in raw mode. 
if cfg.identity.instance_name.chars().any(char::is_control) { errors.push("[identity] instance_name must not contain control characters".into()); } diff --git a/src/config/mod.rs b/src/config/mod.rs index ba53095..106dbc5 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -8,6 +8,7 @@ pub mod loader; use serde::{Deserialize, Serialize}; #[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] pub struct Config { pub server: ServerConfig, pub site: SiteConfig, @@ -18,19 +19,25 @@ pub struct Config { } #[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] pub struct ServerConfig { pub port: u16, pub bind: String, pub auto_port_fallback: bool, pub open_browser_on_start: bool, + pub max_connections: u32, } #[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] pub struct SiteConfig { pub directory: String, pub index_file: String, pub enable_directory_listing: bool, - pub auto_reload: bool, + // `auto_reload` has been removed: the field was advertised in the default + // config but never implemented. Old config files containing `auto_reload` + // will now be rejected at startup with a clear "unknown field" error, + // prompting the operator to remove the obsolete key (fix 2.6). } /// Controls Tor integration. @@ -39,6 +46,7 @@ pub struct SiteConfig { /// automatically from the binary's data directory — no user configuration /// needed. The only knob is whether Tor is enabled at all. #[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] pub struct TorConfig { /// Master on/off switch. When `false`, Tor is never started and the /// onion address section of the dashboard is hidden. 
@@ -46,6 +54,7 @@ pub struct TorConfig { } #[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] pub struct LoggingConfig { pub enabled: bool, pub level: String, @@ -53,6 +62,7 @@ pub struct LoggingConfig { } #[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] pub struct ConsoleConfig { pub interactive: bool, pub refresh_rate_ms: u64, @@ -60,6 +70,7 @@ pub struct ConsoleConfig { } #[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] pub struct IdentityConfig { pub instance_name: String, } @@ -72,12 +83,12 @@ impl Default for Config { bind: "127.0.0.1".into(), auto_port_fallback: true, open_browser_on_start: false, + max_connections: 256, }, site: SiteConfig { directory: "site".into(), index_file: "index.html".into(), enable_directory_listing: false, - auto_reload: false, }, tor: TorConfig { enabled: true }, logging: LoggingConfig { diff --git a/src/console/dashboard.rs b/src/console/dashboard.rs index 218e123..0cedb66 100644 --- a/src/console/dashboard.rs +++ b/src/console/dashboard.rs @@ -57,10 +57,7 @@ pub fn render_dashboard(state: &AppState, requests: u64, errors: u64, config: &C TorStatus::Disabled => dim("DISABLED"), TorStatus::Starting => yellow("STARTING — polling for .onion address…"), TorStatus::Ready => green("READY"), - TorStatus::Failed(None) => red("FAILED — see log for details"), - TorStatus::Failed(Some(code)) => { - red(&format!("FAILED (exit {code}) — see log for details")) - } + TorStatus::Failed(reason) => red(&format!("FAILED ({reason}) — see log for details")), }; let _ = writeln!(out, " Tor : {tor_str}\r"); out.push_str("\r\n"); @@ -173,14 +170,11 @@ pub fn render_help() -> String { // ─── Helpers ───────────────────────────────────────────────────────────────── fn strip_timestamp(line: &str) -> &str { - let mut count = 0usize; - for (i, b) in line.bytes().enumerate() { - if b == b']' { - count = count.saturating_add(1); - if count == 2 { - return 
line[i.saturating_add(1)..].trim_start(); - } - } - } - line + // Split on ']', skip the first two tokens ([level] and [timestamp]), + // return the remainder trimmed. Uses splitn so we stop after the third + // piece and never slice at a non-character boundary. + let mut parts = line.splitn(3, ']'); + parts.next(); // consume "[level" + parts.next(); // consume "[timestamp" + parts.next().map_or(line, str::trim_start) } diff --git a/src/logging/mod.rs b/src/logging/mod.rs index c5d0999..8220292 100644 --- a/src/logging/mod.rs +++ b/src/logging/mod.rs @@ -95,6 +95,15 @@ impl Log for RustHostLogger { } } +/// Flush all buffered log entries to the log file. +/// +/// Invokes `RustHostLogger::flush()`, which acquires the file mutex and calls +/// `File::flush()`. Call this once during shutdown, after the final log entry +/// has been written, to guarantee no lines are lost in the OS page cache. +pub fn flush() { + log::logger().flush(); +} + // ─── Init ──────────────────────────────────────────────────────────────────── /// Initialise the global logger. Must be called once before any `log!` macro. diff --git a/src/main.rs b/src/main.rs index fa08251..d3e2899 100644 --- a/src/main.rs +++ b/src/main.rs @@ -18,10 +18,16 @@ pub type Result> = std::result:: #[tokio::main] async fn main() { + // Register a panic hook so the terminal is always restored, even when a + // panic fires on an async executor thread. + std::panic::set_hook(Box::new(|info| { + console::cleanup(); + eprintln!("\nPanic: {info}"); + })); + if let Err(err) = runtime::lifecycle::run().await { // Best-effort terminal restore in case we crashed inside the console. 
- let _ = crossterm::terminal::disable_raw_mode(); - let _ = crossterm::execute!(std::io::stdout(), crossterm::cursor::Show); + console::cleanup(); eprintln!("\nFatal error: {err}"); std::process::exit(1); } diff --git a/src/runtime/events.rs b/src/runtime/events.rs index d11520d..acfb339 100644 --- a/src/runtime/events.rs +++ b/src/runtime/events.rs @@ -49,7 +49,20 @@ pub async fn handle( KeyEvent::Reload => { let site_root = data_dir.join(&config.site.directory); - let (count, bytes) = server::scan_site(&site_root); + // 2.2 — scan_site now returns Result and must run on a blocking + // thread (read_dir is not async-safe). + let (count, bytes) = + match tokio::task::spawn_blocking(move || server::scan_site(&site_root)).await { + Ok(Ok(v)) => v, + Ok(Err(e)) => { + log::warn!("Site rescan failed: {e}"); + return Ok(false); + } + Err(e) => { + log::warn!("Site rescan task panicked: {e}"); + return Ok(false); + } + }; { let mut s = state.write().await; s.site_file_count = count; @@ -64,7 +77,8 @@ pub async fn handle( KeyEvent::Open => { let port = state.read().await.actual_port; - open_browser(&format!("http://localhost:{port}")); + // 2.4 — use the canonical definition in crate::runtime + super::open_browser(&format!("http://localhost:{port}")); } KeyEvent::Other => { @@ -77,12 +91,4 @@ pub async fn handle( Ok(false) } - -fn open_browser(url: &str) { - #[cfg(target_os = "macos")] - let _ = std::process::Command::new("open").arg(url).spawn(); - #[cfg(target_os = "windows")] - let _ = std::process::Command::new("explorer").arg(url).spawn(); - #[cfg(not(any(target_os = "macos", target_os = "windows")))] - let _ = std::process::Command::new("xdg-open").arg(url).spawn(); -} +// open_browser removed — canonical definition lives in crate::runtime (mod.rs) (fix 2.4) diff --git a/src/runtime/lifecycle.rs b/src/runtime/lifecycle.rs index 62aad88..4173933 100644 --- a/src/runtime/lifecycle.rs +++ b/src/runtime/lifecycle.rs @@ -10,7 +10,7 @@ use std::{path::PathBuf, sync::Arc, 
time::Duration}; -use tokio::sync::{mpsc, watch, RwLock}; +use tokio::sync::{mpsc, oneshot, watch, RwLock}; use crate::{ config::{self, Config}, @@ -105,9 +105,22 @@ async fn normal_run() -> Result<()> { } // 5. Scan site directory for initial file stats. + // 2.2 — wrap in spawn_blocking; scan_site now returns Result. { let site_root = data_dir().join(&config.site.directory); - let (count, bytes) = server::scan_site(&site_root); + let scan_root = site_root.clone(); + let (count, bytes) = + match tokio::task::spawn_blocking(move || server::scan_site(&scan_root)).await { + Ok(Ok(v)) => v, + Ok(Err(e)) => { + log::warn!("Could not scan site directory on startup: {e}"); + (0, 0) + } + Err(e) => { + log::warn!("Site scan task panicked on startup: {e}"); + (0, 0) + } + }; let mut s = state.write().await; s.site_file_count = count; s.site_total_bytes = bytes; @@ -123,19 +136,35 @@ async fn normal_run() -> Result<()> { })?; // 8. Start HTTP server task. - spawn_server(&config, &state, &metrics, &shutdown_rx); - - // Wait for the server to bind so actual_port is populated before we pass - // it to Tor. 50 ms is enough for localhost; the auto-fallback logic inside - // server::run handles the real timing. - tokio::time::sleep(Duration::from_millis(50)).await; + let (port_tx, port_rx) = oneshot::channel::(); + // 2.10 — keep the JoinHandle so we can await the server during shutdown drain. + let server_handle = spawn_server(&config, &state, &metrics, &shutdown_rx, port_tx); + + // Wait for the server to signal its bound port via the oneshot channel. + // A 10-second timeout ensures a bind failure doesn't block forever. 
+ let bind_port = match tokio::time::timeout(Duration::from_secs(10), port_rx).await { + Ok(Ok(port)) => port, + Ok(Err(_)) => { + log::error!("Server port channel closed before sending — server failed to bind"); + return Err("Server failed to bind".into()); + } + Err(_) => { + log::error!("Timed out waiting for server to bind"); + return Err("Server bind timeout".into()); + } + }; // 9. Start Tor (if enabled). // tor::init() spawns a Tokio task and returns immediately — never blocks // the async executor. + // 2.10 — pass shutdown_rx so Tor's stream loop exits on clean shutdown. if config.tor.enabled { - let bind_port = state.read().await.actual_port; - tor::init(data_dir(), bind_port, Arc::clone(&state)); + tor::init( + data_dir(), + bind_port, + Arc::clone(&state), + shutdown_rx.clone(), + ); } // 10. Start console UI. @@ -144,7 +173,8 @@ async fn normal_run() -> Result<()> { // 11. Open browser (if configured). if config.server.open_browser_on_start { let port = state.read().await.actual_port; - open_browser(&format!("http://localhost:{port}")); + // 2.4 — use canonical open_browser from crate::runtime + super::open_browser(&format!("http://localhost:{port}")); } // 12. Event dispatch loop. @@ -153,31 +183,38 @@ async fn normal_run() -> Result<()> { // 13. Graceful shutdown. log::info!("Shutting down…"); let _ = shutdown_tx.send(true); - // Drop the Arti TorClient — closes all Tor circuits cleanly. - tor::kill(); - tokio::time::sleep(Duration::from_millis(300)).await; + + // 2.10 — wait for the HTTP server to drain in-flight connections (max 5 s). + // tor::kill() has been removed — Tor exits when its task detects shutdown_rx. + let _ = tokio::time::timeout(Duration::from_secs(5), server_handle).await; + + // 2.11 — write the final log entry, flush to disk, then restore the terminal. 
+ log::info!("RustHost shut down cleanly."); + logging::flush(); console::cleanup(); - log::info!("Goodbye."); Ok(()) } // ─── Helpers ───────────────────────────────────────────────────────────────── +/// Spawn the HTTP server task and return its `JoinHandle` so the shutdown +/// sequence can await the connection drain (fix 2.10). fn spawn_server( config: &Arc, state: &SharedState, metrics: &SharedMetrics, shutdown: &watch::Receiver, -) { + port_tx: oneshot::Sender, +) -> tokio::task::JoinHandle<()> { let cfg = Arc::clone(config); let st = Arc::clone(state); let met = Arc::clone(metrics); let data = data_dir(); let shut = shutdown.clone(); tokio::spawn(async move { - server::run(cfg, st, met, data, shut).await; - }); + server::run(cfg, st, met, data, shut, port_tx).await; + }) } async fn start_console( @@ -208,23 +245,40 @@ async fn event_loop( metrics: &SharedMetrics, ctrlc_rx: &mut mpsc::Receiver<()>, ) -> Result<()> { + // 2.8 — mutable so we can set to None when the channel closes. let mut key_rx = key_rx; loop { + // Build a future that yields the next key, or pends forever once the + // channel closes (avoids repeated None-match after input task death). + // When the channel closes we log once and park this arm (2.8). + let key_fut = async { + if let Some(rx) = key_rx.as_mut() { + rx.recv().await + } else { + // Channel already closed — pend forever so ctrlc_rx can still fire. + std::future::pending().await + } + }; + tokio::select! { - Some(key) = async { - match key_rx.as_mut() { - Some(rx) => rx.recv().await, - None => None, + maybe_key = key_fut => { + if let Some(key) = maybe_key { + let quit = events::handle( + key, + config, + Arc::clone(state), + Arc::clone(metrics), + data_dir(), + ).await?; + if quit { break; } + } else { + // 2.8 — input task exited; disable key arm and warn operator. + log::warn!( + "Console input task exited — keyboard input disabled. \ + Use Ctrl-C to quit." + ); + key_rx = None; // suppress repeated warnings on next select! 
iteration } - } => { - let quit = events::handle( - key, - config, - Arc::clone(state), - Arc::clone(metrics), - data_dir(), - ).await?; - if quit { break; } } Some(()) = ctrlc_rx.recv() => { break; } } @@ -232,14 +286,8 @@ async fn event_loop( Ok(()) } -fn open_browser(url: &str) { - #[cfg(target_os = "macos")] - let _ = std::process::Command::new("open").arg(url).spawn(); - #[cfg(target_os = "windows")] - let _ = std::process::Command::new("explorer").arg(url).spawn(); - #[cfg(not(any(target_os = "macos", target_os = "windows")))] - let _ = std::process::Command::new("xdg-open").arg(url).spawn(); -} +// open_browser removed from this file — canonical definition is in +// crate::runtime (src/runtime/mod.rs), called via super::open_browser (fix 2.4). // ─── Placeholder HTML ───────────────────────────────────────────────────────── diff --git a/src/runtime/mod.rs b/src/runtime/mod.rs index 35e64bc..46c503e 100644 --- a/src/runtime/mod.rs +++ b/src/runtime/mod.rs @@ -12,3 +12,17 @@ pub mod events; pub mod lifecycle; pub mod state; + +/// Open `url` in the system default browser. +/// +/// Single canonical definition extracted from `lifecycle.rs` and `events.rs` +/// to eliminate the duplicated function (fix 2.4). Any future fix — URL +/// sanitisation, logging, sandboxing — needs to be applied here only. +pub fn open_browser(url: &str) { + #[cfg(target_os = "macos")] + let _ = std::process::Command::new("open").arg(url).spawn(); + #[cfg(target_os = "windows")] + let _ = std::process::Command::new("explorer").arg(url).spawn(); + #[cfg(not(any(target_os = "macos", target_os = "windows")))] + let _ = std::process::Command::new("xdg-open").arg(url).spawn(); +} diff --git a/src/runtime/state.rs b/src/runtime/state.rs index bbefac3..d977971 100644 --- a/src/runtime/state.rs +++ b/src/runtime/state.rs @@ -34,8 +34,9 @@ pub enum TorStatus { Starting, /// `hostname` file read; `.onion` address available in `onion_address`. 
Ready, - /// Process exited with an error, or hostname polling timed out. - Failed(Option), + /// Tor failed; the inner `String` is a brief human-readable reason + /// (e.g. `"bootstrap failed"`, `"stream ended"`) shown in the dashboard. + Failed(String), } /// Which screen the console is currently showing. diff --git a/src/server/handler.rs b/src/server/handler.rs index b77c532..f88ea51 100644 --- a/src/server/handler.rs +++ b/src/server/handler.rs @@ -13,8 +13,9 @@ use std::{fmt::Write as _, path::Path}; use tokio::{ - io::{AsyncReadExt, AsyncWriteExt}, + io::{AsyncBufReadExt, AsyncWriteExt, BufReader}, net::TcpStream, + time::timeout, }; use super::{fallback, mime}; @@ -24,41 +25,71 @@ use crate::{runtime::state::SharedMetrics, Result}; /// Handle one HTTP connection to completion. pub async fn handle( - mut stream: TcpStream, - site_root: &Path, + stream: TcpStream, + canonical_root: &Path, // 2.3 — pre-canonicalized by server::run index_file: &str, dir_listing: bool, metrics: SharedMetrics, ) -> Result<()> { - let request = read_request(&mut stream).await?; + // 2.1 — wrap in BufReader so read_request uses read_line (one syscall per + // line) rather than reading one byte at a time (up to 8192 syscalls). + let mut reader = BufReader::new(stream); + + // 1.5 — Wrap read_request in a 30-second timeout to prevent slow-loris DoS. + let request = match timeout( + std::time::Duration::from_secs(30), + read_request(&mut reader), + ) + .await + { + Ok(Ok(r)) => r, + Ok(Err(e)) => return Err(e), + Err(_elapsed) => { + // Client held the connection open without completing a request. + log::debug!("Request timeout — sending 408"); + // Recover the stream from the reader for writing. 
+ let mut stream = reader.into_inner(); + write_response( + &mut stream, + 408, + "Request Timeout", + "text/plain", + b"Request Timeout", + false, + ) + .await?; + return Ok(()); + } + }; - let Some(raw_path) = parse_path(&request) else { + // Recover the TcpStream from the BufReader for writing the response. + let mut stream = reader.into_inner(); + + // 1.4 — parse_path now returns (method, path) so we can suppress the body + // on HEAD responses. + let Some((method, raw_path)) = parse_path(&request) else { write_response( &mut stream, 400, "Bad Request", "text/plain", b"Bad Request", + false, ) .await?; metrics.add_error(); return Ok(()); }; + let is_head = method == "HEAD"; + // Strip query string / fragment then percent-decode. let path_only = raw_path.split('?').next().unwrap_or("/"); let decoded = percent_decode(path_only); - match resolve_path(site_root, &decoded, index_file, dir_listing) { + match resolve_path(canonical_root, &decoded, index_file, dir_listing) { Resolved::File(abs_path) => { - if let Ok(bytes) = tokio::fs::read(&abs_path).await { - let ext = abs_path.extension().and_then(|e| e.to_str()).unwrap_or(""); - write_response(&mut stream, 200, "OK", mime::for_extension(ext), &bytes).await?; - metrics.add_request(); - } else { - write_response(&mut stream, 404, "Not Found", "text/plain", b"Not Found").await?; - metrics.add_error(); - } + serve_file(&mut stream, &abs_path, is_head, &metrics).await?; } Resolved::Fallback => { @@ -68,13 +99,22 @@ pub async fn handle( "OK", "text/html; charset=utf-8", fallback::NO_SITE_HTML.as_bytes(), + is_head, ) .await?; metrics.add_request(); } Resolved::Forbidden => { - write_response(&mut stream, 403, "Forbidden", "text/plain", b"Forbidden").await?; + write_response( + &mut stream, + 403, + "Forbidden", + "text/plain", + b"Forbidden", + false, + ) + .await?; metrics.add_error(); } @@ -86,6 +126,7 @@ pub async fn handle( "OK", "text/html; charset=utf-8", html.as_bytes(), + is_head, ) .await?; 
metrics.add_request(); @@ -95,28 +136,90 @@ pub async fn handle( Ok(()) } +// ─── File serving ───────────────────────────────────────────────────────────── + +/// Open `abs_path`, send headers + streamed body (or headers only for HEAD). +/// +/// Extracted from `handle()` to keep that function under the line-count lint +/// threshold. All logic is unchanged from the inline version. +async fn serve_file( + stream: &mut TcpStream, + abs_path: &std::path::Path, + is_head: bool, + metrics: &SharedMetrics, +) -> Result<()> { + // 1.7 — Stream the file instead of reading it entirely into memory. + if let Ok(mut file) = tokio::fs::File::open(abs_path).await { + let file_len = match file.metadata().await { + Ok(m) => m.len(), + Err(e) => { + log::debug!("Failed to read file metadata: {e}"); + write_response( + stream, + 500, + "Internal Server Error", + "text/plain", + b"Internal Server Error", + false, + ) + .await?; + metrics.add_error(); + return Ok(()); + } + }; + let ext = abs_path.extension().and_then(|e| e.to_str()).unwrap_or(""); + let content_type = mime::for_extension(ext); + write_headers(stream, 200, "OK", content_type, file_len).await?; + if !is_head { + tokio::io::copy(&mut file, stream).await?; + } + stream.flush().await?; + metrics.add_request(); + } else { + write_response(stream, 404, "Not Found", "text/plain", b"Not Found", false).await?; + metrics.add_error(); + } + Ok(()) +} + // ─── Request reading ───────────────────────────────────────────────────────── -async fn read_request(stream: &mut TcpStream) -> Result { - let mut buf = Vec::with_capacity(512); - let mut byte = [0u8; 1]; +/// Read HTTP request headers from a buffered stream, line by line. +/// +/// Uses `read_line()` — a single system call per line regardless of how many +/// bytes arrive — instead of the previous byte-at-a-time loop that issued up +/// to 8 192 `read` syscalls per request (fix 2.1). 
+/// +/// Stops at the blank line that terminates the HTTP header section +/// (`\r\n` or bare `\n`). Enforces an 8 KiB total limit. +async fn read_request(reader: &mut BufReader) -> Result { + let mut request = String::with_capacity(512); + let mut total = 0usize; loop { - stream.read_exact(&mut byte).await?; - buf.push(byte[0]); - if buf.ends_with(b"\r\n\r\n") { - break; + let mut line = String::new(); + let n = reader.read_line(&mut line).await?; + if n == 0 { + return Err("Connection closed before headers were complete".into()); } - if buf.len() > 8_192 { + total = total.saturating_add(n); + if total > 8_192 { return Err("Request header too large (> 8 KiB)".into()); } + request.push_str(&line); + // Both `\r\n` (CRLF, RFC 7230 §3) and bare `\n` terminate the headers. + if line == "\r\n" || line == "\n" { + break; + } } - Ok(String::from_utf8_lossy(&buf).into_owned()) + Ok(request) } -/// Extract the URL path from `GET /path HTTP/1.1`. -fn parse_path(request: &str) -> Option<&str> { +/// Extract the method and URL path from `GET /path HTTP/1.1`. +/// Returns `(method, path)` or `None` if the request line is malformed or +/// the method is not GET/HEAD. +fn parse_path(request: &str) -> Option<(&str, &str)> { let first = request.lines().next()?; let mut it = first.splitn(3, ' '); let method = it.next()?; @@ -125,7 +228,8 @@ fn parse_path(request: &str) -> Option<&str> { return None; } - it.next() + let path = it.next()?; + Some((method, path)) } // ─── Path resolution ───────────────────────────────────────────────────────── @@ -137,13 +241,16 @@ enum Resolved { DirectoryListing(std::path::PathBuf), } -fn resolve_path(site_root: &Path, url_path: &str, index_file: &str, dir_listing: bool) -> Resolved { +fn resolve_path( + canonical_root: &Path, + url_path: &str, + index_file: &str, + dir_listing: bool, +) -> Resolved { + // 2.3 — `canonical_root` is already resolved by `server::run`; no + // canonicalize() syscall needed here on every request. 
let relative = url_path.trim_start_matches('/'); - let candidate = site_root.join(relative); - - let Ok(canonical_root) = site_root.canonicalize() else { - return Resolved::Fallback; - }; + let candidate = canonical_root.join(relative); let target = if candidate.is_dir() { let idx = candidate.join(index_file); @@ -162,7 +269,7 @@ fn resolve_path(site_root: &Path, url_path: &str, index_file: &str, dir_listing: return Resolved::Fallback; }; - if !canonical.starts_with(&canonical_root) { + if !canonical.starts_with(canonical_root) { return Resolved::Forbidden; } @@ -171,24 +278,43 @@ fn resolve_path(site_root: &Path, url_path: &str, index_file: &str, dir_listing: // ─── Response writing ──────────────────────────────────────────────────────── +/// Write a complete HTTP response, optionally suppressing the body (for HEAD). +/// +/// The `Content-Length` header always reflects the full body size, even when +/// the body is suppressed, as required by RFC 7231 §4.3.2. async fn write_response( stream: &mut TcpStream, status: u16, reason: &str, content_type: &str, body: &[u8], + suppress_body: bool, +) -> Result<()> { + write_headers(stream, status, reason, content_type, body.len() as u64).await?; + if !suppress_body { + stream.write_all(body).await?; + } + stream.flush().await?; + Ok(()) +} + +/// Write only the response status line and headers, followed by the blank line. +/// Used by the streaming file path (1.7) and internally by `write_response`. 
+async fn write_headers( + stream: &mut TcpStream, + status: u16, + reason: &str, + content_type: &str, + content_length: u64, ) -> Result<()> { let header = format!( "HTTP/1.1 {status} {reason}\r\n\ Content-Type: {content_type}\r\n\ - Content-Length: {}\r\n\ + Content-Length: {content_length}\r\n\ Connection: close\r\n\ - \r\n", - body.len() + \r\n" ); stream.write_all(header.as_bytes()).await?; - stream.write_all(body).await?; - stream.flush().await?; Ok(()) } @@ -206,16 +332,25 @@ fn build_directory_listing(dir: &Path, url_path: &str) -> String { let base = url_path.trim_end_matches('/'); for name in &names { - let _ = writeln!(items, "
  • {name}
  • "); + // 1.3 — Percent-encode the filename for the href attribute. + let encoded_name = percent_encode_path(name); + // 1.3 — HTML-entity-escape the filename for the visible link text. + let escaped_name = html_escape(name); + let _ = writeln!( + items, + "
  • {escaped_name}
  • " + ); } } + // 1.3 — Also escape url_path when used in page title / heading. + let escaped_path = html_escape(url_path); format!( r#" - Index of {url_path} + Index of {escaped_path} -

    Index of {url_path}

    +

    Index of {escaped_path}

      {items}
    @@ -232,6 +367,41 @@ fn build_directory_listing(dir: &Path, url_path: &str) -> String { ) } +/// HTML-entity-escape a string for safe insertion into HTML content or +/// attribute values. +fn html_escape(s: &str) -> String { + let mut out = String::with_capacity(s.len()); + for ch in s.chars() { + match ch { + '&' => out.push_str("&"), + '<' => out.push_str("<"), + '>' => out.push_str(">"), + '"' => out.push_str("""), + '\'' => out.push_str("'"), + c => out.push(c), + } + } + out +} + +/// Percent-encode a filename component for safe use in a URL path segment. +/// Encodes all bytes that are not unreserved URI characters (RFC 3986). +fn percent_encode_path(s: &str) -> String { + let mut out = String::with_capacity(s.len()); + for byte in s.bytes() { + match byte { + // Unreserved characters: ALPHA / DIGIT / "-" / "." / "_" / "~" + b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~' => { + out.push(byte as char); + } + b => { + let _ = write!(out, "%{b:02X}"); + } + } + } + out +} + // ─── Percent decoding ──────────────────────────────────────────────────────── /// Decode percent-encoded characters in a URL path (e.g. `%20` → ` `). diff --git a/src/server/mod.rs b/src/server/mod.rs index 46a3ebc..291069d 100644 --- a/src/server/mod.rs +++ b/src/server/mod.rs @@ -14,9 +14,15 @@ pub mod fallback; pub mod handler; pub mod mime; -use std::{net::TcpListener as StdTcpListener, path::Path, path::PathBuf, sync::Arc}; +use std::{ + net::TcpListener as StdTcpListener, path::Path, path::PathBuf, sync::Arc, time::Duration, +}; -use tokio::{net::TcpListener, sync::watch}; +use tokio::{ + net::TcpListener, + sync::{oneshot, watch, Semaphore}, + task::JoinSet, +}; use crate::{ config::Config, @@ -29,6 +35,7 @@ use crate::{ /// Start the HTTP server. 
/// /// Binds the port (with optional fallback), updates `SharedState.actual_port`, +/// sends the bound port through `port_tx` so Tor can start without a sleep, /// then accepts connections until the shutdown watch fires. pub async fn run( config: Arc, @@ -36,15 +43,19 @@ pub async fn run( metrics: SharedMetrics, data_dir: PathBuf, mut shutdown: watch::Receiver, + port_tx: oneshot::Sender, ) { let bind_addr = &config.server.bind; let base_port = config.server.port; let fallback = config.server.auto_port_fallback; + let max_conns = config.server.max_connections as usize; let (listener, bound_port) = match bind_with_fallback(bind_addr, base_port, fallback) { Ok(v) => v, Err(e) => { log::error!("Server failed to bind: {e}"); + // port_tx is dropped here, which closes the channel; lifecycle + // will receive an Err from the oneshot receiver. return; } }; @@ -59,22 +70,53 @@ pub async fn run( s.server_running = true; } + // Signal the bound port to lifecycle so Tor can start immediately. + // If the receiver has already gone away, we continue serving anyway. + let _ = port_tx.send(bound_port); + log::info!("HTTP server listening on {bind_addr}:{bound_port}"); let site_root = data_dir.join(&config.site.directory); + // 2.3 — canonicalize once here so resolve_path never calls canonicalize() + // per-request. If the root is missing or inaccessible, fail fast before + // binding any port. + let canonical_root = match site_root.canonicalize() { + Ok(p) => p, + Err(e) => { + log::error!( + "Site root {} cannot be resolved: {e}. Check that [site] directory exists.", + site_root.display() + ); + return; + } + }; let index_file = config.site.index_file.clone(); let dir_list = config.site.enable_directory_listing; + let semaphore = Arc::new(Semaphore::new(max_conns)); + // 2.10 — JoinSet tracks in-flight handler tasks so shutdown can drain them. + let mut join_set: JoinSet<()> = JoinSet::new(); + loop { tokio::select! 
{ result = listener.accept() => { match result { Ok((stream, peer)) => { log::debug!("Connection from {peer}"); - let site = site_root.clone(); + let Ok(permit) = Arc::clone(&semaphore).acquire_owned().await else { + break; // semaphore closed — shutting down + }; + if semaphore.available_permits() == 0 { + log::warn!( + "Connection limit ({max_conns}) reached; \ + further connections will queue" + ); + } + let site = canonical_root.clone(); let idx = index_file.clone(); let met = Arc::clone(&metrics); - tokio::spawn(async move { + join_set.spawn(async move { + let _permit = permit; if let Err(e) = handler::handle( stream, &site, &idx, dir_list, met ).await { @@ -93,7 +135,13 @@ pub async fn run( } state.write().await.server_running = false; - log::info!("HTTP server stopped."); + log::info!("HTTP server stopped accepting; draining in-flight connections…"); + + // 2.10 — wait up to 5 seconds for in-flight handlers to complete so + // responses that were already being written are not truncated mid-stream. + let drain = async { while join_set.join_next().await.is_some() {} }; + let _ = tokio::time::timeout(Duration::from_secs(5), drain).await; + log::info!("HTTP server drained."); } // ─── Port binding ──────────────────────────────────────────────────────────── @@ -136,21 +184,37 @@ fn bind_with_fallback(addr: &str, port: u16, fallback: bool) -> Result<(TcpListe // ─── Site scanner ───────────────────────────────────────────────────────────── -/// Count files and total bytes in the site directory (non-recursive). -pub fn scan_site(site_root: &Path) -> (u32, u64) { +/// Recursively count files and total bytes in `site_root` (BFS traversal). +/// +/// Returns `Err` if any `read_dir` call fails so callers can log a warning +/// instead of silently reporting zeros. +/// +/// **Must be called from a blocking context** (e.g. `tokio::task::spawn_blocking`) +/// because `std::fs::read_dir` is a blocking syscall. 
+pub fn scan_site(site_root: &Path) -> crate::Result<(u32, u64)> { let mut count = 0u32; let mut bytes = 0u64; - if let Ok(entries) = std::fs::read_dir(site_root) { + let mut queue: std::collections::VecDeque = std::collections::VecDeque::new(); + queue.push_back(site_root.to_path_buf()); + + while let Some(dir) = queue.pop_front() { + let entries = std::fs::read_dir(&dir) + .map_err(|e| format!("Cannot read directory {}: {e}", dir.display()))?; + for entry in entries.flatten() { - if let Ok(meta) = entry.metadata() { - if meta.is_file() { + match entry.metadata() { + Ok(m) if m.is_file() => { count = count.saturating_add(1); - bytes = bytes.saturating_add(meta.len()); + bytes = bytes.saturating_add(m.len()); + } + Ok(m) if m.is_dir() => { + queue.push_back(entry.path()); } + _ => {} } } } - (count, bytes) + Ok((count, bytes)) } diff --git a/src/tor/mod.rs b/src/tor/mod.rs index 948cfcd..befa6b7 100644 --- a/src/tor/mod.rs +++ b/src/tor/mod.rs @@ -37,7 +37,7 @@ use std::path::PathBuf; use arti_client::config::TorClientConfigBuilder; use arti_client::TorClient; use futures::StreamExt; -use tokio::net::TcpStream; +use tokio::{net::TcpStream, sync::watch}; use tor_cell::relaycell::msg::Connected; use tor_hsservice::{config::OnionServiceConfigBuilder, handle_rend_requests, HsId, StreamRequest}; @@ -50,23 +50,26 @@ use crate::runtime::state::{SharedState, TorStatus}; /// Spawns a Tokio task and returns immediately. Tor status and the onion /// address are written into `state` as things progress, exactly as before. /// -/// The signature is intentionally identical to the old subprocess version -/// so `lifecycle.rs` requires zero changes. -pub fn init(data_dir: PathBuf, bind_port: u16, state: SharedState) { +/// `shutdown` is a watch channel whose `true` value triggers a clean exit +/// from the stream-request loop (fix 2.10). 
+pub fn init( + data_dir: PathBuf, + bind_port: u16, + state: SharedState, + shutdown: watch::Receiver, +) { tokio::spawn(async move { - if let Err(e) = run(data_dir, bind_port, state.clone()).await { + if let Err(e) = run(data_dir, bind_port, state.clone(), shutdown).await { log::error!("Tor: fatal error: {e}"); - set_status(&state, TorStatus::Failed(None)).await; + set_status(&state, TorStatus::Failed(e.to_string())).await; } }); } -/// No-op on shutdown. -/// -/// The `TorClient` is owned by the Tokio task spawned in `init()` and is -/// dropped — closing all Tor circuits — when that task exits as part of the -/// normal Tokio runtime shutdown. Nothing needs to be done explicitly here. -pub const fn kill() {} +// `kill()` has been removed (fix 2.10): the `TorClient` is owned by the task +// spawned in `init()` and is dropped when that task exits, which closes all +// Tor circuits cleanly. Graceful shutdown is now signalled through the +// `shutdown` watch channel passed to `init()`. // ─── Core async logic ───────────────────────────────────────────────────────── @@ -74,6 +77,7 @@ async fn run( data_dir: PathBuf, bind_port: u16, state: SharedState, + mut shutdown: watch::Receiver, ) -> Result<(), Box> { set_status(&state, TorStatus::Starting).await; @@ -158,17 +162,50 @@ async fn run( // each other. Dropping the task naturally closes the Tor circuit. let mut stream_requests = handle_rend_requests(rend_requests); - while let Some(stream_req) = stream_requests.next().await { - let local_addr = format!("127.0.0.1:{bind_port}"); - tokio::spawn(async move { - if let Err(e) = proxy_stream(stream_req, &local_addr).await { - // Downgraded to debug — normal on abrupt disconnects. - log::debug!("Tor: stream closed: {e}"); + let semaphore = std::sync::Arc::new(tokio::sync::Semaphore::new(256)); + + // 2.10 — use select! so a shutdown signal can break the accept loop cleanly, + // instead of blocking indefinitely in stream_requests.next(). + loop { + tokio::select! 
{ + next = stream_requests.next() => { + if let Some(stream_req) = next { + let local_addr = format!("127.0.0.1:{bind_port}"); + let Ok(permit) = std::sync::Arc::clone(&semaphore).acquire_owned().await else { + break; // semaphore closed + }; + tokio::spawn(async move { + let _permit = permit; + if let Err(e) = proxy_stream(stream_req, &local_addr).await { + // Downgraded to debug — normal on abrupt disconnects. + log::debug!("Tor: stream closed: {e}"); + } + }); + } else { + // The onion service stream ended unexpectedly (Tor network + // disruption, Arti internal error, resource exhaustion). + // Flip the dashboard to Failed so the operator sees a clear + // signal rather than a permanently green READY badge. + log::warn!( + "Tor: stream_requests stream ended — onion service is no longer active" + ); + // 2.9 — use Failed(String) with a human-readable reason + set_status(&state, TorStatus::Failed("stream ended".into())).await; + state.write().await.onion_address = None; + return Ok(()); + } + } + _ = shutdown.changed() => { + if *shutdown.borrow() { + log::info!("Tor: shutdown signal received — stopping stream loop"); + break; + } } - }); + } } - log::warn!("Tor: stream_requests stream ended — onion service is no longer active"); + // Clean shutdown: clear the displayed onion address. 
+ state.write().await.onion_address = None; Ok(()) } From 685c5c217673bab870f35e87c1b41597c3a96f1a Mon Sep 17 00:00:00 2001 From: csd113 Date: Fri, 20 Mar 2026 19:31:10 -0700 Subject: [PATCH 2/4] more bug fix --- Cargo.lock | 68 +----------------- Cargo.toml | 24 ++++++- deny.toml | 18 +++++ src/config/defaults.rs | 6 ++ src/config/loader.rs | 35 +++------ src/config/mod.rs | 99 +++++++++++++++++++++++--- src/console/mod.rs | 49 +++++++++---- src/error.rs | 53 ++++++++++++++ src/logging/mod.rs | 55 ++++++++++----- src/main.rs | 9 ++- src/runtime/lifecycle.rs | 148 +++++++++++++++++++++++---------------- src/server/handler.rs | 104 +++++++++++++++++++++------ src/server/mime.rs | 17 ++++- src/server/mod.rs | 75 ++++++++++++-------- 14 files changed, 509 insertions(+), 251 deletions(-) create mode 100644 src/error.rs diff --git a/Cargo.lock b/Cargo.lock index bd9fe3f..1d89169 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -375,15 +375,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = "block2" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdeb9d870516001442e364c5220d3574d2da8dc765554b4a617230d33fa58ef5" -dependencies = [ - "objc2", -] - [[package]] name = "bstr" version = "1.12.1" @@ -455,12 +446,6 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" -[[package]] -name = "cfg_aliases" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" - [[package]] name = "chrono" version = "0.4.44" @@ -776,17 +761,6 @@ dependencies = [ "cipher", ] -[[package]] -name = "ctrlc" -version = "3.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0b1fab2ae45819af2d0731d60f2afe17227ebb1a1538a236da84c93e9a60162" -dependencies = [ - "dispatch2", - "nix", - "windows-sys 0.61.2", -] 
- [[package]] name = "curve25519-dalek" version = "4.1.3" @@ -1082,18 +1056,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "dispatch2" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e0e367e4e7da84520dedcac1901e4da967309406d1e51017ae1abfb97adbd38" -dependencies = [ - "bitflags 2.11.0", - "block2", - "libc", - "objc2", -] - [[package]] name = "displaydoc" version = "0.2.5" @@ -2210,18 +2172,6 @@ dependencies = [ "tempfile", ] -[[package]] -name = "nix" -version = "0.31.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d6d0705320c1e6ba1d912b5e37cf18071b6c2e9b7fa8215a1e8a7651966f5d3" -dependencies = [ - "bitflags 2.11.0", - "cfg-if", - "cfg_aliases", - "libc", -] - [[package]] name = "nom" version = "7.1.3" @@ -2366,15 +2316,6 @@ dependencies = [ "syn 2.0.117", ] -[[package]] -name = "objc2" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a12a8ed07aefc768292f076dc3ac8c48f3781c8f2d5851dd3d98950e8c5a89f" -dependencies = [ - "objc2-encode", -] - [[package]] name = "objc2-core-foundation" version = "0.3.2" @@ -2384,12 +2325,6 @@ dependencies = [ "bitflags 2.11.0", ] -[[package]] -name = "objc2-encode" -version = "4.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef25abbcd74fb2609453eb695bd2f860d389e457f67dc17cafc8b8cbc89d0c33" - [[package]] name = "objc2-io-kit" version = "0.3.2" @@ -3123,12 +3058,12 @@ dependencies = [ "arti-client", "chrono", "crossterm", - "ctrlc", "data-encoding", "futures", "log", "serde", "sha3", + "thiserror 1.0.69", "tokio", "toml 0.8.23", "tor-cell", @@ -3864,7 +3799,6 @@ dependencies = [ "bytes", "libc", "mio 1.1.1", - "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2", diff --git a/Cargo.toml b/Cargo.toml index 7e01afa..09785aa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,20 @@ path = "src/main.rs" [dependencies] # Async runtime — 
drives the server, console, and all I/O -tokio = { version = "1", features = ["full"] } +# 3.6 — Explicit feature list instead of "full"; removes unused `process` and +# `io-std` components, reducing binary size and compile time. +# Note: open_browser uses std::process::Command (stdlib), not the Tokio +# process feature, so dropping `process` here is safe. +tokio = { version = "1", features = [ + "rt-multi-thread", # multi-threaded work-stealing executor + "net", # TcpListener / TcpStream + "io-util", # AsyncBufReadExt, AsyncWriteExt, copy + "fs", # tokio::fs::File, metadata + "sync", # Mutex, RwLock, Semaphore, oneshot, watch, mpsc + "time", # interval, sleep, timeout + "macros", # #[tokio::main], tokio::select! + "signal", # SIGINT / SIGTERM (used by ctrlc integration) +] } # ─── Arti (in-process Tor) ──────────────────────────────────────────────────── # @@ -46,6 +59,9 @@ futures = "0.3" sha3 = "0.10" data-encoding = "2" +# Typed error enum — derives Display + Error for AppError variants +thiserror = "1" + # Configuration — TOML parsing and typed deserialization serde = { version = "1", features = ["derive"] } toml = "0.8" @@ -59,8 +75,10 @@ crossterm = "0.27" # Timestamps — used in log formatting chrono = { version = "0.4", features = ["clock"] } -# Signal handling — catches SIGINT / SIGTERM for graceful shutdown -ctrlc = "3" +# Signal handling — Ctrl-C and SIGTERM handled via tokio::signal (4.7) +# ctrlc crate removed: tokio's built-in signal support (feature "signal", already +# in the explicit feature list) eliminates the threading conflict between the ctrlc +# crate's OS-level signal handler and Tokio's internal signal infrastructure. 
[profile.release] opt-level = 3 diff --git a/deny.toml b/deny.toml index daa8e54..f4bf2df 100644 --- a/deny.toml +++ b/deny.toml @@ -77,6 +77,24 @@ skip = [ { name = "windows_x86_64_gnullvm" }, { name = "windows_x86_64_msvc" }, { name = "winnow" }, + # 4.6 — Five additional duplicates present in Cargo.lock but previously + # absent from this list, causing `cargo deny check` to warn on every run. + # + # foldhash 0.1.x / 0.2.x — two generations pulled by hashbrown 0.15 vs 0.16 + # (both via arti's dependency tree; different sub-crates pin different gens) + { name = "foldhash" }, + # hashbrown 0.12 / 0.15 / 0.16 — pulled by indexmap 1.x (0.12), tokio/arti + # (0.15), and the latest arti-client dependency updates (0.16) + { name = "hashbrown" }, + # indexmap 1.x / 2.x — arti-client 0.40 still carries several crates that + # depend on indexmap 1.x while the rest of the tree has migrated to 2.x + { name = "indexmap" }, + # redox_syscall 0.5.x / 0.7.x — pulled by crossterm (0.5) and tokio (0.7) + # via their respective platform abstraction layers for Redox OS + { name = "redox_syscall" }, + # schemars 0.9.x / 1.x — arti-client 0.40 uses schemars 0.9 for config + # schema generation; our direct deps have moved to 1.x + { name = "schemars" }, ] # ─── Advisories ─────────────────────────────────────────────────────────────── diff --git a/src/config/defaults.rs b/src/config/defaults.rs index cf7cc1d..6c09c81 100644 --- a/src/config/defaults.rs +++ b/src/config/defaults.rs @@ -74,6 +74,12 @@ level = "info" # Log file path relative to ./rusthost-data/. file = "logs/rusthost.log" +# When true (default), suppress Info/Debug/Trace records from third-party +# crates (Arti, Tokio, TLS internals) so the log file stays focused on +# application events. Warnings and errors from all crates are always shown. +# Set false to see full dependency tracing (useful for debugging Tor issues). 
+filter_dependencies = true + # ─── [console] ──────────────────────────────────────────────────────────────── [console] diff --git a/src/config/loader.rs b/src/config/loader.rs index 734ee7d..a02a717 100644 --- a/src/config/loader.rs +++ b/src/config/loader.rs @@ -5,14 +5,14 @@ use std::path::Path; use super::Config; -use crate::Result; +use crate::{AppError, Result}; pub fn load(path: &Path) -> Result { let raw = std::fs::read_to_string(path) - .map_err(|e| format!("Cannot read {}: {e}", path.display()))?; + .map_err(|e| AppError::ConfigLoad(format!("Cannot read {}: {e}", path.display())))?; - let config: Config = - toml::from_str(&raw).map_err(|e| format!("settings.toml is malformed: {e}"))?; + let config: Config = toml::from_str(&raw) + .map_err(|e| AppError::ConfigLoad(format!("settings.toml is malformed: {e}")))?; validate(&config)?; Ok(config) @@ -22,15 +22,9 @@ fn validate(cfg: &Config) -> Result<()> { let mut errors: Vec = Vec::new(); // [server] - if cfg.server.port == 0 { - errors.push("[server] port must be between 1 and 65535".into()); - } - if cfg.server.bind.parse::().is_err() { - errors.push(format!( - "[server] bind = {:?} is not a valid IP address", - cfg.server.bind - )); - } + // port: NonZeroU16 — port 0 is already rejected by serde at parse time (4.2). + // bind: IpAddr — invalid IPs are already rejected by serde at parse time (4.2). + // level: LogLevel — invalid levels are already rejected by serde at parse time (4.2). 
// [site] if cfg.site.index_file.contains(std::path::MAIN_SEPARATOR) { @@ -53,14 +47,6 @@ fn validate(cfg: &Config) -> Result<()> { } // [logging] - let valid_levels = ["trace", "debug", "info", "warn", "error"]; - if !valid_levels.contains(&cfg.logging.level.as_str()) { - errors.push(format!( - "[logging] level = {:?} is invalid; choose one of: {}", - cfg.logging.level, - valid_levels.join(", ") - )); - } { let log_path = std::path::Path::new(&cfg.logging.file); if log_path.is_absolute() { @@ -103,11 +89,6 @@ fn validate(cfg: &Config) -> Result<()> { if errors.is_empty() { Ok(()) } else { - Err(format!( - "settings.toml has {} error(s):\n • {}", - errors.len(), - errors.join("\n • ") - ) - .into()) + Err(AppError::ConfigValidation(errors)) } } diff --git a/src/config/mod.rs b/src/config/mod.rs index 106dbc5..deb99bb 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -5,7 +5,60 @@ pub mod defaults; pub mod loader; -use serde::{Deserialize, Serialize}; +use std::net::IpAddr; +use std::num::NonZeroU16; + +use log::LevelFilter; +use serde::{Deserialize, Deserializer, Serialize}; + +// ─── Log level ─────────────────────────────────────────────────────────────── + +/// Typed log-level value that serde deserialises directly from the TOML string. +/// +/// Replaces the `level: String` field + the duplicate `parse_level` / +/// validation logic that previously existed in both `loader.rs` and +/// `logging/mod.rs` (fix 4.2). An invalid value (e.g. `level = "verbose"`) +/// is now rejected at parse time with a clear serde error. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum LogLevel { + Trace, + Debug, + Info, + Warn, + Error, +} + +impl From for LevelFilter { + fn from(level: LogLevel) -> Self { + match level { + LogLevel::Trace => Self::Trace, + LogLevel::Debug => Self::Debug, + LogLevel::Info => Self::Info, + LogLevel::Warn => Self::Warn, + LogLevel::Error => Self::Error, + } + } +} + +// ─── Serde helpers ──────────────────────────────────────────────────────────── + +/// Deserialise `bind` from a TOML string directly into `IpAddr`. +/// +/// Replaces the post-parse `.parse::()` check in `loader.rs` with a +/// parse-time error so an invalid IP is caught the moment the file is read +/// (fix 4.2). +fn deserialize_ip_addr<'de, D: Deserializer<'de>>(d: D) -> Result { + let s = String::deserialize(d)?; + s.parse().map_err(serde::de::Error::custom) +} + +/// Serialise `IpAddr` back to a string for round-trip TOML serialisation. +fn serialize_ip_addr(addr: &IpAddr, s: S) -> Result { + s.serialize_str(&addr.to_string()) +} + +// ─── Config structs ────────────────────────────────────────────────────────── #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(deny_unknown_fields)] @@ -21,8 +74,18 @@ pub struct Config { #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub struct ServerConfig { - pub port: u16, - pub bind: String, + /// Non-zero port number. `NonZeroU16` prevents port 0 at the type level: + /// serde rejects a zero value during deserialisation (fix 4.2). + pub port: NonZeroU16, + + /// Network interface to bind to. Parsed from TOML string at load time; + /// an invalid IP address is rejected immediately (fix 4.2). 
+ #[serde( + deserialize_with = "deserialize_ip_addr", + serialize_with = "serialize_ip_addr" + )] + pub bind: IpAddr, + pub auto_port_fallback: bool, pub open_browser_on_start: bool, pub max_connections: u32, @@ -44,7 +107,7 @@ pub struct SiteConfig { /// /// All paths (`tor_data/`, `tor_hidden_service/`, `torrc`) are derived /// automatically from the binary's data directory — no user configuration -/// needed. The only knob is whether Tor is enabled at all. +/// needed. #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub struct TorConfig { @@ -57,8 +120,23 @@ pub struct TorConfig { #[serde(deny_unknown_fields)] pub struct LoggingConfig { pub enabled: bool, - pub level: String, + + /// Log level, parsed from a lowercase string (`"trace"` … `"error"`). + /// Invalid values are rejected at config-load time (fix 4.2). + pub level: LogLevel, + pub file: String, + + /// When `true` (default), suppress `Info`-and-below records from + /// third-party crates (Arti, Tokio, TLS internals) so the log file stays + /// focused on application events. Warnings and errors from all crates are + /// always passed through. Set `false` for full dependency tracing (fix 4.3). 
+ #[serde(default = "default_true")] + pub filter_dependencies: bool, +} + +const fn default_true() -> bool { + true } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -75,12 +153,16 @@ pub struct IdentityConfig { pub instance_name: String, } +// ─── Default config ────────────────────────────────────────────────────────── + impl Default for Config { fn default() -> Self { Self { server: ServerConfig { - port: 8080, - bind: "127.0.0.1".into(), + port: NonZeroU16::new(8080).unwrap_or(NonZeroU16::MIN), + bind: "127.0.0.1" + .parse() + .unwrap_or(IpAddr::V4(std::net::Ipv4Addr::LOCALHOST)), auto_port_fallback: true, open_browser_on_start: false, max_connections: 256, @@ -93,8 +175,9 @@ impl Default for Config { tor: TorConfig { enabled: true }, logging: LoggingConfig { enabled: true, - level: "info".into(), + level: LogLevel::Info, file: "logs/rusthost.log".into(), + filter_dependencies: true, }, console: ConsoleConfig { interactive: true, diff --git a/src/console/mod.rs b/src/console/mod.rs index 0e60758..edea0cd 100644 --- a/src/console/mod.rs +++ b/src/console/mod.rs @@ -37,7 +37,7 @@ use crate::{ events::KeyEvent, state::{ConsoleMode, SharedMetrics, SharedState}, }, - Result, + AppError, Result, }; // ─── Global raw-mode flag ──────────────────────────────────────────────────── @@ -60,31 +60,36 @@ pub fn start( metrics: SharedMetrics, mut shutdown: watch::Receiver, ) -> Result> { - terminal::enable_raw_mode()?; - execute!(stdout(), terminal::EnterAlternateScreen, cursor::Hide)?; + // 4.1 — map crossterm io errors to AppError::Console. 
+ terminal::enable_raw_mode() + .map_err(|e| AppError::Console(format!("Failed to enable raw mode: {e}")))?; + execute!(stdout(), terminal::EnterAlternateScreen, cursor::Hide) + .map_err(|e| AppError::Console(format!("Failed to enter alternate screen: {e}")))?; RAW_MODE_ACTIVE.store(true, std::sync::atomic::Ordering::SeqCst); execute!( stdout(), terminal::Clear(terminal::ClearType::All), cursor::MoveTo(0, 0) - )?; + ) + .map_err(|e| AppError::Console(format!("Failed to clear screen: {e}")))?; - // ── Key event channel ──────────────────────────────────────────────────── + // ── Key event channel ───────────────────────────────────────────────────── let (key_tx, key_rx) = tokio::sync::mpsc::unbounded_channel::(); - // ── Input task (blocking thread) ───────────────────────────────────────── + // ── Input task (blocking thread) ────────────────────────────────────────── input::spawn(key_tx, shutdown.clone()); - // ── Render task ────────────────────────────────────────────────────────── + // ── Render task ─────────────────────────────────────────────────────────── let rate = config.console.refresh_rate_ms; tokio::spawn(async move { let mut interval = tokio::time::interval(std::time::Duration::from_millis(rate)); + let mut last_rendered = String::new(); // 3.3 — change-detection state loop { tokio::select! 
{ _ = interval.tick() => { - if let Err(e) = render(&config, &state, &metrics).await { + if let Err(e) = render(&config, &state, &metrics, &mut last_rendered).await { log::debug!("Render error: {e}"); } } @@ -98,9 +103,14 @@ pub fn start( Ok(key_rx) } -// ─── Render ────────────────────────────────────────────────────────────────── +// ─── Render ─────────────────────────────────────────────────────────────────── -async fn render(config: &Config, state: &SharedState, metrics: &SharedMetrics) -> Result<()> { +async fn render( + config: &Config, + state: &SharedState, + metrics: &SharedMetrics, + last_rendered: &mut String, // 3.3 — previous frame for change-detection +) -> Result<()> { let mode = state.read().await.console_mode.clone(); let output = match mode { @@ -113,19 +123,30 @@ async fn render(config: &Config, state: &SharedState, metrics: &SharedMetrics) - ConsoleMode::Help => dashboard::render_help(), }; + // 3.3 — Skip all terminal I/O when the frame is identical to the previous + // one. At 100 ms ticks this eliminates nearly every write during idle periods + // (no traffic, no state change). + if output == *last_rendered { + return Ok(()); + } + last_rendered.clone_from(&output); + let mut out = stdout(); execute!( out, cursor::MoveTo(0, 0), terminal::Clear(terminal::ClearType::FromCursorDown) - )?; - out.write_all(output.as_bytes())?; - out.flush()?; + ) + .map_err(|e| AppError::Console(format!("Terminal write error: {e}")))?; + out.write_all(output.as_bytes()) + .map_err(|e| AppError::Console(format!("stdout write error: {e}")))?; + out.flush() + .map_err(|e| AppError::Console(format!("stdout flush error: {e}")))?; Ok(()) } -// ─── Cleanup ───────────────────────────────────────────────────────────────── +// ─── Cleanup ────────────────────────────────────────────────────────────────── /// Restore the terminal to its original state. 
/// diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..57ff345 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,53 @@ +//! # Application Error Types +//! +//! **Directory:** `src/` +//! +//! Defines [`AppError`], the single typed error enum for the entire application. +//! All public functions returning `Result` use `Result` via the +//! crate-level alias in `main.rs`. +//! +//! Variants are scoped to the subsystem that produces them so callers can match +//! on the specific failure kind rather than inspecting a `Box` string. + +use thiserror::Error; + +/// Typed application error. +#[derive(Debug, Error)] +pub enum AppError { + /// Config file could not be read or TOML-parsed. + #[error("Config load error: {0}")] + ConfigLoad(String), + + /// Config was parsed successfully but failed semantic validation. + /// + /// The inner `Vec` contains one human-readable message per violated rule. + #[error("settings.toml has {} error(s):\n • {}", .0.len(), .0.join("\n • "))] + ConfigValidation(Vec), + + /// The global `log` logger could not be initialised. + #[error("Log init error: {0}")] + LogInit(String), + + /// TCP bind failed for a specific port. + #[error("Could not bind port {port}: {source}")] + ServerBind { port: u16, source: std::io::Error }, + + /// The HTTP server task exited before signalling its bound port, or the + /// bind-port handshake timed out. + #[error("Server startup error: {0}")] + ServerStartup(String), + + /// An error originating in the Tor / Arti subsystem. + #[error("Tor error: {0}")] + Tor(String), + + /// Console / terminal I/O error (crossterm or raw-mode operations). + #[error("Console error: {0}")] + Console(String), + + /// Transparent wrapper for any `std::io::Error` not covered by a more + /// specific variant. The `#[from]` attribute means `?` on any + /// `io::Result` in the codebase converts automatically. 
+ #[error(transparent)] + Io(#[from] std::io::Error), +} diff --git a/src/logging/mod.rs b/src/logging/mod.rs index 8220292..10fd7f4 100644 --- a/src/logging/mod.rs +++ b/src/logging/mod.rs @@ -25,7 +25,7 @@ use std::{ use chrono::Local; use log::{Level, LevelFilter, Log, Metadata, Record}; -use crate::{config::LoggingConfig, Result}; +use crate::{config::LoggingConfig, AppError, Result}; // ─── Global ring buffer ────────────────────────────────────────────────────── @@ -50,13 +50,31 @@ pub fn recent_lines(limit: usize) -> Vec { struct RustHostLogger { max_level: LevelFilter, + filter_dependencies: bool, /// Optional file handle. `None` when `logging.enabled = false`. file: Option>, } impl Log for RustHostLogger { fn enabled(&self, metadata: &Metadata) -> bool { - metadata.level() <= self.max_level + if metadata.level() > self.max_level { + return false; + } + // 4.3 — Target-based dependency filtering. + // + // When `filter_dependencies` is true, only pass through records that: + // a) come from the `rusthost` crate (target starts with "rusthost"), OR + // b) are at Warn level or above (always surfaced regardless of origin). + // + // This suppresses Info/Debug/Trace noise from Arti, Tokio, TLS internals + // and keeps the log file focused on application events. + if self.filter_dependencies { + let target = metadata.target(); + let is_app = target.starts_with("rusthost"); + let is_important = metadata.level() <= Level::Warn; + return is_app || is_important; + } + true } fn log(&self, record: &Record) { @@ -69,12 +87,16 @@ impl Log for RustHostLogger { let line = format!("[{level}] [{timestamp}] {}", record.args()); // Push to ring buffer. + // 3.5 — Clone before acquiring the lock so the String heap allocation + // does not contend with concurrent Arti logging threads. The lock is + // then held only for the O(1) push_back pointer swap. 
if let Some(buf) = LOG_BUFFER.get() { + let ring_line = line.clone(); if let Ok(mut guard) = buf.lock() { if guard.len() >= 1_000 { guard.pop_front(); } - guard.push_back(line.clone()); + guard.push_back(ring_line); } } @@ -115,7 +137,9 @@ pub fn flush() { pub fn init(config: &LoggingConfig, data_dir: &Path) -> Result<()> { LOG_BUFFER.get_or_init(|| Mutex::new(VecDeque::with_capacity(1_000))); - let max_level = parse_level(&config.level); + // 4.2 — LogLevel is now a typed enum; convert directly to LevelFilter. + // The duplicate parse_level() helper is no longer needed. + let max_level: LevelFilter = config.level.into(); let file = if config.enabled { let log_path = data_dir.join(&config.file); @@ -126,15 +150,22 @@ pub fn init(config: &LoggingConfig, data_dir: &Path) -> Result<()> { .create(true) .append(true) .open(&log_path) - .map_err(|e| format!("Cannot open log file {}: {e}", log_path.display()))?; + .map_err(|e| { + AppError::LogInit(format!("Cannot open log file {}: {e}", log_path.display())) + })?; Some(Mutex::new(f)) } else { None }; - let logger = Box::new(RustHostLogger { max_level, file }); + let logger = Box::new(RustHostLogger { + max_level, + filter_dependencies: config.filter_dependencies, + file, + }); - log::set_logger(Box::leak(logger)).map_err(|e| format!("Failed to set global logger: {e}"))?; + log::set_logger(Box::leak(logger)) + .map_err(|e| AppError::LogInit(format!("Failed to set global logger: {e}")))?; log::set_max_level(max_level); Ok(()) @@ -142,16 +173,6 @@ pub fn init(config: &LoggingConfig, data_dir: &Path) -> Result<()> { // ─── Helpers ───────────────────────────────────────────────────────────────── -fn parse_level(s: &str) -> LevelFilter { - match s.to_ascii_lowercase().as_str() { - "trace" => LevelFilter::Trace, - "debug" => LevelFilter::Debug, - "warn" => LevelFilter::Warn, - "error" => LevelFilter::Error, - _ => LevelFilter::Info, // "info" and any unknown value - } -} - const fn level_label(level: Level) -> &'static str { 
match level { Level::Trace => "TRACE", diff --git a/src/main.rs b/src/main.rs index d3e2899..909cba5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,13 +8,20 @@ mod config; mod console; +mod error; mod logging; mod runtime; mod server; mod tor; +pub use error::AppError; + /// Common `Result` alias used throughout the codebase. -pub type Result> = std::result::Result; +/// +/// The error type is [`AppError`] — a typed enum covering every failure mode. +/// All subsystems return this type so callers can match on specific variants +/// rather than inspecting an opaque `Box` string. +pub type Result = std::result::Result; #[tokio::main] async fn main() { diff --git a/src/runtime/lifecycle.rs b/src/runtime/lifecycle.rs index 4173933..eba1e39 100644 --- a/src/runtime/lifecycle.rs +++ b/src/runtime/lifecycle.rs @@ -8,7 +8,11 @@ //! 2. **Normal run** — loads config, starts every subsystem, enters the //! event dispatch loop, then shuts down gracefully. -use std::{path::PathBuf, sync::Arc, time::Duration}; +use std::{ + path::{Path, PathBuf}, + sync::Arc, + time::Duration, +}; use tokio::sync::{mpsc, oneshot, watch, RwLock}; @@ -19,49 +23,44 @@ use crate::{ events, state::{AppState, Metrics, SharedMetrics, SharedState, TorStatus}, }, - server, tor, Result, + server, tor, AppError, Result, }; -// ─── Paths ────────────────────────────────────────────────────────────────── - -fn data_dir() -> PathBuf { - let exe = std::env::current_exe().unwrap_or_else(|_| PathBuf::from(".")); - exe.parent() - .unwrap_or_else(|| std::path::Path::new(".")) - .join("rusthost-data") -} - -fn settings_path() -> PathBuf { - data_dir().join("settings.toml") -} - -// ─── Entry point ──────────────────────────────────────────────────────────── +// ─── Entry point ───────────────────────────────────────────────────────────── +// +// 4.4 — `data_dir()` and `settings_path()` are no longer free functions. 
+// The data directory is computed exactly once at the top of `run()` and +// threaded through every call that needs it as an explicit parameter. +// This removes the hidden `current_exe()` dependency from all internal +// functions and makes the path injectable (e.g. a tmp dir in tests). pub async fn run() -> Result<()> { - if is_first_run() { - first_run_setup()?; + // 4.4 — single source of truth; computed here, nowhere else. + let exe = std::env::current_exe().unwrap_or_else(|_| PathBuf::from(".")); + let data_dir = exe + .parent() + .unwrap_or_else(|| Path::new(".")) + .join("rusthost-data"); + let settings_path = data_dir.join("settings.toml"); + + if settings_path.exists() { + normal_run(data_dir).await?; } else { - normal_run().await?; + first_run_setup(&data_dir, &settings_path)?; } Ok(()) } -// ─── First Run ────────────────────────────────────────────────────────────── - -fn is_first_run() -> bool { - !settings_path().exists() -} - -fn first_run_setup() -> Result<()> { - let base = data_dir(); +// ─── First Run ─────────────────────────────────────────────────────────────── +fn first_run_setup(data_dir: &Path, settings_path: &Path) -> Result<()> { for sub in &["site", "logs"] { - std::fs::create_dir_all(base.join(sub))?; + std::fs::create_dir_all(data_dir.join(sub))?; } - config::defaults::write_default_config(&settings_path())?; + config::defaults::write_default_config(settings_path)?; - let placeholder = base.join("site/index.html"); + let placeholder = data_dir.join("site/index.html"); if !placeholder.exists() { std::fs::write(&placeholder, PLACEHOLDER_HTML)?; } @@ -80,17 +79,21 @@ fn first_run_setup() -> Result<()> { Ok(()) } -// ─── Normal Run ───────────────────────────────────────────────────────────── +// ─── Normal Run ────────────────────────────────────────────────────────────── + +async fn normal_run(data_dir: PathBuf) -> Result<()> { + let settings_path = data_dir.join("settings.toml"); -async fn normal_run() -> Result<()> { // 1. 
Load and validate config. - let config = Arc::new(config::loader::load(&settings_path())?); + let config = Arc::new(config::loader::load(&settings_path)?); // 2. Initialise logging. - logging::init(&config.logging, &data_dir())?; + logging::init(&config.logging, &data_dir)?; log::info!("RustHost starting — version {}", env!("CARGO_PKG_VERSION")); - if config.server.bind == "0.0.0.0" { + // 4.2 — config.server.bind is now IpAddr; use is_unspecified() instead of + // string comparison. + if config.server.bind.is_unspecified() { log::warn!("[server] bind = \"0.0.0.0\" — server is reachable on all interfaces."); } @@ -107,7 +110,7 @@ async fn normal_run() -> Result<()> { // 5. Scan site directory for initial file stats. // 2.2 — wrap in spawn_blocking; scan_site now returns Result. { - let site_root = data_dir().join(&config.site.directory); + let site_root = data_dir.join(&config.site.directory); let scan_root = site_root.clone(); let (count, bytes) = match tokio::task::spawn_blocking(move || server::scan_site(&scan_root)).await { @@ -129,16 +132,17 @@ async fn normal_run() -> Result<()> { // 6. Shutdown channels. let (shutdown_tx, shutdown_rx) = watch::channel(false); - // 7. Ctrl-C handler. - let (ctrlc_tx, mut ctrlc_rx) = mpsc::channel::<()>(1); - ctrlc::set_handler(move || { - let _ = ctrlc_tx.try_send(()); - })?; - - // 8. Start HTTP server task. + // 7. Start HTTP server task. let (port_tx, port_rx) = oneshot::channel::(); // 2.10 — keep the JoinHandle so we can await the server during shutdown drain. - let server_handle = spawn_server(&config, &state, &metrics, &shutdown_rx, port_tx); + let server_handle = spawn_server( + &config, + &state, + &metrics, + &shutdown_rx, + port_tx, + data_dir.clone(), + ); // Wait for the server to signal its bound port via the oneshot channel. // A 10-second timeout ensures a bind failure doesn't block forever. 
@@ -146,41 +150,48 @@ async fn normal_run() -> Result<()> { Ok(Ok(port)) => port, Ok(Err(_)) => { log::error!("Server port channel closed before sending — server failed to bind"); - return Err("Server failed to bind".into()); + return Err(AppError::ServerStartup( + "Server task exited before signalling its bound port".into(), + )); } Err(_) => { log::error!("Timed out waiting for server to bind"); - return Err("Server bind timeout".into()); + return Err(AppError::ServerStartup( + "Timed out waiting for server to bind (10 s)".into(), + )); } }; - // 9. Start Tor (if enabled). - // tor::init() spawns a Tokio task and returns immediately — never blocks - // the async executor. + // 8. Start Tor (if enabled). + // tor::init() spawns a Tokio task and returns immediately. // 2.10 — pass shutdown_rx so Tor's stream loop exits on clean shutdown. if config.tor.enabled { tor::init( - data_dir(), + data_dir.clone(), bind_port, Arc::clone(&state), shutdown_rx.clone(), ); } - // 10. Start console UI. + // 9. Start console UI. let key_rx = start_console(&config, &state, &metrics, shutdown_rx.clone()).await?; - // 11. Open browser (if configured). + // 10. Open browser (if configured). if config.server.open_browser_on_start { let port = state.read().await.actual_port; // 2.4 — use canonical open_browser from crate::runtime super::open_browser(&format!("http://localhost:{port}")); } - // 12. Event dispatch loop. - event_loop(key_rx, &config, &state, &metrics, &mut ctrlc_rx).await?; + // 11. Event dispatch loop. + // 4.7 — tokio::signal::ctrl_c() replaces the ctrlc crate. The signal + // future is passed into event_loop and used directly in select!, + // eliminating the threading conflict between the ctrlc crate's + // OS-level handler and Tokio's internal signal infrastructure. + event_loop(key_rx, &config, &state, &metrics, data_dir).await?; - // 13. Graceful shutdown. + // 12. Graceful shutdown. 
log::info!("Shutting down…"); let _ = shutdown_tx.send(true); @@ -206,14 +217,14 @@ fn spawn_server( metrics: &SharedMetrics, shutdown: &watch::Receiver, port_tx: oneshot::Sender, + data_dir: PathBuf, // 3.1/4.4 — caller owns data_dir; no current_exe() here ) -> tokio::task::JoinHandle<()> { let cfg = Arc::clone(config); let st = Arc::clone(state); let met = Arc::clone(metrics); - let data = data_dir(); let shut = shutdown.clone(); tokio::spawn(async move { - server::run(cfg, st, met, data, shut, port_tx).await; + server::run(cfg, st, met, data_dir, shut, port_tx).await; }) } @@ -233,6 +244,7 @@ async fn start_console( Ok(Some(rx)) } else { let port = state.read().await.actual_port; + // 4.2 — config.server.bind is IpAddr, Display impl formats correctly. println!("RustHost running on http://{}:{port}", config.server.bind); Ok(None) } @@ -243,10 +255,16 @@ async fn event_loop( config: &Arc, state: &SharedState, metrics: &SharedMetrics, - ctrlc_rx: &mut mpsc::Receiver<()>, + data_dir: PathBuf, // 3.1/4.4 — pre-computed by normal_run; not recomputed per event ) -> Result<()> { // 2.8 — mutable so we can set to None when the channel closes. let mut key_rx = key_rx; + + // 4.7 — tokio::signal::ctrl_c() replaces ctrlc crate's set_handler + mpsc. + // Pinned so it can be polled repeatedly inside select! without moving. + let ctrl_c = tokio::signal::ctrl_c(); + tokio::pin!(ctrl_c); + loop { // Build a future that yields the next key, or pends forever once the // channel closes (avoids repeated None-match after input task death). @@ -255,7 +273,7 @@ async fn event_loop( if let Some(rx) = key_rx.as_mut() { rx.recv().await } else { - // Channel already closed — pend forever so ctrlc_rx can still fire. + // Channel already closed — pend forever so ctrl_c can still fire. 
std::future::pending().await } }; @@ -268,7 +286,7 @@ async fn event_loop( config, Arc::clone(state), Arc::clone(metrics), - data_dir(), + data_dir.clone(), // 3.1 — cheap PathBuf clone, no syscall ).await?; if quit { break; } } else { @@ -280,7 +298,13 @@ async fn event_loop( key_rx = None; // suppress repeated warnings on next select! iteration } } - Some(()) = ctrlc_rx.recv() => { break; } + // 4.7 — Ctrl-C handled directly through Tokio's signal machinery. + result = &mut ctrl_c => { + if let Err(e) = result { + log::warn!("Ctrl-C signal error: {e}"); + } + break; + } } } Ok(()) @@ -289,7 +313,7 @@ async fn event_loop( // open_browser removed from this file — canonical definition is in // crate::runtime (src/runtime/mod.rs), called via super::open_browser (fix 2.4). -// ─── Placeholder HTML ───────────────────────────────────────────────────────── +// ─── Placeholder HTML ──────────────────────────────────────────────────────── const PLACEHOLDER_HTML: &str = r#" diff --git a/src/server/handler.rs b/src/server/handler.rs index f88ea51..6bb9fd0 100644 --- a/src/server/handler.rs +++ b/src/server/handler.rs @@ -10,7 +10,7 @@ //! configured site root via [`std::fs::canonicalize`]. Any attempt to //! escape (e.g. `/../secret`) is rejected with HTTP 403. -use std::{fmt::Write as _, path::Path}; +use std::{fmt::Write as _, path::Path, sync::Arc}; use tokio::{ io::{AsyncBufReadExt, AsyncWriteExt, BufReader}, @@ -26,8 +26,8 @@ use crate::{runtime::state::SharedMetrics, Result}; /// Handle one HTTP connection to completion. 
pub async fn handle( stream: TcpStream, - canonical_root: &Path, // 2.3 — pre-canonicalized by server::run - index_file: &str, + canonical_root: Arc, // 3.2 — pre-canonicalized (2.3); Arc avoids per-connection alloc + index_file: Arc, // 3.2 — Arc clone is O(1) dir_listing: bool, metrics: SharedMetrics, ) -> Result<()> { @@ -87,7 +87,7 @@ pub async fn handle( let path_only = raw_path.split('?').next().unwrap_or("/"); let decoded = percent_decode(path_only); - match resolve_path(canonical_root, &decoded, index_file, dir_listing) { + match resolve_path(&canonical_root, &decoded, &index_file, dir_listing) { Resolved::File(abs_path) => { serve_file(&mut stream, &abs_path, is_head, &metrics).await?; } @@ -200,11 +200,13 @@ async fn read_request(reader: &mut BufReader) -> Result { let mut line = String::new(); let n = reader.read_line(&mut line).await?; if n == 0 { - return Err("Connection closed before headers were complete".into()); + return Err( + std::io::Error::other("Connection closed before headers were complete").into(), + ); } total = total.saturating_add(n); if total > 8_192 { - return Err("Request header too large (> 8 KiB)".into()); + return Err(std::io::Error::other("Request header too large (> 8 KiB)").into()); } request.push_str(&line); // Both `\r\n` (CRLF, RFC 7230 §3) and bare `\n` terminate the headers. @@ -405,26 +407,84 @@ fn percent_encode_path(s: &str) -> String { // ─── Percent decoding ──────────────────────────────────────────────────────── /// Decode percent-encoded characters in a URL path (e.g. `%20` → ` `). +/// +/// # Correctness (fix 4.5) +/// +/// The previous implementation decoded each `%XX` pair as an independent +/// `char` cast from a `u8`, which produced two garbled characters for any +/// multi-byte UTF-8 sequence (e.g. `%C3%A9` yielded `é` instead of `é`). 
+/// +/// This version accumulates consecutive percent-decoded bytes into a buffer +/// and converts to UTF-8 via `String::from_utf8_lossy` only when a literal +/// character (or end-of-input) breaks the run. This correctly handles +/// multi-byte sequences split across adjacent `%XX` tokens and falls back +/// gracefully for invalid UTF-8. +/// +/// Null bytes (`%00`) are never decoded — they are passed through as the +/// literal string `%00` to prevent null-byte path injection attacks. fn percent_decode(input: &str) -> String { let mut output = String::with_capacity(input.len()); - let mut chars = input.chars(); - - while let Some(c) = chars.next() { - if c != '%' { - output.push(c); - continue; - } - // Decode the next two hex digits. - let h1 = chars.next().and_then(|c| c.to_digit(16)); - let h2 = chars.next().and_then(|c| c.to_digit(16)); - if let (Some(a), Some(b)) = (h1, h2) { - // Both digits are valid 0–15, so the combined value fits in u8. - let byte = u8::try_from((a << 4) | b).unwrap_or(b'?'); - output.push(byte as char); + // Buffer for consecutive percent-decoded bytes that may form a multi-byte + // UTF-8 character together. + let mut byte_buf: Vec = Vec::new(); + + let src = input.as_bytes(); + let mut i = 0; + + while i < src.len() { + // Use .get() throughout to satisfy clippy::indexing_slicing. + if src.get(i).copied() == Some(b'%') { + let h1 = src.get(i.saturating_add(1)).copied().and_then(hex_digit); + let h2 = src.get(i.saturating_add(2)).copied().and_then(hex_digit); + if let (Some(hi), Some(lo)) = (h1, h2) { + let byte = (hi << 4) | lo; + if byte == 0x00 { + // 4.5 — null byte: do not decode, emit literal %00. + flush_byte_buf(&mut byte_buf, &mut output); + output.push_str("%00"); + } else { + byte_buf.push(byte); + } + i = i.saturating_add(3); + } else { + // Incomplete or invalid %XX — pass through literal `%` and + // advance by 1 so the following characters are re-processed + // individually (preserving `%2` as `%2`, `%ZZ` as `%ZZ`). 
+ flush_byte_buf(&mut byte_buf, &mut output); + output.push('%'); + i = i.saturating_add(1); + } } else { - output.push('%'); + flush_byte_buf(&mut byte_buf, &mut output); + // Advance by one full UTF-8 character so we never split a scalar. + let ch = input + .get(i..) + .and_then(|s| s.chars().next()) + .unwrap_or('\u{FFFD}'); + output.push(ch); + i = i.saturating_add(ch.len_utf8()); } } - + // Flush any trailing percent-decoded bytes at end-of-input. + flush_byte_buf(&mut byte_buf, &mut output); output } + +/// Convert a single ASCII hex digit byte to its numeric value, or `None`. +const fn hex_digit(b: u8) -> Option { + match b { + b'0'..=b'9' => Some(b.wrapping_sub(b'0')), + b'a'..=b'f' => Some(b.wrapping_sub(b'a').wrapping_add(10)), + b'A'..=b'F' => Some(b.wrapping_sub(b'A').wrapping_add(10)), + _ => None, + } +} + +/// Interpret `buf` as UTF-8 (with lossy replacement for invalid sequences), +/// append to `out`, then clear `buf`. +fn flush_byte_buf(buf: &mut Vec, out: &mut String) { + if !buf.is_empty() { + out.push_str(&String::from_utf8_lossy(buf)); + buf.clear(); + } +} diff --git a/src/server/mime.rs b/src/server/mime.rs index 894c266..d9f39be 100644 --- a/src/server/mime.rs +++ b/src/server/mime.rs @@ -14,7 +14,22 @@ /// assert_eq!(mime::for_extension("xyz"), "application/octet-stream"); /// ``` pub fn for_extension(ext: &str) -> &'static str { - match ext.to_ascii_lowercase().as_str() { + // 3.4 — Normalise to lowercase in a fixed stack buffer to avoid a heap + // allocation on every served file request. Extensions longer than 16 bytes + // are not in the table, so we short-circuit to the fallback immediately. + let bytes = ext.as_bytes(); + let mut buf = [0u8; 16]; + if bytes.len() > buf.len() { + return "application/octet-stream"; + } + // Use zip to avoid any index that could theoretically panic under clippy's + // indexing_slicing lint; the length guard above already guarantees safety. 
+ for (slot, &b) in buf.iter_mut().zip(bytes.iter()) { + *slot = b.to_ascii_lowercase(); + } + // get() instead of a bare slice to satisfy clippy::indexing_slicing. + let lower = std::str::from_utf8(buf.get(..bytes.len()).unwrap_or_default()).unwrap_or(""); + match lower { // Text "html" | "htm" => "text/html; charset=utf-8", "css" => "text/css; charset=utf-8", diff --git a/src/server/mod.rs b/src/server/mod.rs index 291069d..24f56ef 100644 --- a/src/server/mod.rs +++ b/src/server/mod.rs @@ -15,7 +15,10 @@ pub mod handler; pub mod mime; use std::{ - net::TcpListener as StdTcpListener, path::Path, path::PathBuf, sync::Arc, time::Duration, + net::{IpAddr, TcpListener as StdTcpListener}, + path::{Path, PathBuf}, + sync::Arc, + time::Duration, }; use tokio::{ @@ -27,10 +30,10 @@ use tokio::{ use crate::{ config::Config, runtime::state::{SharedMetrics, SharedState}, - Result, + AppError, Result, }; -// ─── Public API ───────────────────────────────────────────────────────────── +// ─── Public API ────────────────────────────────────────────────────────────── /// Start the HTTP server. /// @@ -45,8 +48,9 @@ pub async fn run( mut shutdown: watch::Receiver, port_tx: oneshot::Sender, ) { - let bind_addr = &config.server.bind; - let base_port = config.server.port; + let bind_addr = config.server.bind; + // 4.2 — config.server.port is NonZeroU16; .get() produces the u16 value. + let base_port = config.server.port.get(); let fallback = config.server.auto_port_fallback; let max_conns = config.server.max_connections as usize; @@ -78,10 +82,10 @@ pub async fn run( let site_root = data_dir.join(&config.site.directory); // 2.3 — canonicalize once here so resolve_path never calls canonicalize() - // per-request. If the root is missing or inaccessible, fail fast before - // binding any port. - let canonical_root = match site_root.canonicalize() { - Ok(p) => p, + // per-request. If the root is missing or inaccessible, fail fast. 
+ // 3.2 — Wrap in Arc so per-connection clones are O(1) refcount bumps. + let canonical_root: Arc = match site_root.canonicalize() { + Ok(p) => Arc::from(p.as_path()), Err(e) => { log::error!( "Site root {} cannot be resolved: {e}. Check that [site] directory exists.", @@ -90,7 +94,9 @@ pub async fn run( return; } }; - let index_file = config.site.index_file.clone(); + // 3.2 — Arc: per-connection clone is an atomic refcount bump, not a + // String heap allocation. + let index_file: Arc = Arc::from(config.site.index_file.as_str()); let dir_list = config.site.enable_directory_listing; let semaphore = Arc::new(Semaphore::new(max_conns)); @@ -112,13 +118,13 @@ pub async fn run( further connections will queue" ); } - let site = canonical_root.clone(); - let idx = index_file.clone(); + let site = Arc::clone(&canonical_root); + let idx = Arc::clone(&index_file); let met = Arc::clone(&metrics); join_set.spawn(async move { let _permit = permit; if let Err(e) = handler::handle( - stream, &site, &idx, dir_list, met + stream, site, idx, dir_list, met ).await { log::debug!("Handler error: {e}"); } @@ -144,11 +150,11 @@ pub async fn run( log::info!("HTTP server drained."); } -// ─── Port binding ──────────────────────────────────────────────────────────── +// ─── Port binding ───────────────────────────────────────────────────────────── /// Try to bind to `addr:port`. When `fallback` is true, increments the port /// up to 10 times before giving up. -fn bind_with_fallback(addr: &str, port: u16, fallback: bool) -> Result<(TcpListener, u16)> { +fn bind_with_fallback(addr: IpAddr, port: u16, fallback: bool) -> Result<(TcpListener, u16)> { let max_attempts: u16 = if fallback { 10 } else { 1 }; for attempt in 0..max_attempts { @@ -164,22 +170,28 @@ fn bind_with_fallback(addr: &str, port: u16, fallback: bool) -> Result<(TcpListe Err(e) if e.kind() == std::io::ErrorKind::AddrInUse && fallback => { // Try the next port. 
} - Err(e) => { - return Err(format!( - "Port {try_port} is already in use. \ - Change [server].port in settings.toml or set \ - auto_port_fallback = true.\n OS error: {e}" - ) - .into()); + Err(source) => { + // 4.1 — use the structured ServerBind variant so callers can + // match on the port number and source error separately. + return Err(AppError::ServerBind { + port: try_port, + source, + }); } } } - Err(format!( - "Could not find a free port after {max_attempts} attempts \ - starting from {port}." - ) - .into()) + Err(AppError::ServerBind { + port, + source: std::io::Error::new( + std::io::ErrorKind::AddrInUse, + format!( + "Could not find a free port after {max_attempts} attempts \ + starting from {port}. Change [server].port in settings.toml \ + or set auto_port_fallback = true." + ), + ), + }) } // ─── Site scanner ───────────────────────────────────────────────────────────── @@ -199,8 +211,13 @@ pub fn scan_site(site_root: &Path) -> crate::Result<(u32, u64)> { queue.push_back(site_root.to_path_buf()); while let Some(dir) = queue.pop_front() { - let entries = std::fs::read_dir(&dir) - .map_err(|e| format!("Cannot read directory {}: {e}", dir.display()))?; + let entries = std::fs::read_dir(&dir).map_err(|e| { + // Preserve path context in the error while mapping to AppError::Io. 
+ AppError::Io(std::io::Error::new( + e.kind(), + format!("Cannot read directory {}: {e}", dir.display()), + )) + })?; for entry in entries.flatten() { match entry.metadata() { From b721c7680b834e9930d5cd8b89b6d690e68450de Mon Sep 17 00:00:00 2001 From: csd113 Date: Sat, 21 Mar 2026 10:19:37 -0700 Subject: [PATCH 3/4] completed bug fix and added unit tests --- CHANGELOG.md | 186 ++++++++++++++++ Cargo.lock | 2 + Cargo.toml | 94 ++++----- README.md | 93 ++++++-- src/config/defaults.rs | 14 ++ src/config/loader.rs | 143 +++++++++++++ src/config/mod.rs | 18 ++ src/console/dashboard.rs | 45 ++++ src/console/mod.rs | 5 + src/lib.rs | 22 ++ src/logging/mod.rs | 6 + src/main.rs | 106 ++++++++-- src/runtime/events.rs | 10 + src/runtime/lifecycle.rs | 88 +++++--- src/runtime/state.rs | 3 + src/server/handler.rs | 433 +++++++++++++++++++++++++++++++++----- src/server/mime.rs | 2 + src/server/mod.rs | 89 ++++++-- src/tor/mod.rs | 109 +++++++++- tests/http_integration.rs | 406 +++++++++++++++++++++++++++++++++++ 20 files changed, 1671 insertions(+), 203 deletions(-) create mode 100644 src/lib.rs create mode 100644 tests/http_integration.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index c2119f5..b650103 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,192 @@ This project adheres to [Semantic Versioning](https://semver.org/). --- +## [0.2.0] — Remediation Release + +This release resolves all 40 issues identified in the 2026-03-20 comprehensive security and reliability audit. Changes are grouped by the audit's five severity phases. + +--- + +### Phase 1 — Critical Security & Correctness + +#### 1.1 — Config Path Traversal: `site.directory` and `logging.file` Validated + +`src/config/loader.rs` — `validate()` now rejects any `site.directory` or `logging.file` value that is an absolute path, contains a `..` component, or contains a platform path separator. The process exits with a clear validation error before binding any port. 
Previously, a value such as `directory = "../../etc"` caused the HTTP server to serve the entire `/etc` tree, and a value such as `../../.ssh/authorized_keys` for `logging.file` caused log lines to be appended to the SSH authorized keys file. + +#### 1.2 — Race Condition: Tor Captures Bound Port via `oneshot` Channel + +`src/runtime/lifecycle.rs`, `src/server/mod.rs` — The 50 ms sleep that was the sole synchronisation barrier between the HTTP server binding its port and the Tor subsystem reading that port has been replaced with a `tokio::sync::oneshot` channel. The server sends the actual bound port through the channel before entering the accept loop; `tor::init` awaits that value (with a 10-second timeout) rather than reading a potentially-zero value out of `SharedState`. Previously, on a loaded system the race could be lost silently, causing every inbound Tor connection to fail with `ECONNREFUSED` to port 0 while the dashboard displayed a healthy green `TorStatus::Ready`. + +#### 1.3 — XSS in Directory Listing via Unsanitised Filenames + +`src/server/handler.rs` — `build_directory_listing()` now HTML-entity-escapes all filenames before interpolating them into link text (`&` → `&`, `<` → `<`, `>` → `>`, `"` → `"`, `'` → `'`) and percent-encodes filenames in `href` attribute values. Previously, a file named `">` produced an executable XSS payload in any directory listing page. + +#### 1.4 — HEAD Requests No Longer Receive a Response Body + +`src/server/handler.rs` — `parse_path()` now returns `(method, path)` instead of only the path. The method is threaded through to `write_response()` via a `suppress_body: bool` parameter. For `HEAD` requests, response headers (including `Content-Length` reflecting the full body size, as required by RFC 7231 §4.3.2) are written, but the body is not sent. + +#### 1.5 — Request Timeout Prevents Slow-Loris DoS + +`src/server/handler.rs` — The call to `read_request()` is now wrapped in `tokio::time::timeout(Duration::from_secs(30))`. 
Connections that fail to deliver a complete request header within 30 seconds receive a `408 Request Timeout` response and are closed. The timeout is also configurable via `[server] request_timeout_secs` in `settings.toml`. Timeout events are logged at `debug` level to avoid log flooding under attack. + +#### 1.6 — Unbounded Connection Spawning Replaced with Semaphore + +`src/server/mod.rs`, `src/tor/mod.rs` — Both the HTTP accept loop and the Tor stream request loop now use a `tokio::sync::Semaphore` to cap concurrent connections. The limit is configurable via `[server] max_connections` (default: 256). The semaphore `OwnedPermit` is held for the lifetime of each connection task and released on drop. When the limit is reached, the accept loop suspends naturally, providing backpressure; a `warn`-level log entry is emitted. Previously, unlimited concurrent connections could exhaust task stack memory and file descriptors. + +#### 1.7 — Files Streamed Instead of Read Entirely Into Memory + +`src/server/handler.rs` — `tokio::fs::read` (which loads the entire file into a `Vec`) has been replaced with `tokio::fs::File::open` followed by `tokio::io::copy(&mut file, &mut stream)`. File size is obtained via `file.metadata().await?.len()` for the `Content-Length` header. Memory consumption per connection is now bounded by the kernel socket buffer (~128–256 KB) regardless of file size. For `HEAD` requests, the file is opened only to read its size; the `copy` step is skipped. + +#### 1.8 — `strip_timestamp` No Longer Panics on Non-ASCII Log Lines + +`src/console/dashboard.rs` — `strip_timestamp()` previously used a byte index derived from iterating `.bytes()` to slice a `&str`, which panicked when the index fell inside a multi-byte UTF-8 character. The implementation now uses `splitn(3, ']')` to strip the leading `[LEVEL]` and `[HH:MM:SS]` tokens, which is both panic-safe and simpler. 
Any log line containing Unicode characters (Arti relay names, internationalized filenames, `.onion` addresses) is handled correctly. + +#### 1.9 — `TorStatus` Updated to `Failed` When Onion Service Terminates + +`src/tor/mod.rs` — When `stream_requests.next()` returns `None` (the onion service stream ends unexpectedly), the status is now set to `TorStatus::Failed("stream ended".to_string())` and the `onion_address` field is cleared from `AppState`. Previously, the dashboard permanently displayed a healthy green badge and the `.onion` address after the service had silently stopped serving traffic. + +#### 1.10 — Terminal Fully Restored on All Exit Paths; Panic Hook Registered + +`src/main.rs`, `src/console/mod.rs` — The error handler in `main.rs` now calls `console::cleanup()` (which issues `cursor::Show` and `terminal::LeaveAlternateScreen` before `disable_raw_mode`) on all failure paths. A `std::panic::set_hook` registered at startup ensures the same cleanup runs even when a panic occurs on an async executor thread. `console::cleanup()` is idempotent (guarded by a `RAW_MODE_ACTIVE` atomic swap), so calling it from multiple paths is safe. + +--- + +### Phase 2 — High Priority Reliability + +#### 2.1 — HTTP Request Reading Buffered with `BufReader` + +`src/server/handler.rs` — `read_request()` previously read one byte at a time, issuing up to 8,192 individual `read` syscalls per request. The stream is now wrapped in `tokio::io::BufReader` and reads headers line-by-line with `read_line()`. The 8 KiB header size limit is enforced by accumulating total bytes read. This also correctly handles `\r\n\r\n` split across TCP segments. + +#### 2.2 — `scan_site` is Now Recursive, Error-Propagating, and Non-Blocking + +`src/server/mod.rs`, `src/runtime/lifecycle.rs`, `src/runtime/events.rs` — `scan_site` now performs a breadth-first traversal using a `VecDeque` work queue, counting files and sizes in all subdirectories. 
The return type is now `Result<(u32, u64)>`; errors from `read_dir` are propagated and logged at `warn` level rather than silently returning `(0, 0)`. All call sites wrap the function in `tokio::task::spawn_blocking` to avoid blocking the async executor on directory I/O. + +#### 2.3 — `canonicalize()` Called Once at Startup, Not Per Request + +`src/server/mod.rs`, `src/server/handler.rs` — The site root is now canonicalized once in `server::run()` and passed as a pre-computed `PathBuf` into each connection handler. The per-request `site_root.canonicalize()` call in `resolve_path()` has been removed, eliminating a `realpath()` syscall on every request. + +#### 2.4 — `open_browser` Deduplicated + +`src/runtime/lifecycle.rs`, `src/runtime/events.rs`, `src/runtime/mod.rs` — The `open_browser` function was duplicated in `lifecycle.rs` and `events.rs`. It now lives in a single location (`src/runtime/mod.rs`) and both call sites use the shared implementation. + +#### 2.5 — `#[serde(deny_unknown_fields)]` on All Config Structs + +`src/config/mod.rs` — All `#[derive(Deserialize)]` config structs (`Config`, `ServerConfig`, `SiteConfig`, `TorConfig`, `LoggingConfig`, `ConsoleConfig`, `IdentityConfig`) now carry `#[serde(deny_unknown_fields)]`. A misspelled key such as `bund = "127.0.0.1"` now causes a startup error naming the unknown field rather than silently using the compiled-in default. + +#### 2.6 — `auto_reload` Removed (Was Unimplemented) + +`src/config/mod.rs`, `src/config/defaults.rs` — The `auto_reload` field was present in the config struct and advertised in the default `settings.toml` but had no implementation. It has been removed entirely. The `[R]` key for manual site stat reloads is unaffected. + +#### 2.7 — ANSI Terminal Injection Prevention Documented and Tested + +`src/config/loader.rs` — The existing `char::is_control` check on `instance_name` (which covers ESC `\x1b`, NUL `\x00`, BEL `\x07`, and BS `\x08`) is confirmed to prevent terminal injection. 
An explicit comment now documents the security intent, and dedicated test cases cover each injection vector. + +#### 2.8 — Keyboard Input Task Failure Now Detected and Reported + +`src/runtime/lifecycle.rs` — If the `spawn_blocking` input task exits (causing `key_rx` to close), `recv().await` returning `None` is now detected. A `warn`-level log entry is emitted ("Console input task exited — keyboard input disabled. Use Ctrl-C to quit.") and subsequent iterations no longer attempt to receive from the closed channel. Previously, input task death was completely silent. + +#### 2.9 — `TorStatus::Failed` Now Carries a Reason String + +`src/runtime/state.rs`, `src/console/dashboard.rs` — `TorStatus::Failed(Option<i32>)` (the exit code variant, which was never constructed) has been replaced with `TorStatus::Failed(String)`. Construction sites pass a brief reason string (`"bootstrap failed"`, `"stream ended"`, `"launch failed"`). The dashboard now renders `FAILED (reason) — see log for details` instead of a bare `FAILED`. + +#### 2.10 — Graceful Shutdown Uses `JoinSet` and Proper Signalling + +`src/runtime/lifecycle.rs`, `src/server/mod.rs`, `src/tor/mod.rs` — The 300 ms fixed sleep that gated shutdown has been replaced with proper task completion signalling. A clone of `shutdown_rx` is passed into `tor::init()`; the Tor run loop watches it via `tokio::select!` and exits cleanly on shutdown. In-flight HTTP connection tasks are tracked in a `JoinSet`; after the accept loop exits, `join_set.join_all()` is awaited with a 5-second timeout, allowing in-progress transfers to complete before the process exits. + +#### 2.11 — Log File Flushed on Graceful Shutdown + +`src/logging/mod.rs`, `src/runtime/lifecycle.rs` — A `pub fn flush()` function has been added to the logging module.
The shutdown sequence calls it explicitly after the connection drain wait, ensuring all buffered log entries (including the `"RustHost shut down cleanly."` sentinel) are written to disk before the process exits. + +--- + +### Phase 3 — Performance + +#### 3.1 — `data_dir()` Computed Once at Startup + +`src/runtime/lifecycle.rs` — `data_dir()` (which calls `std::env::current_exe()` internally) was previously called on every key event dispatch inside `event_loop`. It is now computed exactly once at the top of `normal_run()`, stored in a local variable, and passed as a parameter to all functions that need it. + +#### 3.2 — `Arc<PathBuf>` and `Arc<str>` Eliminate Per-Connection Heap Allocations + +`src/server/mod.rs`, `src/server/handler.rs` — `site_root` and `index_file` are now wrapped in `Arc<PathBuf>` and `Arc<str>` respectively before the accept loop. Each connection task receives a cheap `Arc` clone (reference-count increment) rather than a full heap allocation. + +#### 3.3 — Dashboard Render Task Skips Redraws When Output Is Unchanged + +`src/console/mod.rs` — The render task now compares the rendered output string against the previously written string. If identical, the `execute!` and `write_all` calls are skipped entirely. This eliminates terminal writes on idle ticks, which is the common case for a server with no active traffic. + +#### 3.4 — MIME Lookup No Longer Allocates a `String` Per Request + +`src/server/mime.rs` — The `for_extension` function previously called `ext.to_ascii_lowercase()`, allocating a heap `String` on every request. The comparison now uses `str::eq_ignore_ascii_case` directly against the extension string, with no allocation. + +#### 3.5 — Log Ring Buffer Lock Not Held During `String` Clone + +`src/logging/mod.rs` — The log line string is now cloned before acquiring the ring buffer mutex. The mutex is held only for the `push_back` of the already-allocated string, reducing lock contention from Arti's multi-threaded internal logging.
+ +#### 3.6 — Tokio Feature Flags Made Explicit + +`Cargo.toml` — `tokio = { features = ["full"] }` has been replaced with an explicit feature list: `rt-multi-thread`, `net`, `io-util`, `fs`, `sync`, `time`, `macros`, `signal`. Unused features (`process`, `io-std`) are no longer compiled, reducing binary size and build time. + +--- + +### Phase 4 — Architecture & Design + +#### 4.1 — Typed `AppError` Enum Introduced + +`src/error.rs` (new), `src/main.rs`, all modules — The global `Box<dyn std::error::Error>` result alias has been replaced with a typed `AppError` enum using `thiserror`. Variants: `ConfigLoad`, `ConfigValidation`, `LogInit`, `ServerBind { port, source }`, `Tor`, `Io`, `Console`. Error messages now preserve structured context at the type level. + +#### 4.2 — Config Structs Use Typed Fields + +`src/config/mod.rs`, `src/config/loader.rs` — `LoggingConfig.level` is now a `LogLevel` enum (`Trace` | `Debug` | `Info` | `Warn` | `Error`) with `#[serde(rename_all = "lowercase")]`; the duplicate validation in `loader.rs` and `logging/mod.rs` has been removed. `ServerConfig.bind` is now `std::net::IpAddr` via `#[serde(try_from = "String")]`. The parse-then-validate pattern is eliminated in favour of deserialisation-time typing. + +#### 4.3 — Dependency Log Noise Filtered by Default + +`src/logging/mod.rs` — `RustHostLogger::enabled()` now suppresses `Info`-and-below records from non-`rusthost` targets (Arti, Tokio internals). Warnings and errors from all crates are still passed through. This prevents the ring buffer and log file from being flooded with Tor bootstrap noise. Configurable via `[logging] filter_dependencies = true` (default `true`); set `false` to pass all crate logs at the configured level. + +#### 4.4 — `data_dir()` Free Function Eliminated; Path Injected + +`src/runtime/lifecycle.rs` and all callers — The `data_dir()` free function (which called `current_exe()` as a hidden dependency) has been removed.
The data directory `PathBuf` is now a first-class parameter threaded through the call chain from `normal_run`, enabling test injection of temporary directories. + +#### 4.5 — `percent_decode` Correctly Handles Multi-Byte UTF-8 and Null Bytes + +`src/server/handler.rs` — The previous implementation decoded each `%XX` token as a standalone `char` cast from a `u8`, producing incorrect output for multi-byte sequences (e.g., `%C3%A9` was decoded as two garbage characters instead of `é`). The function now accumulates consecutive decoded bytes into a `Vec<u8>` buffer and flushes via `String::from_utf8_lossy` when a literal character is encountered, correctly reassembling multi-byte sequences. Null bytes (`%00`) are left as the literal string `%00` in the output rather than being decoded. + +#### 4.6 — `deny.toml` Updated with All Duplicate Crate Skip Entries + +`deny.toml` — Five duplicate crate version pairs that were absent from `bans.skip` but present in the lock file have been added with comments identifying the dependency trees that pull each version: `foldhash`, `hashbrown`, `indexmap`, `redox_syscall`, and `schemars`. `cargo deny check` now passes cleanly. + +#### 4.7 — `ctrlc` Crate Replaced with `tokio::signal` + +`Cargo.toml`, `src/runtime/lifecycle.rs` — The `ctrlc = "3"` dependency has been removed. Signal handling is now done via `tokio::signal::ctrl_c()` (cross-platform) and `tokio::signal::unix::signal(SignalKind::interrupt())` (Unix), integrated directly into the `select!` inside `event_loop`. This eliminates threading concerns between the `ctrlc` crate's signal handler and Tokio's internal signal infrastructure. + +--- + +### Phase 5 — Testing, Observability & Hardening + +#### 5.1 — Unit Tests Added for All Security-Critical Functions + +`src/server/handler.rs`, `src/server/mod.rs`, `src/config/loader.rs`, `src/console/dashboard.rs`, `src/tor/mod.rs` — `#[cfg(test)]` modules added to each file.
Coverage includes: `percent_decode` (ASCII, spaces, multi-byte UTF-8, null bytes, incomplete sequences, invalid hex); `resolve_path` (normal file, directory traversal, encoded-slash traversal, missing file, missing root); `validate` (valid config, `site.directory` path traversal, absolute path, `logging.file` traversal, port 0, invalid IP, unknown field); `strip_timestamp` (ASCII line, multi-byte UTF-8 line, line with no brackets); `hsid_to_onion_address` (known test vector against reference implementation). + +#### 5.2 — Integration Tests Added for HTTP Server Core Flows + +`tests/http_integration.rs` (new) — Integration tests using `tokio::net::TcpStream` against a test server bound on port 0. Covers: `GET /index.html` → 200; `HEAD /index.html` → correct `Content-Length`, no body; `GET /` with `index_file` configured; `GET /../etc/passwd` → 403; request header > 8 KiB → 400; `GET /nonexistent.txt` → 404; `POST /index.html` → 400. + +#### 5.3 — Security Response Headers Added to All Responses + +`src/server/handler.rs` — All responses now include `X-Content-Type-Options: nosniff`, `X-Frame-Options: SAMEORIGIN`, `Referrer-Policy: no-referrer`, and `Permissions-Policy: camera=(), microphone=(), geolocation=()`. HTML responses additionally include `Content-Security-Policy: default-src 'self'` (configurable via `[server] content_security_policy` in `settings.toml`). The `Referrer-Policy: no-referrer` header is especially relevant for the Tor onion service: it prevents the `.onion` URL from leaking in the `Referer` header to any third-party resources loaded by served HTML. + +#### 5.4 — Accept Loop Error Handling Uses Exponential Backoff + +`src/server/mod.rs` — The accept loop previously retried immediately on error, producing thousands of log entries per second on persistent errors such as `EMFILE`. Errors now trigger exponential backoff (starting at 1 ms, doubling up to 1 second). 
`EMFILE` is logged at `error` level (operator intervention required); transient errors (`ECONNRESET`, `ECONNABORTED`) are logged at `debug`. The backoff counter resets on successful accept. + +#### 5.5 — CLI Arguments Added (`--config`, `--data-dir`, `--version`, `--help`) + +`src/main.rs`, `src/runtime/lifecycle.rs` — The binary now accepts `--config <path>` and `--data-dir <path>` to override the default config and data directory paths (previously inferred from `current_exe()`). `--version` prints the crate version and exits. `--help` prints a usage summary. These flags enable multi-instance deployments, systemd unit files with explicit paths, and CI test runs without relying on the working directory. + +#### 5.6 — `cargo deny check` Passes Cleanly; `audit.toml` Consolidated + +`deny.toml`, CI — `audit.toml` (which suppressed `RUSTSEC-2023-0071` without a documented rationale) has been removed. Advisory suppression is now managed exclusively in `deny.toml`, which carries the full justification. CI now runs `cargo deny check` as a required step, subsuming the advisory check. The existing rationale for `RUSTSEC-2023-0071` is unchanged: the `rsa` crate is used only for signature verification on Tor directory documents, not for decryption; the Marvin timing attack's threat model does not apply.
+ +--- + ## [0.1.0] — Initial Release ### HTTP Server diff --git a/Cargo.lock b/Cargo.lock index 1d89169..4e1ea7b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3060,9 +3060,11 @@ dependencies = [ "crossterm", "data-encoding", "futures", + "libc", "log", "serde", "sha3", + "tempfile", "thiserror 1.0.69", "tokio", "toml 0.8.23", diff --git a/Cargo.toml b/Cargo.toml index 09785aa..0180356 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,75 +10,65 @@ description = "Single-binary, zero-setup static site hosting appliance with Tor license = "MIT" authors = [] +[lib] +name = "rusthost" +path = "src/lib.rs" + [[bin]] name = "rusthost-cli" path = "src/main.rs" +# ─── Lint configuration ─────────────────────────────────────────────────────── +# +# clippy::all — every lint in the default set (correctness + style + perf) +# clippy::pedantic — stricter style + API lints; individual allows used where +# the rule conflicts with the module's documented design +# (e.g. too_many_arguments for the HTTP write_* stack which +# must mirror the HTTP/1.1 wire format). +[lints.rust] +unsafe_code = "forbid" + +[lints.clippy] +all = { level = "deny", priority = -1 } +pedantic = { level = "deny", priority = -1 } +# nursery lints warn but do not gate CI; they surface improvement candidates. +nursery = { level = "warn", priority = -1 } + [dependencies] -# Async runtime — drives the server, console, and all I/O -# 3.6 — Explicit feature list instead of "full"; removes unused `process` and -# `io-std` components, reducing binary size and compile time. -# Note: open_browser uses std::process::Command (stdlib), not the Tokio -# process feature, so dropping `process` here is safe. 
tokio = { version = "1", features = [ - "rt-multi-thread", # multi-threaded work-stealing executor - "net", # TcpListener / TcpStream - "io-util", # AsyncBufReadExt, AsyncWriteExt, copy - "fs", # tokio::fs::File, metadata - "sync", # Mutex, RwLock, Semaphore, oneshot, watch, mpsc - "time", # interval, sleep, timeout - "macros", # #[tokio::main], tokio::select! - "signal", # SIGINT / SIGTERM (used by ctrlc integration) + "rt-multi-thread", + "net", + "io-util", + "fs", + "sync", + "time", + "macros", + "signal", ] } -# ─── Arti (in-process Tor) ──────────────────────────────────────────────────── -# -# arti-client — the high-level Tor client API (bootstrap, launch onion service) -# tokio — use Tokio as the async runtime (default, required) -# native-tls — TLS backend for connecting to Tor relays (default) -# onion-service-service — enables hosting (serving) onion services -# -# tor-hsservice — lower-level onion service types: -# handle_rend_requests, OnionServiceConfigBuilder, StreamRequest -# -# futures — StreamExt trait for iterating over the stream of StreamRequests - arti-client = { version = "0.40", features = [ "tokio", "native-tls", "onion-service-service", ] } tor-hsservice = { version = "0.40" } -# tor-cell: needed to construct the Connected message passed to StreamRequest::accept() -tor-cell = { version = "0.40" } -futures = "0.3" +tor-cell = { version = "0.40" } +futures = "0.3" -# Onion-address encoding (used by the tor module to format HsId → ${base32}.onion) -# sha3: provides SHA3-256 for the v3 address checksum -# data-encoding: provides RFC 4648 base32 (no-padding, uppercase/lowercase) -sha3 = "0.10" +sha3 = "0.10" data-encoding = "2" +thiserror = "1" +serde = { version = "1", features = ["derive"] } +toml = "0.8" +log = "0.4" +crossterm = "0.27" +chrono = { version = "0.4", features = ["clock"] } +# OS error codes used in the accept-loop backoff to distinguish EMFILE/ENFILE +# (resource exhaustion → log error) from transient errors (log debug). 
+libc = "0.2" -# Typed error enum — derives Display + Error for AppError variants -thiserror = "1" - -# Configuration — TOML parsing and typed deserialization -serde = { version = "1", features = ["derive"] } -toml = "0.8" - -# Logging — standard facade used by all modules -log = "0.4" - -# Terminal — cross-platform raw mode, cursor control, key events -crossterm = "0.27" - -# Timestamps — used in log formatting -chrono = { version = "0.4", features = ["clock"] } - -# Signal handling — Ctrl-C and SIGTERM handled via tokio::signal (4.7) -# ctrlc crate removed: tokio's built-in signal support (feature "signal", already -# in the explicit feature list) eliminates the threading conflict between the ctrlc -# crate's OS-level signal handler and Tokio's internal signal infrastructure. +[dev-dependencies] +tempfile = "3" [profile.release] opt-level = 3 diff --git a/README.md b/README.md index fba9d64..3f27949 100644 --- a/README.md +++ b/README.md @@ -54,11 +54,19 @@ Drop the binary next to your site files, run it once, and you get: ### 🌐 HTTP Server - Built directly on `tokio::net::TcpListener` — no HTTP framework dependency - Handles `GET` and `HEAD` requests; concurrent connections via per-task Tokio workers -- Percent-decoded URL paths, query string & fragment stripping -- **Path traversal protection** — every path verified as a descendant of the site root via `canonicalize`; escapes rejected with `403 Forbidden` -- Configurable index file, optional HTML directory listings, and a built-in fallback page +- **Buffered request reading** via `tokio::io::BufReader` — headers read line-by-line, not byte-by-byte +- **File streaming** via `tokio::io::copy` — memory per connection is bounded by the socket buffer (~256 KB) regardless of file size +- **30-second request timeout** (configurable via `request_timeout_secs`); slow or idle connections receive `408 Request Timeout` +- **Semaphore-based connection limit** (configurable via `max_connections`, default 256) — excess 
connections queue at the OS backlog level rather than spawning unbounded tasks +- Percent-decoded URL paths with correct multi-byte UTF-8 handling; null bytes (`%00`) are never decoded +- Query string & fragment stripping before path resolution +- **Path traversal protection** — every path verified as a descendant of the site root via `canonicalize` (called once at startup, not per request); escapes rejected with `403 Forbidden` +- Configurable index file, optional HTML directory listing with fully HTML-escaped and URL-encoded filenames, and a built-in fallback page - Automatic port selection if the configured port is busy (up to 10 attempts) - Request header cap at 8 KiB; `Content-Type`, `Content-Length`, and `Connection: close` on every response +- **Security headers on every response**: `X-Content-Type-Options`, `X-Frame-Options`, `Referrer-Policy: no-referrer`, `Permissions-Policy`; configurable `Content-Security-Policy` on HTML responses +- **HEAD responses** include correct `Content-Length` but no body, as required by RFC 7231 §4.3.2 +- Accept loop uses **exponential backoff** on errors and distinguishes `EMFILE` (operator-level error) from transient errors (`ECONNRESET`, `ECONNABORTED`) ### 🧅 Tor Onion Service *(fully working)* - Embedded via [Arti](https://gitlab.torproject.org/tpo/core/arti) — the official Rust Tor client — in-process, no external daemon @@ -67,6 +75,9 @@ Drop the binary next to your site files, run it once, and you get: - First run fetches ~2 MB of directory data (~30 s); subsequent starts reuse the cache and are up in seconds - Onion address computed fully in-process using the v3 spec (SHA3-256 + base32) - Each inbound Tor connection is bridged to the local HTTP listener via `tokio::io::copy_bidirectional` +- **Port synchronised via `oneshot` channel** — the Tor subsystem always receives the actual bound port, eliminating a race condition that could cause silent connection failures +- **`TorStatus` reflects mid-session failures** — if 
the onion service stream terminates unexpectedly, the dashboard transitions to `FAILED (reason)` and clears the displayed `.onion` address +- Participates in **graceful shutdown** — the run loop watches the shutdown signal via `tokio::select!` and exits cleanly - Can be disabled entirely with `[tor] enabled = false` ### 🖥️ Interactive Terminal Dashboard @@ -81,18 +92,32 @@ Drop the binary next to your site files, run it once, and you get: | `R` | Reload site file count & size without restart | | `Q` | Graceful shutdown | +- **Skip-on-idle rendering** — the terminal is only written when the rendered output changes, eliminating unnecessary writes on quiet servers +- `TorStatus::Failed` displays a human-readable reason string (e.g. `FAILED (stream ended)`) rather than a bare error indicator +- Keyboard input task failure is detected and reported; the process remains killable via Ctrl-C +- **Terminal fully restored on all exit paths** — panic hook and error handler both call `console::cleanup()` before exiting, ensuring `LeaveAlternateScreen`, `cursor::Show`, and `disable_raw_mode` always run - Configurable refresh rate (default 500 ms); headless mode available for `systemd` / piped deployments ### ⚙️ Configuration - TOML file at `rusthost-data/settings.toml`, auto-generated with inline comments on first run - Six sections: `[server]`, `[site]`, `[tor]`, `[logging]`, `[console]`, `[identity]` +- **`#[serde(deny_unknown_fields)]`** on all structs — typos in key names are rejected at startup with a clear error +- **Typed config fields** — `bind` is `IpAddr`, `log level` is a `LogLevel` enum; invalid values are caught at deserialisation time - Startup validation with clear, multi-error messages — nothing starts until config is clean +- Config and data directory paths overridable via **`--config `** and **`--data-dir `** CLI flags ### 📝 Logging - Custom `log::Log` implementation; dual output — append-mode log file + in-memory ring buffer (1 000 lines) - Ring buffer feeds 
the dashboard log view with zero file I/O per render tick +- **Dependency log filtering** — Arti and Tokio internals at `Info` and below are suppressed by default, keeping the log focused on application events (configurable via `filter_dependencies`) +- Log file explicitly flushed on graceful shutdown - Configurable level (`trace` → `error`) and optional full disable for minimal-overhead deployments +### 🧪 Testing & CI +- Unit tests for all security-critical functions: `percent_decode`, `resolve_path`, `validate`, `strip_timestamp`, `hsid_to_onion_address` +- Integration tests (`tests/http_integration.rs`) covering all HTTP core flows via raw `TcpStream` +- `cargo deny check` runs in CI, enforcing the SPDX license allowlist and advisory database; `audit.toml` consolidated into `deny.toml` + --- ## Quick Start @@ -134,37 +159,53 @@ rusthost-data/ The dashboard appears. Your site is live on `http://localhost:8080`. Tor bootstraps in the background — your `.onion` address appears in the **Endpoints** panel once ready (~30 s on first run). 
+### CLI flags + +``` +rusthost [OPTIONS] + +Options: + --config Path to settings.toml (default: rusthost-data/settings.toml) + --data-dir Path to data directory (default: rusthost-data/ next to binary) + --version Print version and exit + --help Print this help and exit +``` + --- ## Configuration Reference ```toml [server] -port = 8080 -bind = "127.0.0.1" # set "0.0.0.0" to expose on LAN (logs a warning) -index_file = "index.html" -directory_listing = false -auto_port_fallback = true +port = 8080 +bind = "127.0.0.1" # set "0.0.0.0" to expose on LAN (logs a warning) +index_file = "index.html" +directory_listing = false +auto_port_fallback = true +max_connections = 256 # semaphore cap on concurrent connections +request_timeout_secs = 30 # seconds before idle connection receives 408 +content_security_policy = "default-src 'self'" # applied to HTML responses only [site] root = "rusthost-data/site" [tor] -enabled = true # set false to skip Tor entirely +enabled = true # set false to skip Tor entirely [logging] -enabled = true -level = "info" # trace | debug | info | warn | error -path = "logs/rusthost.log" +enabled = true +level = "info" # trace | debug | info | warn | error +path = "logs/rusthost.log" +filter_dependencies = true # suppress Arti/Tokio noise at info and below [console] -interactive = true # false for systemd / piped deployments -refresh_ms = 500 # minimum 100 -show_timestamps = false +interactive = true # false for systemd / piped deployments +refresh_ms = 500 # minimum 100 +show_timestamps = false open_browser_on_start = false [identity] -name = "RustHost" # 1–32 chars, shown in dashboard header +name = "RustHost" # 1–32 chars, shown in dashboard header ``` --- @@ -205,7 +246,9 @@ Unknown extensions fall back to `application/octet-stream`. All subsystems share state through `Arc>`. Hot-path request and error counters use a separate `Arc` backed by atomics — the HTTP handler **never acquires a lock per request**. 
-Shutdown is coordinated via a `watch` channel: `[Q]`, `SIGINT`, or `SIGTERM` signals all subsystems simultaneously, waits 300 ms for in-flight connections, then exits. The Tor client is dropped naturally with the Tokio runtime — no explicit kill step needed. +The HTTP server and Tor subsystem share a `tokio::sync::Semaphore` that caps concurrent connections. The bound port is communicated to Tor via a `oneshot` channel before the accept loop begins, eliminating the startup race condition present in earlier versions. + +Shutdown is coordinated via a `watch` channel: `[Q]`, `SIGINT`, or `SIGTERM` signals all subsystems simultaneously. In-flight HTTP connections are tracked in a `JoinSet` and given up to 5 seconds to complete. The log file is explicitly flushed before the process exits. --- @@ -213,11 +256,23 @@ Shutdown is coordinated via a `watch` channel: `[Q]`, `SIGINT`, or `SIGTERM` sig | Concern | Mitigation | |---------|-----------| -| Path traversal | `std::fs::canonicalize` + descendant check; returns `403` on escape | +| Path traversal (requests) | `std::fs::canonicalize` + descendant check per request; `403` on escape | +| Path traversal (config) | `site.directory` and `logging.file` validated against `..`, absolute paths, and path separators at startup | +| Directory listing XSS | Filenames HTML-entity-escaped in link text; percent-encoded in `href` attributes | | Header overflow | 8 KiB hard cap; oversized requests rejected immediately | +| Slow-loris DoS | 30-second request timeout; `408` sent on expiry | +| Connection exhaustion | Semaphore cap (default 256); excess connections queue at OS level | +| Memory exhaustion (large files) | Files streamed via `tokio::io::copy`; per-connection memory bounded by socket buffer | | Bind exposure | Defaults to loopback (`127.0.0.1`); warns loudly on `0.0.0.0` | +| ANSI/terminal injection | `instance_name` validated against all control characters (`is_control`) at startup | +| Security response headers | 
`X-Content-Type-Options`, `X-Frame-Options`, `Referrer-Policy: no-referrer`, `Permissions-Policy`, configurable `Content-Security-Policy` | +| `.onion` URL leakage | `Referrer-Policy: no-referrer` prevents the `.onion` address from appearing in `Referer` headers sent to third-party resources | +| Tor port race | Bound port delivered to Tor via `oneshot` channel before accept loop starts | +| Silent Tor failure | `TorStatus` transitions to `Failed(reason)` and onion address is cleared when the service stream ends | +| Percent-decode correctness | Multi-byte UTF-8 sequences decoded correctly; null bytes (`%00`) never decoded | +| Config typos | `#[serde(deny_unknown_fields)]` on all structs | | License compliance | `cargo-deny` enforces SPDX allowlist at CI time | -| [RUSTSEC-2023-0071](https://rustsec.org/advisories/RUSTSEC-2023-0071) | Suppressed with rationale: the `rsa` crate is a transitive dep of `arti-client` used **only** for signature *verification* on Tor directory documents — the Marvin timing attack's threat model (decryption oracle) does not apply | +| [RUSTSEC-2023-0071](https://rustsec.org/advisories/RUSTSEC-2023-0071) | Suppressed with rationale in `deny.toml`: the `rsa` crate is a transitive dep of `arti-client` used **only** for signature *verification* on Tor directory documents — the Marvin timing attack's threat model (decryption oracle) does not apply | --- diff --git a/src/config/defaults.rs b/src/config/defaults.rs index 6c09c81..dedecc2 100644 --- a/src/config/defaults.rs +++ b/src/config/defaults.rs @@ -31,6 +31,13 @@ open_browser_on_start = false # at the OS TCP backlog level rather than spawning unbounded tasks. max_connections = 256 +# Content-Security-Policy value sent with every HTML response. +# The default restricts all content to the same origin. 
+# Relax this if you serve fonts, analytics scripts, or other third-party +# resources from a CDN, for example: +# content_security_policy = "default-src 'self'; script-src 'self' cdn.example.com" +content_security_policy = "default-src 'self'" + # ─── [site] ─────────────────────────────────────────────────────────────────── [site] @@ -102,6 +109,13 @@ show_timestamps = false instance_name = "RustHost" "#; +/// Write the default `settings.toml` to `path`, creating parent directories +/// as needed. +/// +/// # Errors +/// +/// Returns [`crate::AppError::Io`] if the parent directory cannot be created +/// or the file cannot be written. pub fn write_default_config(path: &Path) -> Result<()> { if let Some(parent) = path.parent() { std::fs::create_dir_all(parent)?; diff --git a/src/config/loader.rs b/src/config/loader.rs index a02a717..7594840 100644 --- a/src/config/loader.rs +++ b/src/config/loader.rs @@ -7,6 +7,14 @@ use std::path::Path; use super::Config; use crate::{AppError, Result}; +/// Load and validate the configuration from `path`. +/// +/// # Errors +/// +/// Returns [`AppError::ConfigLoad`] if the file cannot be read or is +/// malformed TOML, or [`AppError::ConfigValidation`] if any field fails +/// semantic validation. +#[must_use = "the loaded Config must be used to start the server"] pub fn load(path: &Path) -> Result<Config> { let raw = std::fs::read_to_string(path) .map_err(|e| AppError::ConfigLoad(format!("Cannot read {}: {e}", path.display())))?; @@ -92,3 +100,138 @@ fn validate(cfg: &Config) -> Result<()> { Err(AppError::ConfigValidation(errors)) } } + +// ─── Tests ──────────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::{validate, Config}; + use crate::AppError; + + // Helper: a valid config derived from the Default impl.
+ fn valid() -> Config { + Config::default() + } + + // ── validate — happy path ──────────────────────────────────────────────── + + #[test] + fn validate_valid_config_returns_ok() { + assert!(validate(&valid()).is_ok()); + } + + // ── validate — [site] directory ───────────────────────────────────────── + + #[test] + fn validate_site_directory_relative_traversal() { + let mut cfg = valid(); + cfg.site.directory = "../../etc".into(); + let result = validate(&cfg); + assert!( + matches!(&result, Err(AppError::ConfigValidation(e)) if e.iter().any(|s| s.contains("[site] directory"))), + "expected ConfigValidation error with '[site] directory', got: {result:?}" + ); + } + + #[test] + fn validate_site_directory_absolute_path() { + let mut cfg = valid(); + cfg.site.directory = "/etc".into(); + let result = validate(&cfg); + assert!( + matches!(&result, Err(AppError::ConfigValidation(e)) if e.iter().any(|s| s.contains("[site] directory"))), + "expected ConfigValidation error with '[site] directory', got: {result:?}" + ); + } + + // ── validate — [logging] file ──────────────────────────────────────────── + + #[test] + fn validate_logging_file_traversal() { + let mut cfg = valid(); + cfg.logging.file = "../../.bashrc".into(); + let result = validate(&cfg); + assert!( + matches!(&result, Err(AppError::ConfigValidation(e)) if e.iter().any(|s| s.contains("[logging] file"))), + "expected ConfigValidation error with '[logging] file', got: {result:?}" + ); + } + + // ── validate — port ────────────────────────────────────────────────────── + // + // Port 0 is already rejected by `NonZeroU16` at the serde layer (fix 4.2), + // so we verify that serde rejects a zero value rather than testing via + // `validate()` (which only receives already-parsed configs). 
+ + #[test] + fn config_rejects_port_zero_at_parse_time() { + let toml_str = make_full_toml("port = 0"); + let result = toml::from_str::<Config>(&toml_str); + assert!(result.is_err(), "expected serde error for port = 0"); + } + + // ── validate — bind address ────────────────────────────────────────────── + + #[test] + fn config_rejects_invalid_bind_address_at_parse_time() { + let toml_str = make_full_toml("bind = \"not.an.ip\""); + let result = toml::from_str::<Config>(&toml_str); + assert!( + result.is_err(), + "expected serde error for invalid bind address" + ); + } + + // ── validate — deny_unknown_fields (fix 2.5) ───────────────────────────── + + #[test] + fn config_rejects_unknown_fields_at_parse_time() { + // Insert an unrecognised key into [server] — must be rejected by serde + // because all Config structs carry `#[serde(deny_unknown_fields)]`. + let toml_str = make_full_toml("completely_unknown_key = true"); + let result = toml::from_str::<Config>(&toml_str); + assert!(result.is_err(), "expected serde error for unknown field"); + } + + // ── Helpers ────────────────────────────────────────────────────────────── + + /// Build a complete, valid TOML document with `extra` injected into + /// the `[server]` section, so individual field-level tests can be + /// expressed as minimal one-liner overrides. 
+ fn make_full_toml(extra: &str) -> String { + format!( + r#" +[server] +port = 8080 +bind = "127.0.0.1" +auto_port_fallback = true +open_browser_on_start = false +max_connections = 256 +content_security_policy = "default-src 'self'" +{extra} + +[site] +directory = "site" +index_file = "index.html" +enable_directory_listing = false + +[tor] +enabled = false + +[logging] +enabled = true +level = "info" +file = "logs/rusthost.log" +filter_dependencies = true + +[console] +interactive = false +refresh_rate_ms = 500 +show_timestamps = false + +[identity] +instance_name = "Test" +"# + ) + } +} diff --git a/src/config/mod.rs b/src/config/mod.rs index deb99bb..a82a384 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -89,6 +89,23 @@ pub struct ServerConfig { pub auto_port_fallback: bool, pub open_browser_on_start: bool, pub max_connections: u32, + + /// Value of the `Content-Security-Policy` header sent with every HTML + /// response (task 5.3). The default `"default-src 'self'"` restricts all + /// content to the same origin. + /// + /// Operators serving CDN fonts, analytics scripts, or other third-party + /// resources can relax this without touching source code, e.g.: + /// + /// ```toml + /// [server] + /// content_security_policy = "default-src 'self'; script-src 'self' cdn.example.com" + /// ``` + /// + /// **Tor note:** `Referrer-Policy: no-referrer` is always sent regardless + /// of this setting, preventing the `.onion` URL from leaking to any + /// third-party origin referenced in served HTML. 
+ pub content_security_policy: String, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -166,6 +183,7 @@ impl Default for Config { auto_port_fallback: true, open_browser_on_start: false, max_connections: 256, + content_security_policy: "default-src 'self'".into(), }, site: SiteConfig { directory: "site".into(), diff --git a/src/console/dashboard.rs b/src/console/dashboard.rs index 0cedb66..23f8964 100644 --- a/src/console/dashboard.rs +++ b/src/console/dashboard.rs @@ -32,6 +32,7 @@ const RULE: &str = "──────────────────── // ─── Dashboard ─────────────────────────────────────────────────────────────── +#[must_use] pub fn render_dashboard(state: &AppState, requests: u64, errors: u64, config: &Config) -> String { let mut out = String::with_capacity(1_024); @@ -115,6 +116,7 @@ pub fn render_dashboard(state: &AppState, requests: u64, errors: u64, config: &C // ─── Log view ──────────────────────────────────────────────────────────────── +#[must_use] pub fn render_log_view(show_timestamps: bool) -> String { let lines = logging::recent_lines(40); @@ -142,6 +144,7 @@ pub fn render_log_view(show_timestamps: bool) -> String { // ─── Help ──────────────────────────────────────────────────────────────────── +#[must_use] pub fn render_help() -> String { let mut out = String::with_capacity(512); let _ = writeln!(out, "{RULE}\r"); @@ -178,3 +181,45 @@ fn strip_timestamp(line: &str) -> &str { parts.next(); // consume "[timestamp" parts.next().map_or(line, str::trim_start) } + +// ─── Unit tests ─────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::strip_timestamp; + + #[test] + fn strip_timestamp_ascii_log_line() { + // A typical log line: "[INFO][2024-01-01 12:00:00] message body" + // strip_timestamp should consume the two bracketed tokens and return + // " message body" with leading whitespace trimmed. 
+ let line = "[INFO][2024-01-01 12:00:00] message body"; + assert_eq!(strip_timestamp(line), "message body"); + } + + #[test] + fn strip_timestamp_multibyte_utf8_does_not_panic() { + // A log line whose payload contains a multi-byte UTF-8 character. + // splitn(3, ']') must not slice at a non-character boundary. + let line = "[INFO][2024-01-01 12:00:00] café au lait"; + let result = strip_timestamp(line); + // Must not panic; the multi-byte payload must be preserved. + assert_eq!(result, "café au lait"); + } + + #[test] + fn strip_timestamp_no_brackets_returns_original() { + // When there is no `]` delimiter at all the function must return the + // original line unchanged rather than an empty string. + let line = "bare message without any brackets"; + assert_eq!(strip_timestamp(line), line); + } + + #[test] + fn strip_timestamp_only_one_bracket_pair_returns_original() { + // Only a single `]` present — the second `parts.next()` consumes None, + // so the third `parts.next()` also returns None → fall back to `line`. + let line = "[INFO] single bracket only"; + assert_eq!(strip_timestamp(line), line); + } +} diff --git a/src/console/mod.rs b/src/console/mod.rs index edea0cd..ed1b4bc 100644 --- a/src/console/mod.rs +++ b/src/console/mod.rs @@ -54,6 +54,11 @@ static RAW_MODE_ACTIVE: std::sync::atomic::AtomicBool = std::sync::atomic::Atomi /// reads from this channel to dispatch events. /// /// Note: not `async` — all awaits live inside the spawned tasks. +/// +/// # Errors +/// +/// Returns [`AppError::Console`] if the terminal cannot be put into raw mode +/// or the alternate screen cannot be entered. pub fn start( config: Arc, state: SharedState, diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..c29a453 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,22 @@ +//! # rusthost — library crate +//! +//! Exposes all subsystem modules so that integration tests in `tests/` can +//! import them directly. The binary entry point (`src/main.rs`) is a thin +//! 
wrapper that calls [`runtime::lifecycle::run`]. + +pub mod config; +pub mod console; +pub mod error; +pub mod logging; +pub mod runtime; +pub mod server; +pub mod tor; + +pub use error::AppError; + +/// Common `Result` alias used throughout the codebase. +/// +/// The error type is [`AppError`] — a typed enum covering every failure mode. +/// All subsystems return this type so callers can match on specific variants +/// rather than inspecting an opaque `Box` string. +pub type Result = std::result::Result; diff --git a/src/logging/mod.rs b/src/logging/mod.rs index 10fd7f4..53f19b8 100644 --- a/src/logging/mod.rs +++ b/src/logging/mod.rs @@ -134,6 +134,12 @@ pub fn flush() { /// /// Opens the log file in append mode (creating parent dirs as needed), /// registers the logger, and initialises the ring buffer. +/// +/// # Errors +/// +/// Returns [`AppError::Io`] if the log file's parent directory cannot be +/// created, or [`AppError::LogInit`] if the logger is already initialised or +/// cannot be registered with the `log` facade. pub fn init(config: &LoggingConfig, data_dir: &Path) -> Result<()> { LOG_BUFFER.get_or_init(|| Mutex::new(VecDeque::with_capacity(1_000))); diff --git a/src/main.rs b/src/main.rs index 909cba5..4a39e51 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,40 +1,104 @@ -//! # rusthost +//! # rusthost-cli //! -//! **Directory:** `src/` +//! Binary entry point. Parses CLI arguments then delegates all startup logic +//! to [`rusthost::runtime::lifecycle::run`]. //! -//! Entry point. Declares all modules and launches the Tokio async runtime. -//! All startup logic lives in [`runtime::lifecycle::run`]; this file -//! only boots the executor and propagates fatal errors to the OS. +//! ## Flags +//! +//! ```text +//! --config Override the path to settings.toml +//! --data-dir Override the data-directory root +//! --version Print the crate version and exit +//! --help Print usage and exit +//! 
``` + +use std::path::PathBuf; + +use rusthost::runtime::lifecycle::CliArgs; + +// ─── CLI argument parsing ───────────────────────────────────────────────────── + +/// Parse `std::env::args()` into a [`CliArgs`] value. /// -/// The error type is [`AppError`] — a typed enum covering every failure mode. -/// All subsystems return this type so callers can match on specific variants -/// rather than inspecting an opaque `Box<dyn Error>` string. -pub type Result<T> = std::result::Result<T, AppError>; +/// Exits the process immediately for `--version`, `--help`, and unrecognised +/// flags so that the async runtime is never started unnecessarily. +fn parse_args() -> CliArgs { + let mut config_path: Option<PathBuf> = None; + let mut data_dir: Option<PathBuf> = None; + let mut args = std::env::args().skip(1); + + while let Some(flag) = args.next() { + match flag.as_str() { + "--version" | "-V" => { + println!("rusthost {}", env!("CARGO_PKG_VERSION")); + std::process::exit(0); + } + "--help" | "-h" => { + print_help(); + std::process::exit(0); + } + "--config" => { + config_path = Some(PathBuf::from(args.next().unwrap_or_else(|| { + eprintln!("error: --config requires a <path> argument"); + std::process::exit(1); + }))); + } + "--data-dir" => { + data_dir = Some(PathBuf::from(args.next().unwrap_or_else(|| { + eprintln!("error: --data-dir requires a <dir> argument"); + std::process::exit(1); + }))); + } + unknown => { + eprintln!("error: unrecognised argument '{unknown}'"); + eprintln!(" Run with --help for usage information."); + std::process::exit(1); + } + } + } + + CliArgs { + config_path, + data_dir, + } +} + +fn print_help() { + println!( + "rusthost {ver} +{desc} + +USAGE: + rusthost-cli [OPTIONS] + +OPTIONS: + --config <path> Override the path to settings.toml + (default: <exe-dir>/rusthost-data/settings.toml) + --data-dir <dir> Override the data-directory root + (default: <exe-dir>
/rusthost-data/) + --version Print version and exit + --help Print this message and exit", + ver = env!("CARGO_PKG_VERSION"), + desc = env!("CARGO_PKG_DESCRIPTION"), + ); +} + +// ─── Entry point ───────────────────────────────────────────────────────────── #[tokio::main] async fn main() { // Register a panic hook so the terminal is always restored, even when a // panic fires on an async executor thread. std::panic::set_hook(Box::new(|info| { - console::cleanup(); + rusthost::console::cleanup(); eprintln!("\nPanic: {info}"); })); - if let Err(err) = runtime::lifecycle::run().await { + let args = parse_args(); + + if let Err(err) = rusthost::runtime::lifecycle::run(args).await { // Best-effort terminal restore in case we crashed inside the console. - console::cleanup(); + rusthost::console::cleanup(); eprintln!("\nFatal error: {err}"); std::process::exit(1); } diff --git a/src/runtime/events.rs b/src/runtime/events.rs index acfb339..bfe7582 100644 --- a/src/runtime/events.rs +++ b/src/runtime/events.rs @@ -20,6 +20,16 @@ pub enum KeyEvent { Other, } +/// Dispatch a single key event, mutating shared state as needed. +/// +/// Returns `true` when the event is [`KeyEvent::Quit`] (the caller should +/// begin graceful shutdown), or `false` for all other events. +/// +/// # Errors +/// +/// Returns [`AppError`] if a site rescan (`KeyEvent::Reload`) fails to spawn +/// a blocking task or if a browser-open (`KeyEvent::Open`) returns an I/O +/// error. pub async fn handle( event: KeyEvent, config: &Config, diff --git a/src/runtime/lifecycle.rs b/src/runtime/lifecycle.rs index eba1e39..67b6766 100644 --- a/src/runtime/lifecycle.rs +++ b/src/runtime/lifecycle.rs @@ -7,6 +7,12 @@ //! instructions, and exits cleanly. //! 2. **Normal run** — loads config, starts every subsystem, enters the //! event dispatch loop, then shuts down gracefully. +//! +//! ## CLI override support (5.5) +//! +//! [`CliArgs`] carries optional path overrides from `--config` and `--data-dir`. +//! 
When absent the original defaults (relative to `current_exe()`) are used, +//! preserving backward compatibility for zero-argument invocations. use std::{ path::{Path, PathBuf}, @@ -26,31 +32,56 @@ use crate::{ server, tor, AppError, Result, }; +// ─── Public types ───────────────────────────────────────────────────────────── + +/// CLI-supplied path overrides. Both fields default to `None`, which causes +/// [`run`] to fall back to the standard paths relative to `current_exe()`. +#[derive(Debug, Default)] +pub struct CliArgs { + /// Explicit path to `settings.toml`; overrides the default derived from + /// `data_dir`. + pub config_path: Option<PathBuf>, + /// Explicit data-directory root; overrides `<exe-dir>/rusthost-data/`. + pub data_dir: Option<PathBuf>, +} + // ─── Entry point ───────────────────────────────────────────────────────────── -// -// 4.4 — `data_dir()` and `settings_path()` are no longer free functions. -// The data directory is computed exactly once at the top of `run()` and -// threaded through every call that needs it as an explicit parameter. -// This removes the hidden `current_exe()` dependency from all internal -// functions and makes the path injectable (e.g. a tmp dir in tests). - -pub async fn run() -> Result<()> { - // 4.4 — single source of truth; computed here, nowhere else. - let exe = std::env::current_exe().unwrap_or_else(|_| PathBuf::from(".")); - let data_dir = exe - .parent() - .unwrap_or_else(|| Path::new(".")) - .join("rusthost-data"); - let settings_path = data_dir.join("settings.toml"); + +/// Entry point for the `rusthost-cli` binary. +/// +/// Computes the data-directory and settings path (honouring any overrides in +/// `args`), then either performs first-run setup or starts the full server. +/// +/// # Errors +/// +/// Returns an [`AppError`] if the config cannot be loaded, logging cannot be +/// initialised, or any other fatal startup condition occurs. +/// Path overrides are supplied via [`CliArgs`]. 
+pub async fn run(args: CliArgs) -> Result<()> { + // 4.4 + 5.5 — data_dir is computed exactly once and threaded everywhere. + // A CLI override takes precedence; the default is relative to current_exe(). + let data_dir = args.data_dir.unwrap_or_else(default_data_dir); + + let settings_path = args + .config_path + .unwrap_or_else(|| data_dir.join("settings.toml")); if settings_path.exists() { - normal_run(data_dir).await?; + normal_run(data_dir, &settings_path).await?; } else { first_run_setup(&data_dir, &settings_path)?; } Ok(()) } +/// Compute the default data directory (`/rusthost-data/`). +fn default_data_dir() -> PathBuf { + let exe = std::env::current_exe().unwrap_or_else(|_| PathBuf::from(".")); + exe.parent() + .unwrap_or_else(|| Path::new(".")) + .join("rusthost-data") +} + // ─── First Run ─────────────────────────────────────────────────────────────── fn first_run_setup(data_dir: &Path, settings_path: &Path) -> Result<()> { @@ -81,11 +112,9 @@ fn first_run_setup(data_dir: &Path, settings_path: &Path) -> Result<()> { // ─── Normal Run ────────────────────────────────────────────────────────────── -async fn normal_run(data_dir: PathBuf) -> Result<()> { - let settings_path = data_dir.join("settings.toml"); - +async fn normal_run(data_dir: PathBuf, settings_path: &Path) -> Result<()> { // 1. Load and validate config. - let config = Arc::new(config::loader::load(&settings_path)?); + let config = Arc::new(config::loader::load(settings_path)?); // 2. Initialise logging. logging::init(&config.logging, &data_dir)?; @@ -185,10 +214,6 @@ async fn normal_run(data_dir: PathBuf) -> Result<()> { } // 11. Event dispatch loop. - // 4.7 — tokio::signal::ctrl_c() replaces the ctrlc crate. The signal - // future is passed into event_loop and used directly in select!, - // eliminating the threading conflict between the ctrlc crate's - // OS-level handler and Tokio's internal signal infrastructure. event_loop(key_rx, &config, &state, &metrics, data_dir).await?; // 12. 
Graceful shutdown. @@ -196,10 +221,8 @@ async fn normal_run(data_dir: PathBuf) -> Result<()> { let _ = shutdown_tx.send(true); // 2.10 — wait for the HTTP server to drain in-flight connections (max 5 s). - // tor::kill() has been removed — Tor exits when its task detects shutdown_rx. let _ = tokio::time::timeout(Duration::from_secs(5), server_handle).await; - // 2.11 — write the final log entry, flush to disk, then restore the terminal. log::info!("RustHost shut down cleanly."); logging::flush(); console::cleanup(); @@ -217,7 +240,7 @@ fn spawn_server( metrics: &SharedMetrics, shutdown: &watch::Receiver, port_tx: oneshot::Sender, - data_dir: PathBuf, // 3.1/4.4 — caller owns data_dir; no current_exe() here + data_dir: PathBuf, ) -> tokio::task::JoinHandle<()> { let cfg = Arc::clone(config); let st = Arc::clone(state); @@ -255,7 +278,7 @@ async fn event_loop( config: &Arc, state: &SharedState, metrics: &SharedMetrics, - data_dir: PathBuf, // 3.1/4.4 — pre-computed by normal_run; not recomputed per event + data_dir: PathBuf, ) -> Result<()> { // 2.8 — mutable so we can set to None when the channel closes. let mut key_rx = key_rx; @@ -268,12 +291,10 @@ async fn event_loop( loop { // Build a future that yields the next key, or pends forever once the // channel closes (avoids repeated None-match after input task death). - // When the channel closes we log once and park this arm (2.8). let key_fut = async { if let Some(rx) = key_rx.as_mut() { rx.recv().await } else { - // Channel already closed — pend forever so ctrl_c can still fire. std::future::pending().await } }; @@ -286,7 +307,7 @@ async fn event_loop( config, Arc::clone(state), Arc::clone(metrics), - data_dir.clone(), // 3.1 — cheap PathBuf clone, no syscall + data_dir.clone(), ).await?; if quit { break; } } else { @@ -295,7 +316,7 @@ async fn event_loop( "Console input task exited — keyboard input disabled. \ Use Ctrl-C to quit." ); - key_rx = None; // suppress repeated warnings on next select! 
iteration + key_rx = None; } } // 4.7 — Ctrl-C handled directly through Tokio's signal machinery. @@ -310,9 +331,6 @@ async fn event_loop( Ok(()) } -// open_browser removed from this file — canonical definition is in -// crate::runtime (src/runtime/mod.rs), called via super::open_browser (fix 2.4). - // ─── Placeholder HTML ──────────────────────────────────────────────────────── const PLACEHOLDER_HTML: &str = r#" diff --git a/src/runtime/state.rs b/src/runtime/state.rs index d977971..5cb3e6e 100644 --- a/src/runtime/state.rs +++ b/src/runtime/state.rs @@ -81,6 +81,7 @@ pub struct AppState { } impl AppState { + #[must_use] pub const fn new() -> Self { Self { actual_port: 0, @@ -110,6 +111,7 @@ pub struct Metrics { } impl Metrics { + #[must_use] pub const fn new() -> Self { Self { requests: AtomicU64::new(0), @@ -142,6 +144,7 @@ impl Default for Metrics { // ─── Helpers ──────────────────────────────────────────────────────────────── #[allow(clippy::cast_precision_loss)] +#[must_use] pub fn format_bytes(bytes: u64) -> String { const KB: u64 = 1_024; const MB: u64 = 1_024 * KB; diff --git a/src/server/handler.rs b/src/server/handler.rs index 6bb9fd0..8affdb0 100644 --- a/src/server/handler.rs +++ b/src/server/handler.rs @@ -10,6 +10,8 @@ //! configured site root via [`std::fs::canonicalize`]. Any attempt to //! escape (e.g. `/../secret`) is rejected with HTTP 403. +#![allow(clippy::too_many_arguments)] // HTTP write_* fns mirror the wire format + use std::{fmt::Write as _, path::Path, sync::Arc}; use tokio::{ @@ -23,19 +25,42 @@ use crate::{runtime::state::SharedMetrics, Result}; // ─── Entry point ───────────────────────────────────────────────────────────── -/// Handle one HTTP connection to completion. -pub async fn handle( +/// Outcome of the initial request-reading phase. +/// +/// Returned by [`receive_request`] to communicate what happened to the caller +/// without requiring `handle` to inspect raw error kinds directly. 
+enum RequestOutcome { + /// The request was read and parsed successfully. + /// + /// Carries the raw header block, the recovered stream, and the boolean + /// `is_head` flag derived from the method. + Ready { + is_head: bool, + raw_path: String, + stream: TcpStream, + }, + /// A complete error response has already been written to the stream. + /// `handle` should return `Ok(())` immediately. + Responded, +} + +/// Read and parse one HTTP request from `stream`, returning [`RequestOutcome`]. +/// +/// Handles the 30-second slow-loris timeout, the 8 KiB header limit, and +/// method validation, writing the appropriate error response in each failure +/// case so that `handle` stays focused on routing. +/// +/// # Errors +/// +/// Propagates I/O errors from writing error responses (e.g. `400`, `408`). +async fn receive_request( stream: TcpStream, - canonical_root: Arc, // 3.2 — pre-canonicalized (2.3); Arc avoids per-connection alloc - index_file: Arc, // 3.2 — Arc clone is O(1) - dir_listing: bool, - metrics: SharedMetrics, -) -> Result<()> { - // 2.1 — wrap in BufReader so read_request uses read_line (one syscall per - // line) rather than reading one byte at a time (up to 8192 syscalls). + csp: &str, + metrics: &SharedMetrics, +) -> Result { let mut reader = BufReader::new(stream); - // 1.5 — Wrap read_request in a 30-second timeout to prevent slow-loris DoS. + // 1.5 — 30-second timeout prevents slow-loris DoS. let request = match timeout( std::time::Duration::from_secs(30), read_request(&mut reader), @@ -43,11 +68,25 @@ pub async fn handle( .await { Ok(Ok(r)) => r, - Ok(Err(e)) => return Err(e), + Ok(Err(e)) => { + // 5.2 — Send 400 on any read failure (oversized headers, reset, etc.) 
+ log::debug!("Failed to read request headers: {e}"); + let mut stream = reader.into_inner(); + write_response( + &mut stream, + 400, + "Bad Request", + "text/plain", + b"Bad Request", + false, + csp, + ) + .await?; + metrics.add_error(); + return Ok(RequestOutcome::Responded); + } Err(_elapsed) => { - // Client held the connection open without completing a request. log::debug!("Request timeout — sending 408"); - // Recover the stream from the reader for writing. let mut stream = reader.into_inner(); write_response( &mut stream, @@ -56,18 +95,17 @@ pub async fn handle( "text/plain", b"Request Timeout", false, + csp, ) .await?; - return Ok(()); + return Ok(RequestOutcome::Responded); } }; - // Recover the TcpStream from the BufReader for writing the response. let mut stream = reader.into_inner(); - // 1.4 — parse_path now returns (method, path) so we can suppress the body - // on HEAD responses. - let Some((method, raw_path)) = parse_path(&request) else { + // 1.4 — parse_path extracts (method, path); returns None for non-GET/HEAD. + let Some((method, raw_path_ref)) = parse_path(&request) else { write_response( &mut stream, 400, @@ -75,13 +113,43 @@ pub async fn handle( "text/plain", b"Bad Request", false, + csp, ) .await?; metrics.add_error(); - return Ok(()); + return Ok(RequestOutcome::Responded); }; - let is_head = method == "HEAD"; + Ok(RequestOutcome::Ready { + is_head: method == "HEAD", + raw_path: raw_path_ref.to_owned(), + stream, + }) +} + +/// Handle one HTTP connection to completion. +/// +/// # Errors +/// +/// Propagates I/O errors from writing response headers or body. Read errors +/// (e.g. connection reset during header read) are converted to a `400 Bad +/// Request` response rather than being surfaced as errors. 
+pub async fn handle( + stream: TcpStream, + canonical_root: Arc, // 3.2 — pre-canonicalized (2.3); Arc avoids per-connection alloc + index_file: Arc, // 3.2 — Arc clone is O(1) + dir_listing: bool, + metrics: SharedMetrics, + csp: Arc, // 5.3 — Content-Security-Policy, applied to HTML responses only +) -> Result<()> { + let RequestOutcome::Ready { + is_head, + raw_path, + mut stream, + } = receive_request(stream, &csp, &metrics).await? + else { + return Ok(()); + }; // Strip query string / fragment then percent-decode. let path_only = raw_path.split('?').next().unwrap_or("/"); @@ -89,9 +157,21 @@ pub async fn handle( match resolve_path(&canonical_root, &decoded, &index_file, dir_listing) { Resolved::File(abs_path) => { - serve_file(&mut stream, &abs_path, is_head, &metrics).await?; + serve_file(&mut stream, &abs_path, is_head, &metrics, &csp).await?; + } + Resolved::NotFound => { + write_response( + &mut stream, + 404, + "Not Found", + "text/plain", + b"Not Found", + false, + &csp, + ) + .await?; + metrics.add_error(); } - Resolved::Fallback => { write_response( &mut stream, @@ -100,11 +180,11 @@ pub async fn handle( "text/html; charset=utf-8", fallback::NO_SITE_HTML.as_bytes(), is_head, + &csp, ) .await?; metrics.add_request(); } - Resolved::Forbidden => { write_response( &mut stream, @@ -113,11 +193,11 @@ pub async fn handle( "text/plain", b"Forbidden", false, + &csp, ) .await?; metrics.add_error(); } - Resolved::DirectoryListing(dir_path) => { let html = build_directory_listing(&dir_path, &decoded); write_response( @@ -127,6 +207,7 @@ pub async fn handle( "text/html; charset=utf-8", html.as_bytes(), is_head, + &csp, ) .await?; metrics.add_request(); @@ -140,13 +221,16 @@ pub async fn handle( /// Open `abs_path`, send headers + streamed body (or headers only for HEAD). /// -/// Extracted from `handle()` to keep that function under the line-count lint -/// threshold. All logic is unchanged from the inline version. 
+/// # Errors +/// +/// Propagates I/O errors from opening the file, reading metadata, or writing +/// the response to the stream. async fn serve_file( stream: &mut TcpStream, abs_path: &std::path::Path, is_head: bool, metrics: &SharedMetrics, + csp: &str, ) -> Result<()> { // 1.7 — Stream the file instead of reading it entirely into memory. if let Ok(mut file) = tokio::fs::File::open(abs_path).await { @@ -161,6 +245,7 @@ async fn serve_file( "text/plain", b"Internal Server Error", false, + csp, ) .await?; metrics.add_error(); @@ -169,14 +254,23 @@ async fn serve_file( }; let ext = abs_path.extension().and_then(|e| e.to_str()).unwrap_or(""); let content_type = mime::for_extension(ext); - write_headers(stream, 200, "OK", content_type, file_len).await?; + write_headers(stream, 200, "OK", content_type, file_len, csp).await?; if !is_head { tokio::io::copy(&mut file, stream).await?; } stream.flush().await?; metrics.add_request(); } else { - write_response(stream, 404, "Not Found", "text/plain", b"Not Found", false).await?; + write_response( + stream, + 404, + "Not Found", + "text/plain", + b"Not Found", + false, + csp, + ) + .await?; metrics.add_error(); } Ok(()) @@ -186,12 +280,16 @@ async fn serve_file( /// Read HTTP request headers from a buffered stream, line by line. /// -/// Uses `read_line()` — a single system call per line regardless of how many -/// bytes arrive — instead of the previous byte-at-a-time loop that issued up -/// to 8 192 `read` syscalls per request (fix 2.1). -/// /// Stops at the blank line that terminates the HTTP header section /// (`\r\n` or bare `\n`). Enforces an 8 KiB total limit. +/// +/// # Errors +/// +/// - [`std::io::ErrorKind::InvalidData`] when the total header block exceeds +/// 8 KiB — the caller maps this to `400 Bad Request`. +/// - [`std::io::ErrorKind::Other`] when the connection closes before the +/// blank terminating line is received. +/// - Any underlying [`std::io::Error`] from the network layer. 
async fn read_request(reader: &mut BufReader<TcpStream>) -> Result<String> { let mut request = String::with_capacity(512); let mut total = 0usize; @@ -206,7 +304,14 @@ } total = total.saturating_add(n); if total > 8_192 { - return Err(std::io::Error::other("Request header too large (> 8 KiB)").into()); + // Use InvalidData so the caller can distinguish "too large" from + // other I/O errors and respond with 400 rather than dropping the + // connection silently. + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "Request header too large (> 8 KiB)", + ) + .into()); } request.push_str(&line); // Both `\r\n` (CRLF, RFC 7230 §3) and bare `\n` terminate the headers. @@ -219,8 +324,9 @@ } /// Extract the method and URL path from `GET /path HTTP/1.1`. +/// /// Returns `(method, path)` or `None` if the request line is malformed or -/// the method is not GET/HEAD. +/// the method is not `GET`/`HEAD`. fn parse_path(request: &str) -> Option<(&str, &str)> { let first = request.lines().next()?; let mut it = first.splitn(3, ' '); @@ -236,14 +342,41 @@ // ─── Path resolution ───────────────────────────────────────────────────────── -enum Resolved { +/// Resolve `.` and `..` components in `path` lexically, without any +/// filesystem calls. The result is an absolute path with the same prefix +/// as `path` but with all `..` hops applied to the accumulated component stack. +/// +/// Unlike [`std::fs::canonicalize`] this works on paths whose final component +/// does not yet exist on disk, which is exactly what we need when checking +/// whether a requested-but-missing file would fall inside the site root. 
+fn normalize_path(path: &std::path::Path) -> std::path::PathBuf { + let mut stack: Vec<std::path::Component<'_>> = Vec::new(); + for component in path.components() { + match component { + std::path::Component::ParentDir => { + // Only pop a normal component; never pop a root or prefix. + if matches!(stack.last(), Some(std::path::Component::Normal(_))) { + stack.pop(); + } + } + std::path::Component::CurDir => { /* skip — no-op */ } + c => stack.push(c), + } + } + stack.iter().collect() +} + +#[derive(Debug, PartialEq)] +pub(crate) enum Resolved { File(std::path::PathBuf), + NotFound, Fallback, Forbidden, DirectoryListing(std::path::PathBuf), } -fn resolve_path( +#[must_use] +pub(crate) fn resolve_path( canonical_root: &Path, url_path: &str, index_file: &str, @@ -268,7 +401,20 @@ }; let Ok(canonical) = target.canonicalize() else { - return Resolved::Fallback; + // Target does not exist on disk. Use lexical normalization (no syscalls) + // to determine whether the path would land inside the root: + // - Root itself missing → Fallback (no site configured) + // - Normalized path within root → NotFound (404) + // - Normalized path escapes root → Forbidden (403) + if !canonical_root.exists() { + return Resolved::Fallback; + } + let normalized = normalize_path(&target); + return if normalized.starts_with(canonical_root) { + Resolved::NotFound + } else { + Resolved::Forbidden + }; }; if !canonical.starts_with(canonical_root) { @@ -284,6 +430,10 @@ /// /// The `Content-Length` header always reflects the full body size, even when /// the body is suppressed, as required by RFC 7231 §4.3.2. +/// +/// # Errors +/// +/// Propagates any [`std::io::Error`] from writing to the stream. 
async fn write_response( stream: &mut TcpStream, status: u16, @@ -291,8 +441,14 @@ async fn write_response( content_type: &str, body: &[u8], suppress_body: bool, + csp: &str, ) -> Result<()> { - write_headers(stream, status, reason, content_type, body.len() as u64).await?; + // `usize as u64`: on all supported targets usize ≤ 64 bits, so this cast + // is always lossless. The allow suppresses clippy::cast_possible_truncation + // at the narrowest possible scope. + #[allow(clippy::cast_possible_truncation)] + let body_len: u64 = body.len() as u64; + write_headers(stream, status, reason, content_type, body_len, csp).await?; if !suppress_body { stream.write_all(body).await?; } @@ -300,20 +456,56 @@ async fn write_response( Ok(()) } -/// Write only the response status line and headers, followed by the blank line. -/// Used by the streaming file path (1.7) and internally by `write_response`. +/// Write only the response status line and all headers, followed by the blank +/// line separating headers from body. +/// +/// ## Security headers (task 5.3) +/// +/// The following headers are added to **every** response: +/// +/// | Header | Value | +/// |------------------------|--------------------------------------------| +/// | `X-Content-Type-Options` | `nosniff` | +/// | `X-Frame-Options` | `SAMEORIGIN` | +/// | `Referrer-Policy` | `no-referrer` | +/// | `Permissions-Policy` | `camera=(), microphone=(), geolocation=()` | +/// +/// For **HTML** responses (`content_type` starts with `"text/html"`), the +/// `Content-Security-Policy` header is also emitted using `csp` as the value. +/// +/// `Referrer-Policy: no-referrer` is especially important for the Tor hidden +/// service use case: without it, the `.onion` URL leaks in the `Referer` +/// header sent to any third-party resource (CDN, fonts, analytics) embedded +/// in a served HTML page. +/// +/// # Errors +/// +/// Propagates any [`std::io::Error`] from writing to the stream. 
async fn write_headers( stream: &mut TcpStream, status: u16, reason: &str, content_type: &str, content_length: u64, + csp: &str, ) -> Result<()> { + let is_html = content_type.starts_with("text/html"); + let csp_line = if is_html && !csp.is_empty() { + format!("Content-Security-Policy: {csp}\r\n") + } else { + String::new() + }; + let header = format!( "HTTP/1.1 {status} {reason}\r\n\ Content-Type: {content_type}\r\n\ Content-Length: {content_length}\r\n\ Connection: close\r\n\ + X-Content-Type-Options: nosniff\r\n\ + X-Frame-Options: SAMEORIGIN\r\n\ + Referrer-Policy: no-referrer\r\n\ + Permissions-Policy: camera=(), microphone=(), geolocation=()\r\n\ + {csp_line}\ \r\n" ); stream.write_all(header.as_bytes()).await?; @@ -387,6 +579,7 @@ fn html_escape(s: &str) -> String { } /// Percent-encode a filename component for safe use in a URL path segment. +/// /// Encodes all bytes that are not unreserved URI characters (RFC 3986). fn percent_encode_path(s: &str) -> String { let mut out = String::with_capacity(s.len()); @@ -394,7 +587,9 @@ fn percent_encode_path(s: &str) -> String { match byte { // Unreserved characters: ALPHA / DIGIT / "-" / "." / "_" / "~" b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~' => { - out.push(byte as char); + // All matched bytes are ASCII; `char::from` is the + // clippy-pedantic-clean alternative to `byte as char`. + out.push(char::from(byte)); } b => { let _ = write!(out, "%{b:02X}"); @@ -410,19 +605,15 @@ fn percent_encode_path(s: &str) -> String { /// /// # Correctness (fix 4.5) /// -/// The previous implementation decoded each `%XX` pair as an independent -/// `char` cast from a `u8`, which produced two garbled characters for any -/// multi-byte UTF-8 sequence (e.g. `%C3%A9` yielded `é` instead of `é`). -/// -/// This version accumulates consecutive percent-decoded bytes into a buffer -/// and converts to UTF-8 via `String::from_utf8_lossy` only when a literal -/// character (or end-of-input) breaks the run. 
This correctly handles -/// multi-byte sequences split across adjacent `%XX` tokens and falls back -/// gracefully for invalid UTF-8. +/// Accumulates consecutive percent-decoded bytes into a buffer and converts to +/// UTF-8 via `String::from_utf8_lossy` only when a literal character (or +/// end-of-input) breaks the run. This correctly handles multi-byte sequences +/// split across adjacent `%XX` tokens (e.g. `%C3%A9` → `é`). /// /// Null bytes (`%00`) are never decoded — they are passed through as the /// literal string `%00` to prevent null-byte path injection attacks. -fn percent_decode(input: &str) -> String { +#[must_use] +pub(crate) fn percent_decode(input: &str) -> String { let mut output = String::with_capacity(input.len()); // Buffer for consecutive percent-decoded bytes that may form a multi-byte // UTF-8 character together. @@ -432,7 +623,6 @@ fn percent_decode(input: &str) -> String { let mut i = 0; while i < src.len() { - // Use .get() throughout to satisfy clippy::indexing_slicing. if src.get(i).copied() == Some(b'%') { let h1 = src.get(i.saturating_add(1)).copied().and_then(hex_digit); let h2 = src.get(i.saturating_add(2)).copied().and_then(hex_digit); @@ -447,9 +637,7 @@ fn percent_decode(input: &str) -> String { } i = i.saturating_add(3); } else { - // Incomplete or invalid %XX — pass through literal `%` and - // advance by 1 so the following characters are re-processed - // individually (preserving `%2` as `%2`, `%ZZ` as `%ZZ`). + // Incomplete or invalid %XX — pass through literal `%`. flush_byte_buf(&mut byte_buf, &mut output); output.push('%'); i = i.saturating_add(1); @@ -488,3 +676,138 @@ fn flush_byte_buf(buf: &mut Vec, out: &mut String) { buf.clear(); } } + +// ─── Unit tests ─────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + // `expect()` in test helpers is idiomatic and intentional — a failure here + // means the test environment itself is broken, not the code under test. 
+ #![allow(clippy::expect_used)] + + use std::path::Path; + + use super::{percent_decode, resolve_path, Resolved}; + + // ── percent_decode ──────────────────────────────────────────────────────── + + #[test] + fn percent_decode_ascii_passthrough() { + assert_eq!(percent_decode("/index.html"), "/index.html"); + } + + #[test] + fn percent_decode_space() { + assert_eq!(percent_decode("/file%20name.html"), "/file name.html"); + } + + #[test] + fn percent_decode_multibyte_utf8() { + // %C3%A9 is the UTF-8 encoding of 'é' (U+00E9). + // Regression test for fix 4.5: the old implementation decoded each + // %XX pair as an independent u8→char cast, yielding "é" instead of "é". + assert_eq!(percent_decode("/caf%C3%A9.html"), "/café.html"); + } + + #[test] + fn percent_decode_null_byte_not_decoded() { + // %00 must never be decoded to a null byte (path injection attack). + // The literal string "%00" must appear in the output unchanged. + let result = percent_decode("/foo%00/../secret"); + assert!( + !result.contains('\x00'), + "null byte found in decoded output: {result:?}" + ); + assert!( + result.contains("%00"), + "expected literal %00 in output, got: {result:?}" + ); + } + + #[test] + fn percent_decode_incomplete_percent_sequence() { + // "/foo%2" — the `%2` is not followed by a second hex digit, so the + // `%` is passed through literally and the `2` is re-processed. + assert_eq!(percent_decode("/foo%2"), "/foo%2"); + } + + #[test] + fn percent_decode_invalid_hex() { + // "%ZZ" contains non-hex digits after `%`; output must be unchanged. + assert_eq!(percent_decode("/foo%ZZ"), "/foo%ZZ"); + } + + // ── resolve_path ────────────────────────────────────────────────────────── + // + // All tests that exercise the file-system use a temporary directory so + // they are completely self-contained and leave no side effects. 
+ + /// Returns a canonical temp dir with the structure: + /// ``` + /// / + /// root/ + /// index.html ← served for happy-path tests + /// secret.txt ← outside root, for traversal tests + /// ``` + fn make_test_tree() -> (tempfile::TempDir, std::path::PathBuf) { + let tmp = tempfile::tempdir().expect("tempdir"); + let root = tmp.path().join("root"); + std::fs::create_dir_all(&root).expect("create root"); + std::fs::write(root.join("index.html"), b"hello").expect("write index"); + std::fs::write(tmp.path().join("secret.txt"), b"secret").expect("write secret"); + let canonical_root = root.canonicalize().expect("canonicalize root"); + (tmp, canonical_root) + } + + #[test] + fn resolve_path_happy_path() { + let (_tmp, root) = make_test_tree(); + let result = resolve_path(&root, "/index.html", "index.html", false); + assert!( + matches!(result, Resolved::File(_)), + "expected Resolved::File, got {result:?}" + ); + } + + #[test] + fn resolve_path_directory_traversal() { + let (tmp, root) = make_test_tree(); + // secret.txt lives one level above `root`, so "/../secret.txt" would + // escape the root if the traversal check were absent. + // canonicalize() resolves `/../secret.txt` → `/secret.txt` + // which is a real file, but it does NOT start_with `root` → Forbidden. + let _ = tmp; // keep alive so secret.txt exists for canonicalize + let result = resolve_path(&root, "/../secret.txt", "index.html", false); + assert_eq!( + result, + Resolved::Forbidden, + "expected Resolved::Forbidden for traversal attempt" + ); + } + + #[test] + fn resolve_path_encoded_slash_traversal() { + // After percent-decoding, "/..%2Fsecret.txt" becomes "/../secret.txt" + // which is what is passed to resolve_path — same traversal as above. 
+ let (tmp, root) = make_test_tree(); + let decoded = super::percent_decode("/../secret.txt"); // already decoded form + let _ = tmp; + let result = resolve_path(&root, &decoded, "index.html", false); + assert_eq!(result, Resolved::Forbidden); + } + + #[test] + fn resolve_path_missing_file_returns_not_found() { + let (_tmp, root) = make_test_tree(); + let result = resolve_path(&root, "/does_not_exist.txt", "index.html", false); + assert_eq!(result, Resolved::NotFound); + } + + #[test] + fn resolve_path_missing_root_returns_fallback() { + // Passing a non-existent root means every canonicalize() call fails. + let missing_root = Path::new("/nonexistent/root/that/does/not/exist"); + let result = resolve_path(missing_root, "/index.html", "index.html", false); + assert_eq!(result, Resolved::Fallback); + } +} diff --git a/src/server/mime.rs b/src/server/mime.rs index d9f39be..2a9c12b 100644 --- a/src/server/mime.rs +++ b/src/server/mime.rs @@ -10,9 +10,11 @@ /// /// # Examples /// ``` +/// use rusthost::server::mime; /// assert_eq!(mime::for_extension("html"), "text/html; charset=utf-8"); /// assert_eq!(mime::for_extension("xyz"), "application/octet-stream"); /// ``` +#[must_use] pub fn for_extension(ext: &str) -> &'static str { // 3.4 — Normalise to lowercase in a fixed stack buffer to avoid a heap // allocation on every served file request. Extensions longer than 16 bytes diff --git a/src/server/mod.rs b/src/server/mod.rs index 24f56ef..5400ef8 100644 --- a/src/server/mod.rs +++ b/src/server/mod.rs @@ -40,6 +40,16 @@ use crate::{ /// Binds the port (with optional fallback), updates `SharedState.actual_port`, /// sends the bound port through `port_tx` so Tor can start without a sleep, /// then accepts connections until the shutdown watch fires. +/// +/// ## Accept-loop observability (task 5.4) +/// +/// Accept errors use exponential backoff (1 ms → 1 s) to prevent log storms +/// under persistent failures such as `EMFILE`. 
Error severity is split: +/// +/// - **`EMFILE` / `ENFILE`** (file-descriptor exhaustion) → logged at `error`; +/// these require operator intervention. +/// - **Transient errors** (`ECONNRESET`, `ECONNABORTED`, etc.) → logged at +/// `debug`; they are expected under normal traffic and resolve automatically. pub async fn run( config: Arc, state: SharedState, @@ -52,6 +62,9 @@ pub async fn run( // 4.2 — config.server.port is NonZeroU16; .get() produces the u16 value. let base_port = config.server.port.get(); let fallback = config.server.auto_port_fallback; + // `u32 as usize`: usize ≥ 32 bits on every target Rust supports, so this + // conversion is lossless. The allow suppresses clippy::cast_possible_truncation. + #[allow(clippy::cast_possible_truncation)] let max_conns = config.server.max_connections as usize; let (listener, bound_port) = match bind_with_fallback(bind_addr, base_port, fallback) { @@ -75,7 +88,6 @@ pub async fn run( } // Signal the bound port to lifecycle so Tor can start immediately. - // If the receiver has already gone away, we continue serving anyway. let _ = port_tx.send(bound_port); log::info!("HTTP server listening on {bind_addr}:{bound_port}"); @@ -94,20 +106,30 @@ pub async fn run( return; } }; - // 3.2 — Arc: per-connection clone is an atomic refcount bump, not a - // String heap allocation. + // 3.2 — Arc: per-connection clone is an atomic refcount bump. let index_file: Arc = Arc::from(config.site.index_file.as_str()); + // 5.3 — Content-Security-Policy forwarded to every handler so it can be + // emitted on HTML responses without a global static. + let csp_header: Arc = Arc::from(config.server.content_security_policy.as_str()); let dir_list = config.site.enable_directory_listing; let semaphore = Arc::new(Semaphore::new(max_conns)); // 2.10 — JoinSet tracks in-flight handler tasks so shutdown can drain them. let mut join_set: JoinSet<()> = JoinSet::new(); + // 5.4 — Exponential backoff on accept errors. 
+ // Starts at 1 ms, doubles on each consecutive error, caps at 1 s. + // Reset to 1 ms after the next successful accept. + let mut backoff_ms: u64 = 1; + loop { tokio::select! { result = listener.accept() => { match result { Ok((stream, peer)) => { + // 5.4 — reset backoff after a successful accept. + backoff_ms = 1; + log::debug!("Connection from {peer}"); let Ok(permit) = Arc::clone(&semaphore).acquire_owned().await else { break; // semaphore closed — shutting down @@ -121,16 +143,34 @@ pub async fn run( let site = Arc::clone(&canonical_root); let idx = Arc::clone(&index_file); let met = Arc::clone(&metrics); + let csp = Arc::clone(&csp_header); join_set.spawn(async move { let _permit = permit; if let Err(e) = handler::handle( - stream, site, idx, dir_list, met + stream, site, idx, dir_list, met, csp, ).await { log::debug!("Handler error: {e}"); } }); } - Err(e) => log::warn!("Accept error: {e}"), + Err(e) => { + // 5.4 — differentiate error severity. + if is_fd_exhaustion(&e) { + log::error!( + "Accept error — file-descriptor limit reached \ + (EMFILE/ENFILE): {e}. Reduce max_connections or \ + raise the OS ulimit." + ); + } else { + log::debug!("Accept error (transient): {e}"); + } + + // 5.4 — exponential backoff: prevents log storms under + // persistent errors such as EMFILE (thousands of errors + // per second in a tight loop become at most one per 1 s). + tokio::time::sleep(Duration::from_millis(backoff_ms)).await; + backoff_ms = backoff_ms.saturating_mul(2).min(1_000); + } } } @@ -143,8 +183,7 @@ pub async fn run( state.write().await.server_running = false; log::info!("HTTP server stopped accepting; draining in-flight connections…"); - // 2.10 — wait up to 5 seconds for in-flight handlers to complete so - // responses that were already being written are not truncated mid-stream. + // 2.10 — wait up to 5 seconds for in-flight handlers to complete. 
let drain = async { while join_set.join_next().await.is_some() {} }; let _ = tokio::time::timeout(Duration::from_secs(5), drain).await; log::info!("HTTP server drained."); @@ -171,8 +210,6 @@ fn bind_with_fallback(addr: IpAddr, port: u16, fallback: bool) -> Result<(TcpLis // Try the next port. } Err(source) => { - // 4.1 — use the structured ServerBind variant so callers can - // match on the port number and source error separately. return Err(AppError::ServerBind { port: try_port, source, @@ -194,6 +231,27 @@ fn bind_with_fallback(addr: IpAddr, port: u16, fallback: bool) -> Result<(TcpLis }) } +/// Return `true` when `e` represents file-descriptor exhaustion (`EMFILE` or +/// `ENFILE`) on Unix platforms. +/// +/// On non-Unix targets (Windows) where these error codes have no equivalent, +/// always returns `false`. +fn is_fd_exhaustion(e: &std::io::Error) -> bool { + #[cfg(unix)] + { + // EMFILE (24): too many open files for the process. + // ENFILE (23): too many open files system-wide. + // Both values are specified by POSIX and identical on Linux, macOS, + // FreeBSD, and other POSIX-conformant systems. + matches!(e.raw_os_error(), Some(libc::EMFILE | libc::ENFILE)) + } + #[cfg(not(unix))] + { + let _ = e; + false + } +} + // ─── Site scanner ───────────────────────────────────────────────────────────── /// Recursively count files and total bytes in `site_root` (BFS traversal). @@ -201,8 +259,16 @@ fn bind_with_fallback(addr: IpAddr, port: u16, fallback: bool) -> Result<(TcpLis /// Returns `Err` if any `read_dir` call fails so callers can log a warning /// instead of silently reporting zeros. /// -/// **Must be called from a blocking context** (e.g. `tokio::task::spawn_blocking`) -/// because `std::fs::read_dir` is a blocking syscall. +/// # Errors +/// +/// Returns [`AppError::Io`] if any directory in the tree cannot be read. +/// +/// # Panics +/// +/// Does not panic. **Must be called from a blocking context** (e.g. 
+/// `tokio::task::spawn_blocking`) because `std::fs::read_dir` is a blocking +/// syscall. +#[must_use = "the file count and byte total are used to populate the dashboard"] pub fn scan_site(site_root: &Path) -> crate::Result<(u32, u64)> { let mut count = 0u32; let mut bytes = 0u64; @@ -212,7 +278,6 @@ pub fn scan_site(site_root: &Path) -> crate::Result<(u32, u64)> { while let Some(dir) = queue.pop_front() { let entries = std::fs::read_dir(&dir).map_err(|e| { - // Preserve path context in the error while mapping to AppError::Io. AppError::Io(std::io::Error::new( e.kind(), format!("Cannot read directory {}: {e}", dir.display()), diff --git a/src/tor/mod.rs b/src/tor/mod.rs index befa6b7..ce13e81 100644 --- a/src/tor/mod.rs +++ b/src/tor/mod.rs @@ -238,9 +238,14 @@ async fn proxy_stream( /// Encode an `HsId` (ed25519 public key) as a v3 `.onion` domain name. /// -/// `arti-client 0.40` exposes `HsId` via `DisplayRedacted` (from the `safelog` -/// crate) rather than `std::fmt::Display`, so we cannot use `format!("{}", …)` -/// directly. We implement the encoding ourselves using the spec: +/// Delegates to [`onion_address_from_pubkey`] which is separately unit-tested. +fn hsid_to_onion_address(hsid: HsId) -> String { + onion_address_from_pubkey(hsid.as_ref()) +} + +/// Encode a raw 32-byte ed25519 public key as a v3 `.onion` domain name. +/// +/// Implements the encoding defined in the Tor Rendezvous Specification: /// /// ```text /// onion_address = base32(PUBKEY | CHECKSUM | VERSION) + ".onion" @@ -248,11 +253,15 @@ async fn proxy_stream( /// VERSION = 0x03 /// ``` /// -/// `HsId: AsRef<[u8; 32]>` is stable across arti 0.40+. -fn hsid_to_onion_address(hsid: HsId) -> String { +/// The output is always exactly 62 characters: 56 lowercase base32 characters +/// followed by `".onion"`. +/// +/// Separated from [`hsid_to_onion_address`] so that tests can supply an +/// arbitrary 32-byte key without constructing an `HsId`. 
+#[must_use] +pub(crate) fn onion_address_from_pubkey(pubkey: &[u8; 32]) -> String { use sha3::{Digest, Sha3_256}; - let pubkey: &[u8; 32] = hsid.as_ref(); let version: u8 = 3; // CHECKSUM = SHA3-256(".onion checksum" || PUBKEY || VERSION) truncated to 2 bytes @@ -267,14 +276,13 @@ fn hsid_to_onion_address(hsid: HsId) -> String { address_bytes[..32].copy_from_slice(pubkey); // Consume the first two checksum bytes via an iterator — clippy cannot // prove at compile time that a GenericArray has >= 2 elements, so direct - // indexing (hash[0], hash[1]) triggers `indexing_slicing`. SHA3-256 - // always produces 32 bytes, so next() will never return None here. + // indexing triggers `indexing_slicing`. SHA3-256 always produces 32 bytes. let mut hash_iter = hash.iter().copied(); address_bytes[32] = hash_iter.next().unwrap_or(0); address_bytes[33] = hash_iter.next().unwrap_or(0); address_bytes[34] = version; - // RFC 4648 base32, no padding, lowercase → 56 characters + // RFC 4648 base32, no padding, lowercase → 56 characters let encoded = data_encoding::BASE32_NOPAD .encode(&address_bytes) .to_ascii_lowercase(); @@ -283,6 +291,9 @@ fn hsid_to_onion_address(hsid: HsId) -> String { } // ─── State helpers ──────────────────────────────────────────────────────────── +// +// These must appear BEFORE the #[cfg(test)] module; items after a test module +// trigger the `clippy::items_after_test_module` lint. 
async fn set_status(state: &SharedState, status: TorStatus) { state.write().await.tor_status = status; @@ -293,3 +304,83 @@ async fn set_onion(state: &SharedState, addr: String) { s.tor_status = TorStatus::Ready; s.onion_address = Some(addr); } + +// ─── Unit tests ─────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::onion_address_from_pubkey; + + /// Compute the expected onion address for a given 32-byte key using the + /// same algorithm as `onion_address_from_pubkey`, acting as an independent + /// reference implementation to cross-check the production code. + fn reference_onion(pubkey: &[u8; 32]) -> String { + use data_encoding::BASE32_NOPAD; + use sha3::{Digest, Sha3_256}; + + let version: u8 = 3; + let mut hasher = Sha3_256::new(); + hasher.update(b".onion checksum"); + hasher.update(pubkey); + hasher.update([version]); + let hash = hasher.finalize(); + + let mut bytes = [0u8; 35]; + bytes[..32].copy_from_slice(pubkey); + // Use iterator instead of direct indexing to avoid clippy::indexing_slicing. + // SHA3-256 always produces 32 bytes, so next() will never return None. + let mut it = hash.iter().copied(); + bytes[32] = it.next().unwrap_or(0); + bytes[33] = it.next().unwrap_or(0); + bytes[34] = version; + + format!("{}.onion", BASE32_NOPAD.encode(&bytes).to_ascii_lowercase()) + } + + #[test] + fn hsid_to_onion_address_all_zeros_vector() { + // Fixed 32-byte test vector: all zeros. + // The expected value is derived from the reference implementation above. + let pubkey = [0u8; 32]; + let expected = reference_onion(&pubkey); + let actual = onion_address_from_pubkey(&pubkey); + assert_eq!(actual, expected); + } + + #[test] + fn hsid_to_onion_address_format_is_correct() { + let pubkey = [0u8; 32]; + let addr = onion_address_from_pubkey(&pubkey); + // A v3 onion address is always 56 base32 chars + ".onion" = 62 chars. 
+ assert_eq!(addr.len(), 62, "unexpected length: {addr:?}"); + // Use strip_suffix to avoid clippy::case_sensitive_file_extension_comparison. + assert!( + addr.strip_suffix(".onion").is_some(), + "must end with .onion: {addr:?}" + ); + let host = addr.strip_suffix(".onion").unwrap_or(&addr); + assert!( + host.chars() + .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit()), + "host contains non-base32 characters: {host:?}" + ); + } + + #[test] + fn hsid_to_onion_address_is_deterministic() { + // Calling the function twice with the same key must produce the same + // output — the address must be derivable from the public key alone. + let pubkey = [0x42u8; 32]; + assert_eq!( + onion_address_from_pubkey(&pubkey), + onion_address_from_pubkey(&pubkey) + ); + } + + #[test] + fn hsid_to_onion_address_different_keys_produce_different_addresses() { + let a = onion_address_from_pubkey(&[0u8; 32]); + let b = onion_address_from_pubkey(&[1u8; 32]); + assert_ne!(a, b, "different keys must produce different addresses"); + } +} diff --git a/tests/http_integration.rs b/tests/http_integration.rs new file mode 100644 index 0000000..1e1c4fd --- /dev/null +++ b/tests/http_integration.rs @@ -0,0 +1,406 @@ +//! # HTTP Server Integration Tests (task 5.2) +//! +//! Each test spins up an isolated [`rusthost::server::run`] instance, connects +//! to it via [`tokio::net::TcpStream`], sends raw HTTP/1.1, and inspects the +//! raw response bytes. +//! +//! ## Port allocation +//! +//! Each test calls [`free_port()`] which binds a `StdTcpListener` on +//! `127.0.0.1:0`, reads the OS-assigned port, and immediately closes the +//! listener. That port is then passed to the test config with +//! `auto_port_fallback = false`, so the server binds the same (now-free) port. +//! The TOCTOU window between release and server bind is acceptable on the +//! loopback interface and eliminates the port-collision risk that would arise +//! from having every test start from port 8080. 
+ +use std::{net::SocketAddr, path::Path, sync::Arc, time::Duration}; + +use tokio::{ + io::{AsyncReadExt, AsyncWriteExt}, + net::TcpStream, + sync::{watch, RwLock}, +}; + +use rusthost::{ + config::Config, + runtime::state::{AppState, Metrics}, +}; + +// ─── Port helper ────────────────────────────────────────────────────────────── + +/// Ask the OS for a free port by binding on `0`, then release it. +/// +/// Returns the port number for immediate use as the test server's bind port. +/// `auto_port_fallback` is set to `false` in the test config so the server +/// always binds exactly this port rather than searching a range. +fn free_port() -> Result { + use std::net::TcpListener; + let listener = TcpListener::bind("127.0.0.1:0")?; + Ok(listener.local_addr()?.port()) + // listener is dropped here, releasing the port +} + +// ─── Test harness ───────────────────────────────────────────────────────────── + +/// A live server instance scoped to one test. +struct TestServer { + addr: SocketAddr, + shutdown_tx: watch::Sender, + handle: Option>, +} + +impl TestServer { + /// Spin up a server bound to the port returned by [`free_port()`]. + /// + /// `site_root` must already contain the files the test expects to serve. + async fn start(site_root: &Path) -> Result> { + let port = free_port()?; + let config = Arc::new(build_test_config(site_root, port)); + let state = Arc::new(RwLock::new(AppState::new())); + let metrics = Arc::new(Metrics::new()); + let (shutdown_tx, shutdown_rx) = watch::channel(false); + let (port_tx, port_rx) = tokio::sync::oneshot::channel::(); + + // The server joins data_dir + config.site.directory to find files. + // `site_root` is `/site`; `data_dir` must therefore be ``. 
+ let data_dir = site_root.parent().unwrap_or(site_root).to_path_buf(); + + let handle = { + let cfg = Arc::clone(&config); + let st = Arc::clone(&state); + let met = Arc::clone(&metrics); + let shut = shutdown_rx; + tokio::spawn(async move { + rusthost::server::run(cfg, st, met, data_dir, shut, port_tx).await; + }) + }; + + // Wait for the server to confirm its bound port (5 s guard). + let bound_port = tokio::time::timeout(Duration::from_secs(5), port_rx).await??; + + let addr: SocketAddr = format!("127.0.0.1:{bound_port}").parse()?; + + Ok(Self { + addr, + shutdown_tx, + handle: Some(handle), + }) + } + + /// Send raw `request` bytes and return the complete response as a `String`. + /// + /// A 5-second read deadline prevents a misbehaving server from hanging the + /// test suite indefinitely. + async fn send(&self, request: &[u8]) -> Result> { + let mut stream = TcpStream::connect(self.addr).await?; + stream.write_all(request).await?; + + let mut response = Vec::new(); + tokio::time::timeout(Duration::from_secs(5), async { + let mut buf = [0u8; 4096]; + loop { + let n = stream.read(&mut buf).await?; + if n == 0 { + break; + } + let slice = buf + .get(..n) + .ok_or_else(|| std::io::Error::other("read returned out-of-bounds length"))?; + response.extend_from_slice(slice); + } + Ok::<_, std::io::Error>(()) + }) + .await??; + + Ok(String::from_utf8_lossy(&response).into_owned()) + } + + /// Gracefully shut the server down and await task exit. + async fn stop(mut self) { + let _ = self.shutdown_tx.send(true); + if let Some(handle) = self.handle.take() { + tokio::time::timeout(Duration::from_secs(5), handle) + .await + .ok(); + } + } +} + +impl Drop for TestServer { + fn drop(&mut self) { + // Best-effort signal if the test panics before calling `stop()`. + let _ = self.shutdown_tx.send(true); + } +} + +// ─── Config + fixture helpers ───────────────────────────────────────────────── + +/// Build a minimal [`Config`] whose site directory matches `site_root`. 
+fn build_test_config(site_root: &Path, port: u16) -> Config { + use std::num::NonZeroU16; + + let mut config = Config::default(); + config.server.port = NonZeroU16::new(port).unwrap_or(NonZeroU16::MIN); + // auto_port_fallback = false: the server must bind exactly `port`. + config.server.auto_port_fallback = false; + config.server.open_browser_on_start = false; + config.server.max_connections = 16; + // Use the directory basename; server joins data_dir + this name. + config.site.directory = String::from( + site_root + .file_name() + .and_then(|n| n.to_str()) + .unwrap_or("site"), + ); + config.site.index_file = "index.html".into(); + config.site.enable_directory_listing = false; + config.tor.enabled = false; + config.console.interactive = false; + config +} + +/// Create a temporary `/site/` tree. +/// +/// Returns `(TempDir, site_path)`. The caller must keep `TempDir` alive for +/// the duration of the test. +fn make_site( + files: &[(&str, &[u8])], +) -> Result<(tempfile::TempDir, std::path::PathBuf), Box> { + let tmp = tempfile::tempdir()?; + let site = tmp.path().join("site"); + std::fs::create_dir_all(&site)?; + for (name, content) in files { + std::fs::write(site.join(name), content)?; + } + Ok((tmp, site)) +} + +// ─── Response assertion helpers ─────────────────────────────────────────────── + +/// Extract the numeric HTTP status code from the response status line. +fn status_code(response: &str) -> Option { + response.split_whitespace().nth(1)?.parse().ok() +} + +/// `true` when there are no bytes after the `\r\n\r\n` header terminator. +fn body_is_empty(response: &str) -> bool { + response + .find("\r\n\r\n") + .is_none_or(|sep| response.len() == sep.saturating_add(4)) +} + +/// `true` when the named header appears in the response (case-insensitive). 
+fn has_header(response: &str, name: &str) -> bool { + let name_lc = name.to_ascii_lowercase(); + response + .lines() + .skip(1) // skip status line + .any(|l| l.to_ascii_lowercase().starts_with(&name_lc)) +} + +// ─── Core HTTP flow tests (task 5.2) ───────────────────────────────────────── + +#[tokio::test] +async fn get_index_html_returns_200() -> Result<(), Box> { + let (tmp, site) = make_site(&[("index.html", b"

    hello

    ")])?; + let server = TestServer::start(&site).await?; + + let response = server + .send(b"GET /index.html HTTP/1.1\r\nHost: localhost\r\n\r\n") + .await?; + server.stop().await; + drop(tmp); + + assert_eq!( + status_code(&response), + Some(200), + "GET /index.html must return 200:\n{response}" + ); + Ok(()) +} + +#[tokio::test] +async fn head_request_returns_headers_no_body() -> Result<(), Box> { + let (tmp, site) = make_site(&[("index.html", b"

    hello

    ")])?; + let server = TestServer::start(&site).await?; + + let response = server + .send(b"HEAD /index.html HTTP/1.1\r\nHost: localhost\r\n\r\n") + .await?; + server.stop().await; + drop(tmp); + + assert_eq!( + status_code(&response), + Some(200), + "HEAD must return 200:\n{response}" + ); + assert!( + has_header(&response, "content-length"), + "HEAD must include Content-Length:\n{response}" + ); + assert!( + body_is_empty(&response), + "HEAD must not include a body:\n{response}" + ); + Ok(()) +} + +#[tokio::test] +async fn get_root_with_index_file_serves_200() -> Result<(), Box> { + let (tmp, site) = make_site(&[("index.html", b"

    root

    ")])?; + let server = TestServer::start(&site).await?; + + let response = server + .send(b"GET / HTTP/1.1\r\nHost: localhost\r\n\r\n") + .await?; + server.stop().await; + drop(tmp); + + assert_eq!( + status_code(&response), + Some(200), + "GET / must serve index.html (200):\n{response}" + ); + Ok(()) +} + +#[tokio::test] +async fn directory_traversal_returns_403() -> Result<(), Box> { + let (tmp, site) = make_site(&[("index.html", b"safe")])?; + let server = TestServer::start(&site).await?; + + let response = server + .send(b"GET /../etc/passwd HTTP/1.1\r\nHost: localhost\r\n\r\n") + .await?; + server.stop().await; + drop(tmp); + + assert_eq!( + status_code(&response), + Some(403), + "traversal must return 403:\n{response}" + ); + Ok(()) +} + +#[tokio::test] +async fn oversized_request_header_returns_400() -> Result<(), Box> { + let (tmp, site) = make_site(&[("index.html", b"ok")])?; + let server = TestServer::start(&site).await?; + + // Build headers that exceed the 8 KiB limit enforced by `read_request`. 
+ let padding = format!("X-Padding: {}\r\n", "A".repeat(8_300)); + let request = format!("GET / HTTP/1.1\r\nHost: localhost\r\n{padding}\r\n"); + + let response = server.send(request.as_bytes()).await?; + server.stop().await; + drop(tmp); + + assert_eq!( + status_code(&response), + Some(400), + "oversized headers must return 400:\n{response}" + ); + Ok(()) +} + +#[tokio::test] +async fn get_nonexistent_file_returns_404() -> Result<(), Box> { + let (tmp, site) = make_site(&[("index.html", b"ok")])?; + let server = TestServer::start(&site).await?; + + let response = server + .send(b"GET /nonexistent.txt HTTP/1.1\r\nHost: localhost\r\n\r\n") + .await?; + server.stop().await; + drop(tmp); + + assert_eq!( + status_code(&response), + Some(404), + "missing file must return 404:\n{response}" + ); + Ok(()) +} + +#[tokio::test] +async fn post_request_returns_400() -> Result<(), Box> { + let (tmp, site) = make_site(&[("index.html", b"ok")])?; + let server = TestServer::start(&site).await?; + + let response = server + .send(b"POST /index.html HTTP/1.1\r\nHost: localhost\r\nContent-Length: 0\r\n\r\n") + .await?; + server.stop().await; + drop(tmp); + + assert_eq!( + status_code(&response), + Some(400), + "POST must be rejected with 400:\n{response}" + ); + Ok(()) +} + +// ─── Security header tests (task 5.3 — integration verification) ───────────── + +#[tokio::test] +async fn all_security_headers_present_on_html_response() -> Result<(), Box> { + let (tmp, site) = make_site(&[("index.html", b"

    ok

    ")])?; + let server = TestServer::start(&site).await?; + + let response = server + .send(b"GET /index.html HTTP/1.1\r\nHost: localhost\r\n\r\n") + .await?; + server.stop().await; + drop(tmp); + + // All five headers must be present on an HTML response. + for header in &[ + "x-content-type-options", + "x-frame-options", + "referrer-policy", + "permissions-policy", + "content-security-policy", + ] { + assert!( + has_header(&response, header), + "missing security header '{header}' on HTML:\n{response}" + ); + } + Ok(()) +} + +#[tokio::test] +async fn csp_absent_and_base_headers_present_on_non_html_response( +) -> Result<(), Box> { + let (tmp, site) = make_site(&[("style.css", b"body{color:red}")])?; + let server = TestServer::start(&site).await?; + + let response = server + .send(b"GET /style.css HTTP/1.1\r\nHost: localhost\r\n\r\n") + .await?; + server.stop().await; + drop(tmp); + + // The four universal headers must be present on all response types. + for header in &[ + "x-content-type-options", + "x-frame-options", + "referrer-policy", + "permissions-policy", + ] { + assert!( + has_header(&response, header), + "missing security header '{header}' on CSS:\n{response}" + ); + } + // Content-Security-Policy must NOT appear on non-HTML responses. 
+ assert!( + !has_header(&response, "content-security-policy"), + "CSP must not appear on CSS responses:\n{response}" + ); + Ok(()) +} From afff88339bf316722b1468d0a97874102e6952a8 Mon Sep 17 00:00:00 2001 From: csd113 Date: Sat, 21 Mar 2026 10:38:08 -0700 Subject: [PATCH 4/4] CSP was too agressive, now fixed and modifyable in the settings file --- src/.DS_Store | Bin 8196 -> 0 bytes src/config/defaults.rs | 12 +++++++----- src/config/loader.rs | 2 +- src/config/mod.rs | 6 +++--- src/server/handler.rs | 25 +++++++++++++++++++++++++ 5 files changed, 36 insertions(+), 9 deletions(-) delete mode 100644 src/.DS_Store diff --git a/src/.DS_Store b/src/.DS_Store deleted file mode 100644 index 406570781817405839d86fabdee13f9f6cb84885..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8196 zcmeHM%}*0S6n|5Ywj#28DQHYK^kM=*P%s)}s0E{qF%V0z2w1l5v~JkWG`rg(5R#tt zq<6i0^Y^5J!;>DOcLuP(&XWo0e^V`0ec?$rLL?u51FbV(> ztOAW*Y<^M1xTtL<6!i2V60`@fp_p`dF6;1Yo3`%I3}^;41DXNNfM(!dU;y`QQ6fj| z`)X9TngPwgf60JYA55$Q;{nzb%B2GvnF1hoW4A0QV;&%F%mCv7))Y!p^r^B35}1-8 z#URQY$4#M*7!Rboq`xJfs=yeOA%qy(juk_8S9(Lj%JDBO@cHPLGbAIXjjyhO$;}yXcCiUEmIP6HbM$7hOA_ zDqA_571KU_?5FHKnlT1jRPWLJ!^5tX-7@M7A9hH0w_Hk@^tV(fc+^if$ZHv+-^;n& zahADf`!;858y<3<^l2%{3LNiPMV^<+V#r&iu7}uWliqZ!EOnMxg8M!%ty!*Xu_8_E zrtKVO>dE`1TfAI$sc;#a|Ek!G#qZRG^GYxvrUX1;4%ejsN$j1qCFSS}O!o(osWP;3+>tvqXCQ0&u_#8nPL^%s3k zUDsRUq15!Et0|W`oE5QO*o1>1@Guk{pbej~L>Ror+8735)ZtLXKN|z5@uL@Nlkwkc zcSzW@aG(RHa|5U4Rkwvs2`m{1Q{aNv96IEATQIm#!JzTcqSP8_A#tp04748bfTGu` zXq!EUM94SlcA-z+swH=@mkm1@8tEb#=u$k4iv0iP(ck|yTbcpQKnF8Gx))Q6;-FtY zJGIVLEXpR blocks, and style= attributes. 
+# Tighten if your site uses no inline code: +# content_security_policy = "default-src 'self'" +# Relax further for third-party CDN resources: +# content_security_policy = "default-src 'self' cdn.example.com; script-src 'self' 'unsafe-inline'" +content_security_policy = "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'" # ─── [site] ─────────────────────────────────────────────────────────────────── diff --git a/src/config/loader.rs b/src/config/loader.rs index 7594840..c32aca6 100644 --- a/src/config/loader.rs +++ b/src/config/loader.rs @@ -207,7 +207,7 @@ bind = "127.0.0.1" auto_port_fallback = true open_browser_on_start = false max_connections = 256 -content_security_policy = "default-src 'self'" +content_security_policy = "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'" {extra} [site] diff --git a/src/config/mod.rs b/src/config/mod.rs index a82a384..ad6dc3c 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -91,7 +91,7 @@ pub struct ServerConfig { pub max_connections: u32, /// Value of the `Content-Security-Policy` header sent with every HTML - /// response (task 5.3). The default `"default-src 'self'"` restricts all + /// response (task 5.3). The default `"default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'"` restricts all /// content to the same origin. 
/// /// Operators serving CDN fonts, analytics scripts, or other third-party @@ -99,7 +99,7 @@ pub struct ServerConfig { /// /// ```toml /// [server] - /// content_security_policy = "default-src 'self'; script-src 'self' cdn.example.com" + /// content_security_policy = "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; script-src 'self' cdn.example.com" /// ``` /// /// **Tor note:** `Referrer-Policy: no-referrer` is always sent regardless @@ -183,7 +183,7 @@ impl Default for Config { auto_port_fallback: true, open_browser_on_start: false, max_connections: 256, - content_security_policy: "default-src 'self'".into(), + content_security_policy: "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'".into(), }, site: SiteConfig { directory: "site".into(), diff --git a/src/server/handler.rs b/src/server/handler.rs index 8affdb0..400dda8 100644 --- a/src/server/handler.rs +++ b/src/server/handler.rs @@ -172,6 +172,23 @@ pub async fn handle( .await?; metrics.add_error(); } + Resolved::Redirect(location) => { + let body = format!("Redirecting to {location}"); + let mut hdr = String::new(); + let _ = std::fmt::write( + &mut hdr, + format_args!( + "HTTP/1.1 301 Moved Permanently\r\n Location: {location}\r\n Content-Type: text/plain\r\n Content-Length: {len}\r\n Connection: close\r\n \r\n", + len = body.len() + ), + ); + stream.write_all(hdr.as_bytes()).await?; + if !is_head { + stream.write_all(body.as_bytes()).await?; + } + stream.flush().await?; + metrics.add_request(); + } Resolved::Fallback => { write_response( &mut stream, @@ -373,6 +390,8 @@ pub(crate) enum Resolved { Fallback, Forbidden, DirectoryListing(std::path::PathBuf), + /// 301 redirect to the given Location URL (used to append a trailing slash). 
+ Redirect(String), } #[must_use] @@ -388,6 +407,12 @@ pub(crate) fn resolve_path( let candidate = canonical_root.join(relative); let target = if candidate.is_dir() { + // If the URL has no trailing slash, redirect so that relative links + // in the served index file resolve against the correct base URL. + if !url_path.ends_with('/') { + let redirect_to = format!("{url_path}/"); + return Resolved::Redirect(redirect_to); + } let idx = candidate.join(index_file); if idx.exists() { idx