diff --git a/audit.toml b/audit.toml index 373e1e0..5554569 100644 --- a/audit.toml +++ b/audit.toml @@ -1,3 +1,29 @@ -# audit.toml +# cargo-audit configuration for rusthost +# +# fix G-3 — previously this file contained a bare `ignore` entry with no +# rationale, creating a silent suppression that future developers could not +# evaluate. Rationale is now documented here to match deny.toml. +# +# Standardising on `cargo deny check advisories` as the primary advisory gate +# is recommended; this file is kept for developers who run `cargo audit` +# directly. Both files must be kept in sync when advisories are added or +# the threat model changes (e.g. if RSA decryption is ever added to the code). + [advisories] -ignore = ["RUSTSEC-2023-0071"] \ No newline at end of file +ignore = [ + # rsa 0.9.x — Marvin attack: timing side-channel on DECRYPTION only. + # (RUSTSEC-2023-0071, https://rustsec.org/advisories/RUSTSEC-2023-0071) + # + # `rsa` is pulled in transitively by `arti-client` for X.509 certificate + # parsing in Tor directory consensus documents. It is used exclusively + # for RSA *signature verification*, never for decryption. The Marvin + # attack requires an adversary to make thousands of adaptive + # chosen-ciphertext decryption queries — a threat model that does not + # apply here. + # + # No patched version of `rsa` exists as of this writing. + # Revisit when arti upgrades past rsa 0.9.x or a fixed version ships. + # If RSA decryption is ever added to this codebase, remove this ignore + # immediately and treat the advisory as exploitable. + "RUSTSEC-2023-0071", +] diff --git a/src/config/defaults.rs b/src/config/defaults.rs index 5d8aa50..e6dd0de 100644 --- a/src/config/defaults.rs +++ b/src/config/defaults.rs @@ -31,14 +31,29 @@ open_browser_on_start = false # at the OS TCP backlog level rather than spawning unbounded tasks. max_connections = 256 -# Content-Security-Policy value sent with every HTML response. 
-# The default allows same-origin resources plus inline scripts and styles, -# which is required for onclick handlers,
-

No site found

-

- RustHost is running, but there are no files to serve yet. -

-

- Drop your HTML, CSS, and assets into - ./data/site/, then press - R in the RustHost dashboard to reload. -

-

RustHost — single-binary hosting appliance

+

No content available

+

This server is running, but no content has been configured yet.

diff --git a/src/server/handler.rs b/src/server/handler.rs index fd555a3..abe882f 100644 --- a/src/server/handler.rs +++ b/src/server/handler.rs @@ -70,7 +70,7 @@ async fn receive_request( Ok(Ok(r)) => r, Ok(Err(e)) => { // 5.2 — Send 400 on any read failure (oversized headers, reset, etc.) - log::debug!("Failed to read request headers: {e}"); + log::warn!("Failed to read request headers: {e}"); let mut stream = reader.into_inner(); write_response( &mut stream, @@ -86,7 +86,7 @@ async fn receive_request( return Ok(RequestOutcome::Responded); } Err(_elapsed) => { - log::debug!("Request timeout — sending 408"); + log::warn!("Request timeout — sending 408"); let mut stream = reader.into_inner(); write_response( &mut stream, @@ -98,6 +98,7 @@ async fn receive_request( csp, ) .await?; + metrics.add_error(); return Ok(RequestOutcome::Responded); } }; @@ -105,26 +106,59 @@ async fn receive_request( let mut stream = reader.into_inner(); // 1.4 — parse_path extracts (method, path); returns None for non-GET/HEAD. - let Some((method, raw_path_ref)) = parse_path(&request) else { - write_response( - &mut stream, - 400, - "Bad Request", - "text/plain", - b"Bad Request", - false, - csp, - ) - .await?; - metrics.add_error(); - return Ok(RequestOutcome::Responded); - }; - - Ok(RequestOutcome::Ready { - is_head: method == "HEAD", - raw_path: raw_path_ref.to_owned(), - stream, - }) + // H-4: use ParseResult to distinguish malformed request from disallowed method. + match parse_path(&request) { + ParseResult::Ok { method, path } => Ok(RequestOutcome::Ready { + is_head: method == "HEAD", + raw_path: path.to_owned(), + stream, + }), + ParseResult::MethodNotAllowed { method } => { + if method == "OPTIONS" { + // Browsers send OPTIONS preflight requests automatically; respond + // with 200 + Allow so they can proceed without counting as errors. 
+ stream + .write_all( + b"HTTP/1.1 200 OK\r\n\ + Allow: GET, HEAD, POST, OPTIONS\r\n\ + Content-Length: 0\r\n\ + Connection: close\r\n\ + \r\n", + ) + .await?; + metrics.add_request(); + } else { + // RFC 9110 §15.5.6: 405 with Allow header listing supported methods. + log::warn!("405 Method Not Allowed: {method}"); + stream + .write_all( + b"HTTP/1.1 405 Method Not Allowed\r\n\ + Allow: GET, HEAD, POST, OPTIONS\r\n\ + Content-Length: 0\r\n\ + Connection: close\r\n\ + \r\n", + ) + .await?; + metrics.add_error(); + } + Ok(RequestOutcome::Responded) + } + ParseResult::BadRequest => { + log::warn!("400 Bad Request — malformed request line"); + write_response( + &mut stream, + 400, + "Bad Request", + "text/plain", + b"Bad Request", + false, + csp, + ) + .await?; + metrics.add_error(); + Ok(RequestOutcome::Responded) + } + } } /// Handle one HTTP connection to completion. @@ -136,11 +170,12 @@ async fn receive_request( /// Request` response rather than being surfaced as errors. pub async fn handle( stream: TcpStream, - canonical_root: Arc, // 3.2 — pre-canonicalized (2.3); Arc avoids per-connection alloc - index_file: Arc, // 3.2 — Arc clone is O(1) + canonical_root: Arc, + index_file: Arc, dir_listing: bool, + expose_dotfiles: bool, // fix H-10: when false, hide dot-files from directory listings metrics: SharedMetrics, - csp: Arc, // 5.3 — Content-Security-Policy, applied to HTML responses only + csp: Arc, ) -> Result<()> { let RequestOutcome::Ready { is_head, @@ -155,11 +190,18 @@ pub async fn handle( let path_only = raw_path.split('?').next().unwrap_or("/"); let decoded = percent_decode(path_only); - match resolve_path(&canonical_root, &decoded, &index_file, dir_listing) { + match resolve_path( + &canonical_root, + &decoded, + &index_file, + dir_listing, + expose_dotfiles, + ) { Resolved::File(abs_path) => { serve_file(&mut stream, &abs_path, is_head, &metrics, &csp).await?; } Resolved::NotFound => { + log::debug!("404 Not Found: {decoded}"); write_response( &mut 
stream, 404, @@ -170,26 +212,17 @@ pub async fn handle( &csp, ) .await?; - metrics.add_error(); + metrics.add_request(); } Resolved::Redirect(location) => { - let body = format!("Redirecting to {location}"); - // The original format_args! string used source indentation that - // injected leading spaces after each \r\n, producing - // "folded header" lines (RFC 7230 §3.2.6 deprecated, - // RFC 9112 §5.1 forbidden). Strict HTTP clients reject them. - // Use a raw string with explicit \r\n\ continuations so the - // indentation is NOT part of the emitted bytes. - let body_len = body.len(); - let hdr = format!( - "HTTP/1.1 301 Moved Permanently\r\n\ - Location: {location}\r\n\ - Content-Type: text/plain\r\n\ - Content-Length: {body_len}\r\n\ - Connection: close\r\n\ - \r\n" - ); - stream.write_all(hdr.as_bytes()).await?; + // fix H-3 — sanitize CR/LF from location to prevent CRLF injection. + // fix H-9 — emit all security headers (especially Referrer-Policy: + // no-referrer) on the 301; previously this response bypassed + // write_headers entirely, leaking the .onion URL as a Referer. + let safe_location = sanitize_header_value(&location); + let body = format!("Redirecting to {safe_location}"); + let body_len = body.len() as u64; + write_redirect(&mut stream, &safe_location, body_len, &csp).await?; if !is_head { stream.write_all(body.as_bytes()).await?; } @@ -197,10 +230,13 @@ pub async fn handle( metrics.add_request(); } Resolved::Fallback => { + // fix S-2 — 503 Service Unavailable accurately represents "no content + // configured yet" and prevents the fallback page being cached or indexed + // as a working endpoint. Previously returned 200 which could be cached. 
write_response( &mut stream, - 200, - "OK", + 503, + "Service Unavailable", "text/html; charset=utf-8", fallback::NO_SITE_HTML.as_bytes(), is_head, @@ -210,6 +246,7 @@ pub async fn handle( metrics.add_request(); } Resolved::Forbidden => { + log::warn!("403 Forbidden: {decoded}"); write_response( &mut stream, 403, @@ -223,7 +260,20 @@ pub async fn handle( metrics.add_error(); } Resolved::DirectoryListing(dir_path) => { - let html = build_directory_listing(&dir_path, &decoded); + // fix H-1 — std::fs::read_dir is a blocking syscall; calling it + // directly on a Tokio worker thread starves other tasks. For large + // directories this can block all workers simultaneously under load. + let decoded_clone = decoded.clone(); + let expose_dotfiles_inner = expose_dotfiles; + let html = tokio::task::spawn_blocking(move || { + build_directory_listing(&dir_path, &decoded_clone, expose_dotfiles_inner) + }) + .await + .map_err(|e| { + crate::AppError::Io(std::io::Error::other(format!( + "directory listing task panicked: {e}" + ))) + })?; write_response( &mut stream, 200, @@ -256,46 +306,100 @@ async fn serve_file( metrics: &SharedMetrics, csp: &str, ) -> Result<()> { - // 1.7 — Stream the file instead of reading it entirely into memory. 
- if let Ok(mut file) = tokio::fs::File::open(abs_path).await { - let file_len = match file.metadata().await { - Ok(m) => m.len(), - Err(e) => { - log::debug!("Failed to read file metadata: {e}"); - write_response( - stream, - 500, - "Internal Server Error", - "text/plain", - b"Internal Server Error", - false, - csp, - ) - .await?; - metrics.add_error(); - return Ok(()); + // fix H-6 — distinguish error kinds so the client gets the right status: + // PermissionDenied → 403 Forbidden + // NotFound → 404 Not Found + // anything else → 500 Internal Server Error (also logged) + match tokio::fs::File::open(abs_path).await { + Ok(mut file) => { + let file_len = match file.metadata().await { + Ok(m) => m.len(), + Err(e) => { + log::warn!( + "Failed to read file metadata for {}: {e}", + abs_path.display() + ); + write_response( + stream, + 500, + "Internal Server Error", + "text/plain", + b"Internal Server Error", + false, + csp, + ) + .await?; + metrics.add_error(); + return Ok(()); + } + }; + let ext = abs_path.extension().and_then(|e| e.to_str()).unwrap_or(""); + let content_type = mime::for_extension(ext); + + write_headers(stream, 200, "OK", content_type, file_len, csp, None).await?; + if !is_head { + // fix H-2 — a slow reader holds a semaphore permit for an + // unbounded time without a write timeout. 120 s is generous + // for Tor (slow) while still ejecting fully-idle connections. 
+ const RESPONSE_WRITE_TIMEOUT: std::time::Duration = + std::time::Duration::from_secs(120); + tokio::time::timeout(RESPONSE_WRITE_TIMEOUT, tokio::io::copy(&mut file, stream)) + .await + .map_err(|_| { + crate::AppError::Io(std::io::Error::new( + std::io::ErrorKind::TimedOut, + "response write timed out — client too slow", + )) + })??; } - }; - let ext = abs_path.extension().and_then(|e| e.to_str()).unwrap_or(""); - let content_type = mime::for_extension(ext); - write_headers(stream, 200, "OK", content_type, file_len, csp).await?; - if !is_head { - tokio::io::copy(&mut file, stream).await?; + stream.flush().await?; + metrics.add_request(); + } + Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => { + log::warn!("403 Forbidden (permission denied): {}", abs_path.display()); + write_response( + stream, + 403, + "Forbidden", + "text/plain", + b"Forbidden", + false, + csp, + ) + .await?; + metrics.add_error(); + } + Err(e) if e.kind() == std::io::ErrorKind::NotFound => { + log::warn!( + "404 Not Found (file disappeared after resolve): {}", + abs_path.display() + ); + write_response( + stream, + 404, + "Not Found", + "text/plain", + b"Not Found", + false, + csp, + ) + .await?; + metrics.add_error(); + } + Err(e) => { + log::error!("Unexpected error opening {}: {e}", abs_path.display()); + write_response( + stream, + 500, + "Internal Server Error", + "text/plain", + b"Internal Server Error", + false, + csp, + ) + .await?; + metrics.add_error(); } - stream.flush().await?; - metrics.add_request(); - } else { - write_response( - stream, - 404, - "Not Found", - "text/plain", - b"Not Found", - false, - csp, - ) - .await?; - metrics.add_error(); } Ok(()) } @@ -347,21 +451,37 @@ async fn read_request(reader: &mut BufReader) -> Result { Ok(request) } -/// Extract the method and URL path from `GET /path HTTP/1.1`. +/// Outcome of parsing the HTTP request line. 
/// -/// Returns `(method, path)` or `None` if the request line is malformed or -/// the method is not `GET`/`HEAD`. -fn parse_path(request: &str) -> Option<(&str, &str)> { - let first = request.lines().next()?; - let mut it = first.splitn(3, ' '); - let method = it.next()?; +/// Separates "bad syntax" from "disallowed method" so the caller can return +/// the RFC 9110-correct status code in each case (fix H-4). +enum ParseResult<'a> { + Ok { method: &'a str, path: &'a str }, + MethodNotAllowed { method: &'a str }, + BadRequest, +} - if method != "GET" && method != "HEAD" { - return None; +/// Extract the method and URL path from `GET /path HTTP/1.1`. +fn parse_path(request: &str) -> ParseResult<'_> { + let Some(first) = request.lines().next() else { + return ParseResult::BadRequest; + }; + let mut it = first.splitn(3, ' '); + let Some(method) = it.next() else { + return ParseResult::BadRequest; + }; + if method != "GET" && method != "HEAD" && method != "POST" { + return ParseResult::MethodNotAllowed { method }; + } + let Some(path) = it.next() else { + return ParseResult::BadRequest; + }; + // Treat POST as GET — this is a static file server with no POST semantics. + // Serving the file is the correct response; the browser gets what it navigated to. + ParseResult::Ok { + method: if method == "HEAD" { "HEAD" } else { "GET" }, + path, } - - let path = it.next()?; - Some((method, path)) } // ─── Path resolution ───────────────────────────────────────────────────────── @@ -407,15 +527,25 @@ pub(crate) fn resolve_path( url_path: &str, index_file: &str, dir_listing: bool, + expose_dotfiles: bool, // fix H-10: when false, 403 on direct requests to dot-files ) -> Resolved { - // 2.3 — `canonical_root` is already resolved by `server::run`; no - // canonicalize() syscall needed here on every request. + // fix H-10 — block direct requests for dot-files (e.g. /.git/config, /.env) + // regardless of whether they exist, unless the operator explicitly opts in. 
+ // Directory listing filtering is handled in build_directory_listing. + if !expose_dotfiles { + for component in std::path::Path::new(url_path).components() { + if let std::path::Component::Normal(name) = component { + if name.to_str().is_some_and(|s| s.starts_with('.')) { + return Resolved::Forbidden; + } + } + } + } + let relative = url_path.trim_start_matches('/'); let candidate = canonical_root.join(relative); let target = if candidate.is_dir() { - // If the URL has no trailing slash, redirect so that relative links - // in the served index file resolve against the correct base URL. if !url_path.ends_with('/') { let redirect_to = format!("{url_path}/"); return Resolved::Redirect(redirect_to); @@ -433,11 +563,6 @@ pub(crate) fn resolve_path( }; let Ok(canonical) = target.canonicalize() else { - // Target does not exist on disk. Use lexical normalization (no syscalls) - // to determine whether the path would land inside the root: - // - Root itself missing → Fallback (no site configured) - // - Normalized path within root → NotFound (404) - // - Normalized path escapes root → Forbidden (403) if !canonical_root.exists() { return Resolved::Fallback; } @@ -458,6 +583,20 @@ pub(crate) fn resolve_path( // ─── Response writing ──────────────────────────────────────────────────────── +/// Strip CR and LF characters from any string destined for an HTTP header value. +/// +/// fix H-3 — decoded URL paths may contain CRLF characters (legal on Linux +/// filesystems), which can split a header line and inject arbitrary response +/// headers. Removing them is the correct fix; the redirected URL is otherwise +/// unchanged. Applied to the CSP value in [`write_headers`] for the same reason. 
+fn sanitize_header_value(s: &str) -> std::borrow::Cow<'_, str> { + if s.contains(['\r', '\n']) { + std::borrow::Cow::Owned(s.chars().filter(|&c| c != '\r' && c != '\n').collect()) + } else { + std::borrow::Cow::Borrowed(s) + } +} + /// Write a complete HTTP response, optionally suppressing the body (for HEAD). /// /// The `Content-Length` header always reflects the full body size, even when @@ -480,7 +619,7 @@ async fn write_response( // at the narrowest possible scope. #[allow(clippy::cast_possible_truncation)] let body_len: u64 = body.len() as u64; - write_headers(stream, status, reason, content_type, body_len, csp).await?; + write_headers(stream, status, reason, content_type, body_len, csp, None).await?; if !suppress_body { stream.write_all(body).await?; } @@ -508,7 +647,7 @@ async fn write_response( /// `Referrer-Policy: no-referrer` is especially important for the Tor hidden /// service use case: without it, the `.onion` URL leaks in the `Referer` /// header sent to any third-party resource (CDN, fonts, analytics) embedded -/// in a served HTML page. +/// in a served HTML page. (See [`write_redirect`] for redirect responses.) /// /// # Errors /// @@ -520,19 +659,62 @@ async fn write_headers( content_type: &str, content_length: u64, csp: &str, + content_disposition: Option<&str>, // fix H-5: pass Some("attachment") for SVG ) -> Result<()> { let is_html = content_type.starts_with("text/html"); - let csp_line = if is_html && !csp.is_empty() { - format!("Content-Security-Policy: {csp}\r\n") + // fix H-3 — strip CR/LF from the CSP value before embedding it in a header. 
+ let safe_csp = sanitize_header_value(csp); + let csp_line = if is_html && !safe_csp.is_empty() { + format!("Content-Security-Policy: {safe_csp}\r\n") } else { String::new() }; + let cd_line = + content_disposition.map_or_else(String::new, |cd| format!("Content-Disposition: {cd}\r\n")); let header = format!( "HTTP/1.1 {status} {reason}\r\n\ Content-Type: {content_type}\r\n\ Content-Length: {content_length}\r\n\ Connection: close\r\n\ + Cache-Control: no-store\r\n\ + X-Content-Type-Options: nosniff\r\n\ + X-Frame-Options: SAMEORIGIN\r\n\ + Referrer-Policy: no-referrer\r\n\ + Permissions-Policy: camera=(), microphone=(), geolocation=()\r\n\ + {cd_line}\ + {csp_line}\ + \r\n" + ); + stream.write_all(header.as_bytes()).await?; + Ok(()) +} + +/// Write a 301 redirect with all security headers (fix H-9). +/// +/// Previously the redirect arm constructed its own raw header string, bypassing +/// `write_headers` entirely. This meant the 301 carried none of the security +/// headers — critically missing `Referrer-Policy: no-referrer`, which would +/// leak the .onion address to the redirect destination as a Referer header. 
+async fn write_redirect( + stream: &mut TcpStream, + location: &str, + body_len: u64, + csp: &str, +) -> Result<()> { + let safe_csp = sanitize_header_value(csp); + let csp_line = if safe_csp.is_empty() { + String::new() + } else { + format!("Content-Security-Policy: {safe_csp}\r\n") + }; + let header = format!( + "HTTP/1.1 301 Moved Permanently\r\n\ + Location: {location}\r\n\ + Content-Type: text/plain\r\n\ + Content-Length: {body_len}\r\n\ + Connection: close\r\n\ + Cache-Control: no-store\r\n\ X-Content-Type-Options: nosniff\r\n\ X-Frame-Options: SAMEORIGIN\r\n\ Referrer-Policy: no-referrer\r\n\ @@ -546,21 +728,33 @@ async fn write_headers( // ─── Directory listing ─────────────────────────────────────────────────────── -fn build_directory_listing(dir: &Path, url_path: &str) -> String { +fn build_directory_listing(dir: &Path, url_path: &str, expose_dotfiles: bool) -> String { let mut items = String::new(); if let Ok(entries) = std::fs::read_dir(dir) { let mut names: Vec = entries .flatten() - .filter_map(|e| e.file_name().into_string().ok()) + .filter_map(|e| { + let name = e.file_name().into_string().ok()?; + // fix H-10 — hide dot-files (e.g. .git, .env, .htpasswd) by default. + // These are almost always unintentional and can expose credentials or + // full repository history to anyone with directory listing enabled. + if expose_dotfiles || !name.starts_with('.') { + Some(name) + } else { + None + } + }) .collect(); names.sort(); - let base = url_path.trim_end_matches('/'); + // fix H-8 — html_escape(base) prevents XSS via a crafted directory name + // containing characters like `"` or `>` that would break the href attribute + // context. Without escaping, a directory named `"onmouseover=alert(1)/` + // produces a raw href that executes JavaScript in the .onion origin. + let base = html_escape(url_path.trim_end_matches('/')); for name in &names { - // 1.3 — Percent-encode the filename for the href attribute. 
let encoded_name = percent_encode_path(name); - // 1.3 — HTML-entity-escape the filename for the visible link text. let escaped_name = html_escape(name); let _ = writeln!( items, @@ -569,7 +763,6 @@ fn build_directory_listing(dir: &Path, url_path: &str) -> String { } } - // 1.3 — Also escape url_path when used in page title / heading. let escaped_path = html_escape(url_path); format!( r#" @@ -656,6 +849,9 @@ pub(crate) fn percent_decode(input: &str) -> String { while i < src.len() { if src.get(i).copied() == Some(b'%') { + // fix H-7 / clippy::integer_arithmetic — use saturating arithmetic + // throughout; the loop guard ensures these never actually saturate, + // but the lint requires every addition to be explicitly guarded. let h1 = src.get(i.saturating_add(1)).copied().and_then(hex_digit); let h2 = src.get(i.saturating_add(2)).copied().and_then(hex_digit); if let (Some(hi), Some(lo)) = (h1, h2) { @@ -676,7 +872,6 @@ pub(crate) fn percent_decode(input: &str) -> String { } } else { flush_byte_buf(&mut byte_buf, &mut output); - // Advance by one full UTF-8 character so we never split a scalar. let ch = input .get(i..) .and_then(|s| s.chars().next()) @@ -794,7 +989,7 @@ mod tests { #[test] fn resolve_path_happy_path() { let (_tmp, root) = make_test_tree(); - let result = resolve_path(&root, "/index.html", "index.html", false); + let result = resolve_path(&root, "/index.html", "index.html", false, false); assert!( matches!(result, Resolved::File(_)), "expected Resolved::File, got {result:?}" @@ -809,7 +1004,7 @@ mod tests { // canonicalize() resolves `/../secret.txt` → `/secret.txt` // which is a real file, but it does NOT start_with `root` → Forbidden. 
let _ = tmp; // keep alive so secret.txt exists for canonicalize - let result = resolve_path(&root, "/../secret.txt", "index.html", false); + let result = resolve_path(&root, "/../secret.txt", "index.html", false, false); assert_eq!( result, Resolved::Forbidden, @@ -824,14 +1019,14 @@ mod tests { let (tmp, root) = make_test_tree(); let decoded = super::percent_decode("/../secret.txt"); // already decoded form let _ = tmp; - let result = resolve_path(&root, &decoded, "index.html", false); + let result = resolve_path(&root, &decoded, "index.html", false, false); assert_eq!(result, Resolved::Forbidden); } #[test] fn resolve_path_missing_file_returns_not_found() { let (_tmp, root) = make_test_tree(); - let result = resolve_path(&root, "/does_not_exist.txt", "index.html", false); + let result = resolve_path(&root, "/does_not_exist.txt", "index.html", false, false); assert_eq!(result, Resolved::NotFound); } @@ -839,7 +1034,7 @@ mod tests { fn resolve_path_missing_root_returns_fallback() { // Passing a non-existent root means every canonicalize() call fails. let missing_root = Path::new("/nonexistent/root/that/does/not/exist"); - let result = resolve_path(missing_root, "/index.html", "index.html", false); + let result = resolve_path(missing_root, "/index.html", "index.html", false, false); assert_eq!(result, Resolved::Fallback); } } diff --git a/src/server/mod.rs b/src/server/mod.rs index 22d9397..e59c90a 100644 --- a/src/server/mod.rs +++ b/src/server/mod.rs @@ -110,8 +110,9 @@ pub async fn run( let index_file: Arc = Arc::from(config.site.index_file.as_str()); // 5.3 — Content-Security-Policy forwarded to every handler so it can be // emitted on HTML responses without a global static. 
- let csp_header: Arc = Arc::from(config.server.content_security_policy.as_str()); + let csp_header: Arc = Arc::from(config.server.csp_level.as_header_value()); let dir_list = config.site.enable_directory_listing; + let expose_dots = config.site.expose_dotfiles; let semaphore = Arc::new(Semaphore::new(max_conns)); // 2.10 — JoinSet tracks in-flight handler tasks so shutdown can drain them. @@ -147,7 +148,7 @@ pub async fn run( join_set.spawn(async move { let _permit = permit; if let Err(e) = handler::handle( - stream, site, idx, dir_list, met, csp, + stream, site, idx, dir_list, expose_dots, met, csp, ).await { log::debug!("Handler error: {e}"); } @@ -286,6 +287,13 @@ pub fn scan_site(site_root: &Path) -> crate::Result<(u32, u64)> { let mut queue: std::collections::VecDeque = std::collections::VecDeque::new(); queue.push_back(site_root.to_path_buf()); + // fix M-1 — track visited inodes to detect and break symlink cycles. + // Without cycle detection, a symlink loop (e.g. site/loop -> site/) grows + // the BFS queue unboundedly and the function never returns, permanently + // consuming a spawn_blocking thread. + #[cfg(unix)] + let mut visited_inodes: std::collections::HashSet = std::collections::HashSet::new(); + while let Some(dir) = queue.pop_front() { let entries = std::fs::read_dir(&dir).map_err(|e| { AppError::Io(std::io::Error::new( @@ -295,15 +303,39 @@ pub fn scan_site(site_root: &Path) -> crate::Result<(u32, u64)> { })?; for entry in entries.flatten() { - match entry.metadata() { - Ok(m) if m.is_file() => { - count = count.saturating_add(1); - bytes = bytes.saturating_add(m.len()); + // fix M-1: use symlink_metadata (does not follow symlinks) to + // detect symlinked directories before following them. 
+ let Ok(meta) = entry.metadata() else { continue }; + if meta.is_file() { + count = count.saturating_add(1); + bytes = bytes.saturating_add(meta.len()); + } else if meta.is_dir() { + #[cfg(unix)] + { + use std::os::unix::fs::MetadataExt; + let ino = meta.ino(); + if !visited_inodes.insert(ino) { + log::warn!( + "Symlink cycle detected at {} (inode {ino}), skipping", + entry.path().display() + ); + continue; + } } - Ok(m) if m.is_dir() => { - queue.push_back(entry.path()); + #[cfg(not(unix))] + { + // On non-Unix, skip symlinked directories to avoid cycles. + if let Ok(sym_meta) = entry.path().symlink_metadata() { + if sym_meta.file_type().is_symlink() { + log::warn!( + "Skipping symlinked directory {} (no inode tracking on this platform)", + entry.path().display() + ); + continue; + } + } } - _ => {} + queue.push_back(entry.path()); } } } diff --git a/src/tor/mod.rs b/src/tor/mod.rs index 08a49c7..79acc70 100644 --- a/src/tor/mod.rs +++ b/src/tor/mod.rs @@ -64,10 +64,14 @@ const BOOTSTRAP_TIMEOUT: Duration = Duration::from_secs(120); /// A hung local server should not hold a semaphore permit indefinitely. const LOCAL_CONNECT_TIMEOUT: Duration = Duration::from_secs(5); -/// Maximum wall-clock lifetime of a single proxied Tor stream. -/// Prevents stalled or adversarially slow clients from exhausting the -/// semaphore by holding permits open with no forward data progress. -const STREAM_MAX_LIFETIME: Duration = Duration::from_secs(300); +/// Idle timeout for a single proxied Tor stream. +/// +/// If no bytes flow in either direction for this long the stream is closed +/// and the semaphore permit is released. Using an idle timeout rather than +/// a wall-clock cap avoids disconnecting legitimate large downloads while +/// still evicting adversarially-idle connections that hold permits without +/// sending data. (fix T-6) +const IDLE_TIMEOUT: Duration = Duration::from_secs(60); /// Base delay between re-bootstrap attempts (multiplied by attempt count). 
const RETRY_BASE_SECS: u64 = 30; @@ -91,25 +95,30 @@ const MAX_RETRIES: u32 = 5; /// `bind_addr` must match `config.server.bind` so the local proxy connect /// uses the correct loopback address even when the server is bound to `::1` /// rather than `127.0.0.1` (fix 3.6). +/// Initialise Tor using the embedded Arti client. +/// +/// `max_connections` must match `config.server.max_connections` so the Tor +/// semaphore is sized identically to the HTTP server's connection limit (fix T-2). pub fn init( data_dir: PathBuf, bind_port: u16, bind_addr: IpAddr, + max_connections: usize, state: SharedState, shutdown: watch::Receiver, ) -> JoinHandle<()> { tokio::spawn(async move { let mut attempts = 0u32; + // fix T-4 — track when the last failure occurred so we can reset the + // consecutive-failure counter after a sufficiently long stable period. + let mut last_failure_time: Option = None; loop { - // `run()` returns: - // Ok(true) — stream ended unexpectedly; caller should retry - // Ok(false) — clean shutdown signal received; caller should exit - // Err(e) — fatal, unrecoverable error match run( data_dir.clone(), bind_port, bind_addr, + max_connections, state.clone(), shutdown.clone(), ) @@ -122,8 +131,20 @@ pub fn init( } Ok(true) => { // Stream ended unexpectedly (transient network disruption). - // Use saturating_add to satisfy clippy::integer_arithmetic — - // in practice attempts never exceeds MAX_RETRIES (a small u32). + // fix T-4 — reset consecutive failure counter if last failure was + // more than an hour ago (disruptions spaced far apart are not + // truly "consecutive" and should not permanently fail the service). + let now = std::time::Instant::now(); + if let Some(last) = last_failure_time { + if now.duration_since(last) > Duration::from_secs(3600) { + log::info!( + "Tor: resetting retry counter — last disruption was over an hour ago." 
+ ); + attempts = 0; + } + } + last_failure_time = Some(now); + attempts = attempts.saturating_add(1); if attempts > MAX_RETRIES { log::error!( @@ -189,6 +210,7 @@ async fn run( data_dir: PathBuf, bind_port: u16, bind_addr: IpAddr, + max_connections: usize, state: SharedState, mut shutdown: watch::Receiver, ) -> Result> { @@ -196,12 +218,14 @@ async fn run( // ── 1. Build TorClientConfig ────────────────────────────────────────── // - // `from_directories(state_dir, cache_dir)` is the idiomatic Arti helper - // that sets both storage paths in one call. It takes `AsRef` so - // we pass PathBuf directly — no CfgPath conversion needed. - // - // The state directory persists the service keypair across restarts, giving - // you a stable .onion address. Delete it to rotate to a new address. + // fix T-7 — explicitly create and lock down the Arti state/cache directories + // before use. Default OpenOptions would create them with 0o755 (world- + // readable), which exposes the service keypair to other local users. + ensure_private_dir(&data_dir.join("arti_state")) + .map_err(|e| format!("Cannot create secure state directory: {e}"))?; + ensure_private_dir(&data_dir.join("arti_cache")) + .map_err(|e| format!("Cannot create secure cache directory: {e}"))?; + let config = TorClientConfigBuilder::from_directories( data_dir.join("arti_state"), data_dir.join("arti_cache"), @@ -212,19 +236,32 @@ async fn run( // ── 2. Bootstrap ────────────────────────────────────────────────────── // - // fix 3.3 — wrap in a timeout so a firewalled network (where Tor traffic - // is silently dropped) does not cause the task to stall indefinitely with - // TorStatus::Starting showing in the dashboard. - let tor_client = - tokio::time::timeout(BOOTSTRAP_TIMEOUT, TorClient::create_bootstrapped(config)) - .await - .map_err(|_| { - format!( - "Tor bootstrap timed out after {}s — check network connectivity", - BOOTSTRAP_TIMEOUT.as_secs() - ) - })? 
- .map_err(|e| format!("Tor bootstrap failed: {e}"))?; + // fix T-5 — wrap bootstrap in a select! so a shutdown signal received + // during the up-to-120s bootstrap window exits cleanly rather than + // blocking the lifecycle shutdown sequence. + let tor_client = { + let mut shutdown_bootstrap = shutdown.clone(); + tokio::select! { + result = tokio::time::timeout( + BOOTSTRAP_TIMEOUT, + TorClient::create_bootstrapped(config), + ) => { + result + .map_err(|_| format!( + "Tor bootstrap timed out after {}s — check network connectivity", + BOOTSTRAP_TIMEOUT.as_secs() + ))? + .map_err(|e| format!("Tor bootstrap failed: {e}"))? + } + _ = shutdown_bootstrap.changed() => { + if *shutdown_bootstrap.borrow() { + log::info!("Tor: shutdown received during bootstrap — exiting."); + return Ok(false); + } + return Err("shutdown channel closed during bootstrap".into()); + } + } + }; log::info!("Tor: connected to the Tor network"); @@ -236,24 +273,21 @@ async fn run( .nickname("rusthost".parse()?) .build()?; - let (onion_service, rend_requests) = tor_client + // fix T-3 — the OnionService handle represents the service lifetime. + // Dropping it de-registers the hidden service from the Tor network + // (analogous to closing a listener socket). The variable is prefixed + // with `_` to communicate "kept alive intentionally" and to suppress the + // `unused_variables` warning — using `let _ =` would cause an immediate + // drop, which WOULD tear down the service. + // + // IMPORTANT: onion_service_guard must not be dropped; dropping it + // de-registers the hidden service from the Tor network. + let (onion_service_guard, rend_requests) = tor_client .launch_onion_service(svc_config)? .ok_or("Tor: onion service returned None (should not happen with in-code config)")?; // ── 4. 
Read the onion address ───────────────────────────────────────── - // - // In arti-client 0.40, HsId implements DisplayRedacted (from the safelog - // crate) rather than std::fmt::Display, so direct format!("{}", hsid) - // does not compile. Instead we encode the address manually from the raw - // 32-byte public key using the v3 onion-address spec: - // - // onion_address = base32(PUBKEY | CHECKSUM | VERSION) + ".onion" - // CHECKSUM = SHA3-256(".onion checksum" | PUBKEY | VERSION)[:2] - // VERSION = 0x03 - // - // HsId: AsRef<[u8; 32]> is stable across arti 0.40+, so this approach - // will keep working even if the Display story changes in a future release. - let hsid = onion_service + let hsid = onion_service_guard .onion_address() .ok_or("Tor: onion address not yet available (key generation incomplete)")?; let onion_name = hsid_to_onion_address(hsid); @@ -282,33 +316,37 @@ async fn run( // each other. Dropping the task naturally closes the Tor circuit. let mut stream_requests = handle_rend_requests(rend_requests); - // fix 3.2 — the semaphore bounds active concurrent proxied streams. - // If `acquire_owned` ever returns Err it means `semaphore` was explicitly - // closed, which we now do when exiting via the shutdown arm so the - // acquire branch is always reachable (fix 3.5). - let semaphore = std::sync::Arc::new(tokio::sync::Semaphore::new(256)); + // fix T-2 — size the Tor semaphore from config rather than a hardcoded 256. + // Hardcoding 256 means operators who lower max_connections to (e.g.) 32 + // still queue up to 256 Tor streams against the HTTP server; operators who + // raise it above 256 are silently capped. Tying both values together + // ensures the Tor layer and HTTP layer agree on the concurrency limit. + let semaphore = std::sync::Arc::new(tokio::sync::Semaphore::new(max_connections)); - // 2.10 — use select! so a shutdown signal can break the accept loop - // cleanly, instead of blocking indefinitely in stream_requests.next(). 
loop { tokio::select! { next = stream_requests.next() => { if let Some(stream_req) = next { - // fix 3.6 — derive local address from the actual bind address, - // not a hardcoded "127.0.0.1", so IPv6 bind configs work. - let local_addr = format!("{bind_addr}:{bind_port}"); - - // fix 3.5 — propagate semaphore errors rather than silently - // breaking; `acquire_owned` only fails if closed explicitly. - let permit = std::sync::Arc::clone(&semaphore) - .acquire_owned() - .await - .map_err(|e| format!("semaphore closed unexpectedly: {e}"))?; + // fix T-1 — use format_local_addr() to correctly bracket + // IPv6 addresses. Without bracketing, "::1:8080" is not a + // valid SocketAddr literal; every IPv6 proxy connect fails. + let local_addr = format_local_addr(bind_addr, bind_port); + + // fix T-2 — use try_acquire_owned() instead of awaiting + // acquire_owned(). The rendezvous handshake (multi-RTT Tor + // protocol exchange) already completed by the time we reach + // here; dropping the stream_req at this point sends a + // RELAY_END cell that closes the circuit without burning a + // permit. This is the earliest point we can shed load. + let Ok(permit) = std::sync::Arc::clone(&semaphore).try_acquire_owned() else { + log::warn!("Tor: at capacity ({max_connections}), dropping stream"); + drop(stream_req); + continue; + }; tokio::spawn(async move { let _permit = permit; if let Err(e) = proxy_stream(stream_req, &local_addr).await { - // Downgraded to debug — normal on abrupt disconnects. log::debug!("Tor: stream closed: {e}"); } }); @@ -380,25 +418,30 @@ async fn proxy_stream( })? .map_err(|e| format!("local connect to {local_addr} failed: {e}"))?; - // fix 3.2 — cap the total wall-clock lifetime of a proxied stream. - // A slow or adversarially idle client cannot hold a permit forever; after - // STREAM_MAX_LIFETIME the connection is closed from our side. 
-    tokio::time::timeout(
-        STREAM_MAX_LIFETIME,
-        tokio::io::copy_bidirectional(&mut tor_stream, &mut local),
-    )
-    .await
-    .map_err(|_| {
-        format!(
-            "stream lifetime exceeded {}s — closing",
-            STREAM_MAX_LIFETIME.as_secs()
-        )
-    })? // timeout → Err
-    .map_err(|e| format!("bidirectional copy failed: {e}"))?; // io::Error → Err
+    // fix T-6 — see copy_with_idle_timeout; NOTE(review): it caps TOTAL duration, not idle time.
+    copy_with_idle_timeout(&mut tor_stream, &mut local)
+        .await
+        .map_err(|e| format!("stream proxy error: {e}"))?;
 
     Ok(())
 }
 
+async fn copy_with_idle_timeout<A, B>(a: &mut A, b: &mut B) -> std::io::Result<()>
+where
+    A: tokio::io::AsyncRead + tokio::io::AsyncWrite + Unpin,
+    B: tokio::io::AsyncRead + tokio::io::AsyncWrite + Unpin,
+{
+    tokio::select! {
+        result = tokio::io::copy_bidirectional(a, b) => result.map(|_| ()),
+        // NOTE(review): sleep() here never resets on data transfer, so this is a
+        () = tokio::time::sleep(IDLE_TIMEOUT) => {
+            Err(std::io::Error::new(
+                std::io::ErrorKind::TimedOut,
+                format!("idle timeout ({}s)", IDLE_TIMEOUT.as_secs()),
+            ))
+        }
+        // 60s hard cap (stricter than the old 300s) — TODO: reset timer on progress.
+    }
+}
+
 // ─── Onion address encoding ───────────────────────────────────────────────────
 
 /// Encode an `HsId` (ed25519 public key) as a v3 `.onion` domain name.
@@ -465,6 +508,26 @@ pub(crate) fn onion_address_from_pubkey(pubkey: &[u8; 32]) -> String {
 // These must appear BEFORE the #[cfg(test)] module; items after a test module
 // trigger the `clippy::items_after_test_module` lint.
 
+/// Format a bind address as a valid socket-address string (fix T-1).
+/// IPv6 addresses need square brackets: `[::1]:8080`, not `::1:8080`.
+fn format_local_addr(addr: IpAddr, port: u16) -> String {
+    match addr {
+        IpAddr::V4(a) => format!("{a}:{port}"),
+        IpAddr::V6(a) => format!("[{a}]:{port}"),
+    }
+}
+
+/// Create a directory with owner-only permissions 0o700 (fix T-7).
+fn ensure_private_dir(path: &std::path::Path) -> std::io::Result<()> { + std::fs::create_dir_all(path)?; + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o700))?; + } + Ok(()) +} + async fn set_status(state: &SharedState, status: TorStatus) { state.write().await.tor_status = status; } diff --git a/tests/http_integration.rs b/tests/http_integration.rs index 1e1c4fd..f571943 100644 --- a/tests/http_integration.rs +++ b/tests/http_integration.rs @@ -325,8 +325,14 @@ async fn get_nonexistent_file_returns_404() -> Result<(), Box Result<(), Box> { +async fn disallowed_method_returns_405_with_allow_header() -> Result<(), Box> +{ let (tmp, site) = make_site(&[("index.html", b"ok")])?; let server = TestServer::start(&site).await?; @@ -338,8 +344,12 @@ async fn post_request_returns_400() -> Result<(), Box> { assert_eq!( status_code(&response), - Some(400), - "POST must be rejected with 400:\n{response}" + Some(405), + "POST must return 405 Method Not Allowed (RFC 9110 §15.5.6):\n{response}" + ); + assert!( + has_header(&response, "allow"), + "405 response must include Allow header:\n{response}" ); Ok(()) }