From 7a7515d58a53f8021e73593e04fe7d531e70d549 Mon Sep 17 00:00:00 2001 From: csd113 Date: Sun, 22 Mar 2026 11:24:04 -0700 Subject: [PATCH 1/7] Update .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 1c15a38..9240289 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,4 @@ dev-check-strict.sh .DS_STORE clippy_reports src/.DS_Store +build-release.sh From 238bb43cdcd9fa79af76fb9a663b6bd243354d0b Mon Sep 17 00:00:00 2001 From: csd113 Date: Sun, 22 Mar 2026 20:27:38 -0700 Subject: [PATCH 2/7] Phase 1 complete --- ..._audit-askforrepeatafterfixesareapplied.md | 541 ++++ rusthost_implementation_plan.md | 2224 +++++++++++++++++ src/console/dashboard.rs | 2 +- src/console/mod.rs | 14 +- src/runtime/lifecycle.rs | 3 +- src/runtime/state.rs | 1 + src/server/handler.rs | 133 +- src/tor/mod.rs | 149 +- 8 files changed, 2947 insertions(+), 120 deletions(-) create mode 100644 docs/rusthost_audit-askforrepeatafterfixesareapplied.md create mode 100644 rusthost_implementation_plan.md diff --git a/docs/rusthost_audit-askforrepeatafterfixesareapplied.md b/docs/rusthost_audit-askforrepeatafterfixesareapplied.md new file mode 100644 index 0000000..332a97f --- /dev/null +++ b/docs/rusthost_audit-askforrepeatafterfixesareapplied.md @@ -0,0 +1,541 @@ +# RustHost — Full Project Audit + +> Audited from source archive (Archive.zip) and https://github.com/csd113/RustHost +> Rust edition 2021 · MSRV 1.90 · Arti 0.40 · Tokio 1 + +--- + +## Preamble + +This is a thoughtful, iteratively-improved codebase. The internal "fix X.Y" comments reveal at least two full self-review passes, and the results show: `unsafe` is forbidden at the workspace level, the Tor integration migrated from subprocess to Arti in-process, `NonZeroU16`/`IpAddr` push validation to serde, and the path-resolution security model is correct. The developer clearly knows Rust. + +That said, the project is **not elite**. The gaps listed below are not style nits — they are functional blockers that would stop real users from relying on it, or that represent genuine attack surface. Read this as: "here's exactly what it would take to make this worth deploying." + +--- + +## 1. Architecture & Design + +### 🔴 CRITICAL — No HTTP/1.1 keep-alive or HTTP/2 + +Every response carries `Connection: close`. The server handles exactly one request per TCP connection and drops the socket. For clearnet this is merely slow; **for Tor this is a project-killing design flaw.** Each Tor circuit requires a multi-RTT rendezvous handshake (~1–3 s on a typical path). A page with 15 assets (HTML + CSS + JS + images) forces 15 sequential rendezvous handshakes. A typical page load over this server will take **15–45 seconds** on Tor. + +**Fix:** Add HTTP/1.1 keep-alive in the request loop inside `handler.rs`. Parse the `Connection:` request header and re-enter `receive_request` on the same stream. Long-term, HTTP/2 via `h2` or `hyper` eliminates head-of-line blocking entirely. + +### 🟠 HIGH — `canonical_root` is never refreshed after startup + +In `server/mod.rs`, `canonical_root` is canonicalized once at server start. If the `site/` directory is deleted and recreated while the server is running (e.g., during a content deployment), `canonical_root` points to the now-dead inode. All requests return `Resolved::Fallback`. Pressing `[R]` updates `site_file_count` but **does not update `canonical_root`**. Recovery requires a full process restart. + +**Fix:** Re-resolve `canonical_root` inside the `Reload` event handler in `events.rs` and push the new value to the server via a `watch` channel. + +### 🟠 HIGH — Tor and HTTP semaphores are sized identically but compete for different resources + +The T-2 fix correctly sizes both semaphores to `max_connections`. However, a Tor stream + its proxied HTTP connection occupy **two** file descriptors simultaneously. Under max load, the process holds `2 × max_connections` open sockets, but the OS `ulimit` and `EMFILE` guard only knows about the Tor semaphore. The effective capacity is half what the operator configured. + +**Fix:** Document this clearly. Consider sizing the Tor semaphore to `max_connections / 2` or adding a dedicated Tor connection limit to the config. + +### 🟡 MEDIUM — No `[profile.dev]` optimization + +First `cargo build` (dev) with vendored OpenSSL and the full Arti tree takes 90–120 seconds on a modern machine. There's no `[profile.dev]` section in `Cargo.toml` to set `opt-level = 1` for dependencies, which would dramatically reduce compile time without the debug-info cost of a full release build. + +```toml +[profile.dev.package."*"] +opt-level = 1 +``` + +### 🟡 MEDIUM — Module boundary between `tor` and `server` is leaky + +`tor/mod.rs` calls `TcpStream::connect(local_addr)` directly against the HTTP server. This creates an implicit contract (the HTTP server must be listening on a specific `IpAddr:port`) that bypasses all the `SharedState` machinery. A refactor that changes how the HTTP server exposes its address would silently break Tor proxying. + +**Fix:** Pass the bound address through `SharedState.actual_port` + `config.server.bind` (which already happens in lifecycle), and have `tor::init` receive a `SocketAddr` rather than separate `IpAddr`/`u16` arguments. + +### 🟡 MEDIUM — Single log file + simplistic rotation + +`logging/mod.rs` rotates `rusthost.log` → `rusthost.log.1` at 100 MB. Only one backup is kept. There's no timestamp in the rotated filename, no gzip, and no hook to signal an external log manager. On a server running at DEBUG level with Arti noise enabled, 100 MB fills in hours. + +--- + +## 2. Code Quality + +### 🔴 CRITICAL — `onion_address_from_pubkey` test is a tautology + +The `reference_onion` function in `tor/mod.rs` tests uses the **same algorithm** as the production function. It tests determinism and format, but a consistent implementation bug in both would pass. There is no cross-check against a known external test vector. + +The Tor Rendezvous Specification defines exact test vectors. One should be hardcoded: + +```rust +// Known vector from the Tor spec, independently computed: +// All-zero key → "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa3.onion" +// (compute the exact value offline and assert it here) +#[test] +fn hsid_to_onion_address_known_vector() { + let pubkey = [0u8; 32]; + assert_eq!(onion_address_from_pubkey(&pubkey), "aaaa...aaa3.onion"); +} +``` + +### 🔴 CRITICAL — `copy_with_idle_timeout` is not actually an idle timeout + +In `tor/mod.rs`, `copy_with_idle_timeout` uses `tokio::time::sleep(IDLE_TIMEOUT)` alongside `copy_bidirectional`. **`sleep` starts when the call begins, not when I/O stalls.** A legitimate large file download (say, a 50 MB video) that takes 65 seconds of continuous data transfer is killed at second 60 even though the connection was never idle. The variable name and doc comment say "idle" but the implementation is a wall-clock cap. + +**Fix:** Use a proper idle timeout that resets on each read/write. This requires a custom bidirectional copy loop that arms a `tokio::time::Sleep` and resets it on each successful I/O operation, or wraps each read/write in `tokio::time::timeout`. + +### 🟠 HIGH — `write_redirect` duplicates all security headers + +`write_redirect` in `handler.rs` manually re-lists every security header that `write_headers` also emits. Any future header addition (e.g., `Cross-Origin-Opener-Policy`) must be applied in two places. This is already a bug: `write_redirect` emits CSP on all redirects regardless of content-type, while `write_headers` correctly gates CSP to HTML responses. + +**Fix:** Remove `write_redirect` and call `write_headers` with `status: 301, reason: "Moved Permanently"`, adding a `Location` header via a new optional parameter or a pre-call `stream.write_all`. + +### 🟠 HIGH — No per-IP request rate limiting + +The `Semaphore` limits total *concurrent* connections, but a single IP can consume all 256 slots simultaneously and DoS every other user. There's no per-IP connection limit, no request-rate limit, and no backpressure signal to the caller. On Tor, adversarial clients share exit nodes with legitimate users, making this more exploitable, not less. + +**Fix:** Add a `HashMap` of active connections per peer, checked at accept time. This fits naturally in the accept loop in `server/mod.rs`. + +### 🟡 MEDIUM — `receive_request` ignores all headers after the request line + +The function reads all headers into a `String` for the 8 KiB check but never parses them. `Host`, `Content-Length`, `Transfer-Encoding`, `If-None-Match`, `Range`, `Accept-Encoding` are all silently discarded. This isn't a bug today, but it makes adding any feature that requires inspecting request headers a large refactor. + +**Fix:** Parse headers into a lightweight `HashMap<&str, &str>` (or a dedicated struct) after reading them. This enables conditional GET, range requests, compression negotiation, and keep-alive without touching the read logic. + +### 🟡 MEDIUM — Dashboard TorStatus message says "polling" + +`dashboard.rs` line: `TorStatus::Starting => yellow("STARTING — polling for .onion address…")`. +The Arti integration is fully event-driven — there is no polling. Stale copy-paste from the old C-Tor subprocess implementation. + +### 🟡 MEDIUM — `sanitize_header_value` is incomplete + +The function strips `\r` and `\n` from header values. It does not strip: +- Null bytes (`\x00`) — rejected by RFC 9110 but some parsers accept them +- Other C0 control characters (`\x01`–`\x1f`, `\x7f`) — legal in filenames on Linux + +For the `Location` header, a filename containing `\x00` after CR/LF stripping could still produce an anomalous URL. Add a broader control-character strip: + +```rust +.filter(|&c| !c.is_control()) +``` + +### 🟡 MEDIUM — `default_data_dir` warning string has a stray whitespace + +In `lifecycle.rs`, the fallback `eprintln!` warning in `default_data_dir` contains a multi-line string with a leading run of spaces at the line join: +``` +"Warning: cannot determine executable path ({e}); using ./rusthost-data…" +``` +This renders as a very long single line with ~18 spaces mid-sentence. Use `\n` + indentation instead. + +### 🟡 MEDIUM — `tor/mod.rs`: the log message for "resetting retry counter" contains leading whitespace + +```rust +log::info!( + "Tor: resetting retry counter — last disruption was over an hour ago." +) +``` +Same issue as above — line continuation whitespace is included in the string. + +### 🟡 MEDIUM — `open_browser` spawns a child process without logging the outcome + +In `runtime/mod.rs`, `open_browser` ignores the `Result` from `Command::spawn()` on all platforms. If `xdg-open` isn't installed (common on headless Linux servers), the user gets no feedback. The `[O]` key silently does nothing. + +**Fix:** Log a `warn!` when the spawn fails. + +### 🟡 MEDIUM — `percent_decode` reinvents `percent-encoding` + +The custom percent-decoder in `handler.rs` is 60 lines long, covers null-byte injection, and handles multi-byte UTF-8 correctly. All of this is already provided by the `percent-encoding` crate (3 lines). The custom implementation is a maintenance liability: if a bug is found in `percent_decode`, it won't be caught by an upstream security advisory. + +### 🟡 MEDIUM — `LogFile::write_line` checks file size on every write + +```rust +if let Ok(meta) = self.file.metadata() { + if meta.len() >= MAX_LOG_BYTES { +``` + +This is a `fstat` syscall on every log record. At DEBUG level with Arti noise, this could be thousands of syscalls per second. Cache the size and only re-stat after every N writes (or increment an internal counter). + +### 🟡 MEDIUM — `AppState` fields are not reset between test runs (integration tests) + +The integration tests in `tests/http_integration.rs` create a fresh `AppState::new()` per test, which is correct. However, `LOG_BUFFER` is a `OnceLock` global in `logging/mod.rs`. If `logging::init` is called in one test run and the test binary is reused, the second call silently returns an error (the logger is already set). The tests currently skip logging, which avoids this, but it means the logging path is not integration-tested. + +### 🟡 MEDIUM — `scan_site` returns `(u32, u64)` but file count could theoretically overflow + +`count = count.saturating_add(1)` wraps at 4 billion files. Practically not an issue, but returning `u64` for both would be consistent. + +--- + +## 3. Performance + +### 🔴 CRITICAL — No HTTP keep-alive (see Architecture §1) + +Covered above. The single largest performance issue in the codebase by an order of magnitude. + +### 🟠 HIGH — No response compression (gzip/brotli) + +All files are served raw. For Tor users on a ~100–500 kbps effective circuit, a 200 KB minified JavaScript file takes 3–16 seconds. Brotli compression typically achieves 70–85% reduction on text assets. Without compression, the Tor user experience is extremely poor. + +**Fix:** Check `Accept-Encoding` request header (once header parsing is added), and compress responses with the `async-compression` crate. Pre-compress files at startup to avoid per-request CPU overhead. + +### 🟠 HIGH — No conditional GET (ETag / Last-Modified) + +All responses carry `Cache-Control: no-store`. There is no `ETag`, `Last-Modified`, `If-None-Match`, or `If-Modified-Since` support. Every browser reload re-fetches every asset, regardless of whether it changed. This is anti-caching by design, which is appropriate for Tor (you don't want assets cached with the onion address in the referrer), but it should be a conscious per-resource policy, not a blanket prohibition. At minimum, `Cache-Control: no-store` should only apply to HTML and not to immutable assets. + +### 🟠 HIGH — No `sendfile` / zero-copy file transfer + +`tokio::io::copy` reads file data into a userspace buffer then writes it to the socket. On Linux, `sendfile(2)` skips the userspace copy entirely, halving the CPU cost for large file transfers. The `tokio-uring` crate (or the `sendfile` feature in `nix`) enables this. + +### 🟡 MEDIUM — `write_headers` allocates a `String` per response + +Every call to `write_headers` creates a heap-allocated `String` via `format!`. For static sites under load, this is many small allocations per second. Using a stack-allocated `ArrayString` or writing directly to the `TcpStream` in multiple `write_all` calls would eliminate this. + +### 🟡 MEDIUM — `build_directory_listing` buffers the entire HTML response + +The directory listing HTML is built in a single `String` before sending. For directories with thousands of entries this is slow. A streaming approach (write HTML head, iterate entries line-by-line) would reduce peak memory and time-to-first-byte. + +### 🟡 MEDIUM — `render` acquires the `AppState` lock twice per tick + +In `console/mod.rs`: +```rust +let mode = state.read().await.console_mode.clone(); // lock 1 +// ... +let s = state.read().await; // lock 2 +``` + +A single `read()` that extracts both mode and the full state would halve the lock acquisitions per render tick. + +### 🟡 MEDIUM — No Range request support + +Large media files (video, audio) cannot be seeked. Streaming players and download managers depend on `Range: bytes=N-M` requests, which this server rejects with 400 (the method is GET, which the server allows, but range headers are silently ignored and the full file is sent). The client sees the full response instead of the range, which some clients reject entirely. + +### 🟡 MEDIUM — `scan_site` BFS traversal is not depth-bounded + +A deeply nested directory tree (or a symlink cycle that somehow slips through the inode check on Windows) could consume unbounded stack space. The `queue` grows proportionally to directory count. Consider adding a depth limit. + +--- + +## 4. Security + +### 🔴 CRITICAL — No per-IP rate limiting (see Code Quality §2) + +A single client can open 256 simultaneous connections (the full pool) and deny service to every other user. This is especially dangerous on a Tor hidden service because: +1. Tor clients share exit nodes, so an IP-level ban catches innocent users +2. The attacker pays very little (Tor circuit setup is cheap for the attacker) + +### 🔴 CRITICAL — `Cache-Control: no-store` prevents Tor Browser's first-party isolation from working correctly + +Tor Browser applies first-party isolation per-origin. With `no-store` on all resources, the browser cannot serve cached assets even on the same page load. Every sub-resource request goes over a separate Tor circuit. This is **functionally broken** for multi-asset pages. The intention to prevent caching (good) is implemented too broadly (bad). + +**Fix:** Apply `no-store` only to HTML documents. Immutable assets (hashed filenames, images, fonts) should use `Cache-Control: max-age=31536000, immutable`. + +### 🟠 HIGH — Tor keypair directory is fixed at `arti_state/`; no key backup/export path + +`ensure_private_dir` correctly sets `0o700` on Unix, but: +1. On **Windows**, directory permissions are not set at all. The keypair is world-readable to any local user. +2. There is no mechanism to **back up** the keypair. If `arti_state/` is accidentally deleted, the `.onion` address is permanently lost. +3. There is no documented way to **import** an existing keypair (e.g., migrate from another host). + +### 🟠 HIGH — Log file leaks the `.onion` address + +`tor/mod.rs` logs the onion address at `INFO` level in a prominent banner. The log file is created with `0o600` (owner read-only), which is correct. However: +1. If the operator runs `rusthost-cli > output.txt`, the onion address appears in a world-readable file +2. If the operator shares logs for debugging, the onion address is in the paste + +**Fix:** Hash or truncate the address in the log line. Show only the first 8 characters plus `…` to identify it while not fully exposing it. + +### 🟠 HIGH — `open_browser` passes the URL to a shell command without explicit sanitization + +In `runtime/mod.rs`, the Windows path does: +```rust +std::process::Command::new("cmd").args(["/c", "start", "", url]) +``` + +The URL is constructed from `IpAddr` + `u16`, so the values are safe today. But `open_browser` is `pub` in `crate::runtime`, callable from anywhere with an arbitrary string. If a future caller passes an attacker-influenced URL (e.g., from the onion address or a config field), the empty-string third argument to `start` doesn't fully protect against shell expansion on Windows. Document or enforce that only internal URLs may be passed. + +### 🟠 HIGH — No HTTPS option for the clearnet server + +When `bind = "0.0.0.0"`, the server listens on all interfaces with plaintext HTTP. There is no TLS termination, no self-signed certificate generation, and no ACME integration. A user who exposes the server to a local network (e.g., home lab) has no way to get HTTPS without a reverse proxy. + +### 🟡 MEDIUM — `expose_dotfiles` check happens before URL decode + +In `resolve_path`, the dot-file check iterates `Path::new(url_path).components()` where `url_path` is already percent-decoded. This is correct. However, the check runs on the URL path, not on the final resolved filesystem path. A symlink named `safe-name` that points to `.git/` inside the site root would bypass the dot-file filter (the symlink's own name doesn't start with `.`, but the target is a dot-directory). + +**Fix:** After resolving the canonical path, check whether any component of the path **relative to `canonical_root`** starts with `.`. + +### 🟡 MEDIUM — `build_directory_listing` generates URLs with percent-encoded components but no `` tag + +The directory listing uses `percent_encode_path(name)` for hrefs. If the current URL path contains a trailing `/` from a redirect, the relative href `base/encoded_name` may resolve incorrectly on some browser/proxy combinations. Use absolute paths (`/path/to/dir/file`) to eliminate ambiguity. + +### 🟡 MEDIUM — No `Strict-Transport-Security` header + +Even though TLS isn't supported, the HSTS header should be documented as a TODO. Adding HTTPS later without HSTS means browsers will silently downgrade connections. + +### 🟡 MEDIUM — `--config` and `--data-dir` CLI flags accept absolute paths with no restriction + +A user who passes `--config /etc/passwd` will get a likely TOML parse error, but `--data-dir /tmp/attacker-controlled` could be used to point the server at attacker-controlled content. This is a misconfiguration concern, not a true security issue, but it's worth documenting. + +--- + +## 5. Reliability & Stability + +### 🟠 HIGH — Tor reconnect loop uses linear backoff, not exponential + +`RETRY_BASE_SECS = 30` and the delay is `30 * attempt`. After 5 attempts: 30 s, 60 s, 90 s, 120 s, 150 s. This is linear. True exponential backoff (`30 * 2^attempt`, capped at e.g. 600 s) is more respectful of the Tor network under outage conditions and is the industry standard for circuit breakers. + +### 🟠 HIGH — Shutdown drain timeout of 8 seconds may be insufficient + +In `lifecycle.rs`, the total shutdown budget is 8 seconds split between the HTTP server drain (5 s) and Tor cleanup (whatever's left, often 3 s or less). Tor circuits with active transfers can take longer to close gracefully. On slow Tor paths, `copy_bidirectional` might still be blocked. The `_` return from `timeout` means the process continues regardless, which is correct, but the 8-second hard cap means Tor connections are abruptly terminated rather than gracefully closed. + +### 🟡 MEDIUM — If `port_tx` send fails (channel dropped before use), lifecycle returns an error with no cleanup + +In `server/mod.rs`, if the bind fails, `port_tx` is dropped without sending. `lifecycle.rs` catches the `Err` from the oneshot and returns `AppError::ServerStartup`. But by this point, logging may have been initialized and the async runtime is still running. The error path in `main` calls `console::cleanup()` and `eprintln!`, which is correct, but it doesn't explicitly shut down the Tor task (it was never started) or flush the log. + +**Fix:** Add `logging::flush()` to the error path in `main`. + +### 🟡 MEDIUM — `LOG_BUFFER` is a global `OnceLock`; `logging::init` fails silently if called twice + +`log::set_logger` returns `Err` if a logger is already set, and the code maps this to `AppError::LogInit`. This is correct. However, `LOG_BUFFER.get_or_init(...)` silently no-ops on the second call. In a test binary that calls `logging::init` from multiple `#[tokio::test]` tests, only the first test gets a fresh ring buffer. This is a test isolation issue, not a production issue, but it means the logging path is not reliably tested. + +### 🟡 MEDIUM — `AppState::console_mode` is read under `RwLock` then immediately read again + +In `console/mod.rs`, `render()` reads `console_mode` under a read lock, releases it, then re-acquires a read lock to read the full `AppState`. Between the two acquisitions, `console_mode` could change (e.g., from `Dashboard` to `LogView`). The rendered output would then be inconsistent with the state read on the second lock. This is a TOCTOU issue in the rendering path — cosmetic only (next render tick corrects it), but worth fixing. + +### 🟡 MEDIUM — `scan_site` fails loudly on the first `read_dir` error + +If any subdirectory inside `site/` is unreadable (e.g., `0o000` permissions), `scan_site` returns `Err` and the file count reverts to `0`. The user sees "0 files, 0 B" in the dashboard with a log warning. The function should skip unreadable directories (logging a per-directory warning) rather than aborting the entire scan. + +--- + +## 6. Cross-Platform Support + +### 🟠 HIGH — Keypair directory permissions not enforced on Windows + +`ensure_private_dir` applies `0o700` only under `#[cfg(unix)]`. On Windows, the directory is created with default ACLs (typically readable by all local users in the same session). The Tor service keypair is therefore **world-readable on Windows**. The Windows ACL equivalent (`SetNamedSecurityInfo`) should be applied via the `windows-acl` or `winapi` crates, or the limitation must be prominently documented in the README. + +### 🟡 MEDIUM — `is_fd_exhaustion` returns `false` on non-Unix, non-Windows targets + +On WASM, UEFI, and other exotic targets, accept errors that are actually FD exhaustion are logged at `debug` level instead of `error`. This is low-risk but worth documenting. + +### 🟡 MEDIUM — `xdg-open` is not available on all Linux environments + +On headless servers, Docker containers, minimal Alpine images, and WSL without a display, `xdg-open` either doesn't exist or silently fails. The `[O]` key does nothing with no user feedback. + +### 🟡 MEDIUM — Log file permissions not set on Windows + +`OpenOptions::mode(0o600)` is `#[cfg(unix)]` only. On Windows, the log file is created with default permissions (likely readable by all users in the group). The log contains the `.onion` address. + +### 🟡 MEDIUM — No cross-compilation CI + +`audit.toml` and `deny.toml` are present but there is no CI configuration. Cross-compilation to `x86_64-pc-windows-gnu` and `aarch64-unknown-linux-gnu` is claimed as working (via bundled SQLite and vendored OpenSSL), but this is untested in automation. + +--- + +## 7. Developer Experience + +### 🔴 CRITICAL — No README.md + +There is no `README.md` in the repository. A new visitor to https://github.com/csd113/RustHost sees only the file list. There is no explanation of what the project does, how to build it, how to use it, or why it exists. This is the single biggest barrier to adoption and contribution. + +### 🟠 HIGH — MSRV is 1.90 (unreleased as of mid-2025) + +`rust-version = "1.90"` in `Cargo.toml`. Rust 1.90 is not yet stable. A new contributor who runs `cargo build` with the stable toolchain gets: + +``` +error: package `rusthost` cannot be built because it requires rustc 1.90.0 or later +``` + +There is no error message, documentation, or toolchain file (`rust-toolchain.toml`) to tell them what to do. Add a `rust-toolchain.toml` specifying `channel = "nightly"` or the correct beta channel, and document this in the README. + +### 🟠 HIGH — No CI configuration + +No `.github/workflows/`, no `Makefile`, no `justfile`. The `cargo-deny` (`deny.toml`) and `cargo-audit` (`audit.toml`) configurations are present but never run. A PR that introduces a yanked dependency or a RUSTSEC advisory will merge silently. + +**Minimum CI matrix:** +``` +cargo build --release +cargo test +cargo clippy -- -D warnings +cargo deny check +cargo audit +``` + +### 🟠 HIGH — `[R]` reload does not reload configuration + +The dashboard says "press [R] to reload" which users will interpret as "re-read settings.toml." It only rescans the file count. Config changes (e.g., changing `csp_level` or `max_connections`) require a full restart. Document this limitation prominently or implement config hot-reload. + +### 🟡 MEDIUM — Internal "fix X.Y" comments are meaningless to outside contributors + +The codebase is dense with references like `// fix H-3`, `// fix T-7`, `// fix 4.5`. These are clearly from an internal issue tracker or review document that is not in the repository. To an outside contributor, these comments are noise that obscures the actual rationale. + +**Fix:** Replace these with human-readable comments explaining *why* the fix was necessary, not what issue number it closes. E.g., `// fix H-3` → `// Strip CR/LF to prevent CRLF injection into Location header`. + +### 🟡 MEDIUM — CLI parser doesn't support `--flag=value` syntax + +`--config /path` works; `--config=/path` produces `error: unrecognised argument '--config=/path'`. Standard CLI convention supports both. Consider replacing the hand-rolled parser with `clap` to get this, plus `--help` auto-generation, `--` end-of-flags, short flags (`-c`/`-d`), and shell completion generation. + +### 🟡 MEDIUM — No `--port` or `--no-tor` CLI flags for quick ad-hoc use + +The most common developer workflow is "I want to quickly serve a directory on a specific port without editing a TOML file." There's no `rusthost-cli --port 3000 --no-tor ./my-site`. Every use requires the full config file setup. + +### 🟡 MEDIUM — No structured access log + +The server logs requests at `DEBUG` level via `log::debug!("Connection from {peer}")`, but there's no access log in Combined Log Format (or any structured format). Operators cannot pipe logs to a SIEM, run `goaccess`, or analyze traffic patterns. + +--- + +## 8. Feature Completeness + +### 🔴 CRITICAL — No SPA (Single Page Application) fallback routing + +There is no option to serve `index.html` for all 404 responses. React, Vue, Svelte, and Angular apps all require this for client-side routing to work. A request to `/about` on a React SPA returns 404 from this server; only `/` works. This is table stakes for any static host. + +**Fix:** Add `fallback_to_index = false` to `[site]` config. When true, return `index.html` for all 404s that don't match a file. + +### 🔴 CRITICAL — No HTTPS / TLS support + +The server has no TLS. For public Tor hidden service use, this doesn't matter (Tor provides its own encryption). But for clearnet access, plaintext HTTP is increasingly blocked by browsers (HSTS preloading, mixed-content errors). Providing a `--generate-cert` flag with a self-signed certificate, or ACME support, would make the tool usable for clearnet hosting. + +### 🟠 HIGH — No custom error pages (404.html, 500.html) + +404 responses are plain-text "Not Found". Every professional static host supports custom error pages. Add `error_404 = "404.html"` to `[site]` config. + +### 🟠 HIGH — No gzip/brotli compression (see Performance §2) + +### 🟠 HIGH — No Range request (206 Partial Content) support + +Audio/video players, download managers, and PDF viewers depend on range requests. Without it, a 500 MB video file cannot be seeked or resumed. + +### 🟡 MEDIUM — No URL redirect/rewrite rules + +No `[[redirects]]` or `[[rewrites]]` configuration table. Migrating a site from another host requires the destination host to preserve all URLs. Custom redirects (e.g., `/old-page → /new-page`) are a baseline feature. + +### 🟡 MEDIUM — No `--serve ` one-shot mode + +You cannot do `rusthost-cli --serve ./docs` to instantly serve a directory without first running through the first-run setup flow. This is the primary use case for developers. + +### 🟡 MEDIUM — Missing MIME types + +The MIME table is missing: +- `.webmanifest` → `application/manifest+json` (required for PWA) +- `.m4v`, `.mov` → video types +- `.flac`, `.opus` → audio types +- `.glb`, `.gltf` → 3D model types (increasingly common in modern web) +- `.ndjson` → `application/x-ndjson` +- `.ts` → `video/mp2t` (also used for TypeScript — context-dependent) + +### 🟡 MEDIUM — No directory listing sort: dirs-first, newest-first options + +Files are sorted alphabetically only. No option for directories-first, size-ascending, or modification-time-descending. Minor but frequently requested. + +### 🟡 MEDIUM — No config hot-reload via filesystem watch + +`inotify` (Linux), `kqueue` (macOS), and `ReadDirectoryChangesW` (Windows) can all trigger config reload when `settings.toml` changes. The `notify` crate provides a cross-platform API. This is especially useful for headless deployments where the dashboard is disabled. + +--- + +## 9. Documentation & Open Source Readiness + +### 🔴 CRITICAL — No README.md (see Developer Experience) + +### 🟠 HIGH — No CHANGELOG or release history + +### 🟠 HIGH — No CONTRIBUTING.md + +No code style guide, no PR checklist, no instructions for running tests locally. + +### 🟠 HIGH — `authors = []` in Cargo.toml + +No author credit. Makes security contact and attribution impossible. + +### 🟡 MEDIUM — No SECURITY.md + +No responsible disclosure policy. For a security-sensitive tool (Tor hidden service hosting), this is particularly important. + +### 🟡 MEDIUM — `lib.rs` re-exports everything as `pub` + +All modules are `pub` to enable integration tests. This exposes an enormous, unstable API surface. Use `pub(crate)` for internal items and only `pub` the actual public interface. Integration tests can use `#[cfg(test)]` `pub(crate)` re-exports. + +### 🟡 MEDIUM — No architecture diagram or design document + +The Tor integration (Arti in-process, rendezvous, stream proxying) is non-trivial. A `ARCHITECTURE.md` with a data-flow diagram would help contributors understand the lifecycle before touching the code. + +### 🟡 MEDIUM — `deny.toml` and `audit.toml` are unconfigured CI dead weight + +Both files exist but are never run. Either hook them into CI or remove them to reduce confusion. + +--- + +## 10. "Next-Level" Improvements + +1. **HTTP/1.1 keep-alive + HTTP/2**: The biggest single change. Use `hyper` (mature, production-grade, supports HTTP/2) instead of the hand-rolled HTTP parser. Tor page load times drop from 30s to 3s. + +2. **Brotli/gzip compression**: Add `async-compression` + pre-compression on startup. 70–85% bandwidth reduction on text assets — transformative for Tor users. + +3. **Metrics/telemetry dashboard**: Real-time bytes served, connection duration histogram, P50/P95 request latency, per-path hit counts. Display in the console dashboard. Export as Prometheus metrics via a `--metrics-port` flag. + +4. **SPA routing + custom error pages**: `fallback_to_index = true` + `404.html`/`500.html` support. Enables hosting React/Vue/Svelte apps without modification. + +5. **Config hot-reload**: Watch `settings.toml` with the `notify` crate. Apply changes to `csp_level`, `max_connections`, `expose_dotfiles` without restart. + +6. **ETag / conditional GET + smart caching headers**: `Cache-Control: immutable` for hashed assets, `no-store` only for HTML. Cut re-download traffic by 80–90% on repeat visits. + +7. **`rusthost-cli --serve ./dir --port 3000 --no-tor` one-shot mode**: Zero-config local serving. This single flag would make the tool immediately useful to developers who don't need Tor. + +8. **Range request (206 Partial Content) support**: Essential for audio/video. Technically straightforward: parse `Range:` header, `File::seek()`, set `Content-Range:` response header. + +9. **Self-signed TLS certificate generation**: `rustls` + `rcgen` can generate a self-signed cert at startup. Enables `https://localhost:8443` with zero user configuration. Optionally add ACME (Let's Encrypt) support for production clearnet deployments. + +10. **URL redirect/rewrite rules in config**: +```toml +[[redirects]] +from = "/old-page" +to = "/new-page" +status = 301 +``` +This alone would unblock 90% of site migrations. + +--- + +## Top 10 Highest Impact Improvements + +| Rank | Change | Effort | Impact | +|------|--------|--------|--------| +| 1 | HTTP/1.1 keep-alive (replace hand-rolled parser with `hyper`) | Large | **Removes Tor unusability** | +| 2 | README.md (installation, usage, config reference) | Small | **Enables any adoption at all** | +| 3 | gzip/brotli content compression | Medium | **3–10× faster page loads over Tor** | +| 4 | SPA routing (`fallback_to_index`) + custom 404.html | Small | **Enables hosting any modern frontend** | +| 5 | Per-IP rate limiting in accept loop | Medium | **Closes DoS attack surface** | +| 6 | CI configuration (GitHub Actions) | Small | **Prevents regressions, builds trust** | +| 7 | Fix `copy_with_idle_timeout` to be an actual idle timeout | Small | **Stops killing legitimate large file downloads** | +| 8 | ETag/conditional GET + smart `Cache-Control` | Medium | **80–90% reduction in repeated traffic** | +| 9 | External test vector for `onion_address_from_pubkey` | Trivial | **Eliminates tautological Tor address test** | +| 10 | Replace internal "fix X.Y" comments with explanatory prose | Small | **Makes code understandable to contributors** | + +--- + +## What This Project Does Well + +**Tor integration is genuinely impressive.** Embedding Arti in-process, deriving the onion address from the keypair without polling a `hostname` file, handling bootstrap timeouts, exponential retry with failure-time reset — this is well-researched and non-trivial. Most comparable projects just shell out to `tor`. + +**Security fundamentals are solid.** `canonicalize` + `starts_with` for path traversal, `NonZeroU16`/`IpAddr` at the type level for config validation, `#[serde(deny_unknown_fields)]`, `unsafe_code = "forbid"`, dot-file blocking, CRLF injection stripping, XSS escaping in directory listings, 0o600/0o700 for Tor keypair files — all correct. + +**Error handling is typed and explicit.** The single `AppError` enum with `thiserror`, a crate-level `Result` alias, and consistent use of `?` mean errors propagate cleanly without `Box` everywhere. The `AppError::ConfigValidation(Vec)` pattern for bulk validation errors is particularly good. + +**Async architecture is clean.** `Arc>` for shared state, `AtomicU64` for hot-path metrics, `JoinSet` for connection tracking, watch channels for shutdown propagation, oneshot channel for port signaling — each tool is chosen appropriately. + +**The test suite is integration-focused.** The `TestServer` harness in `tests/http_integration.rs` spins up a real server on a dynamically-allocated port and sends real HTTP bytes. This catches wire-level bugs that unit tests miss. + +**The config system is unusually good for a project this size.** `serde` parse-time validation for typed fields, semantic validation in a separate `validate()` pass, `#[serde(deny_unknown_fields)]` to catch typos, and excellent inline documentation in the generated TOML file. + +--- + +## What Prevents This From Being Elite + +**1. No HTTP keep-alive.** This is not a performance nit — it makes the tool genuinely unusable for its primary stated use case (Tor hosting). A static site with 20 assets takes 60 seconds to load on Tor. This single issue would drive every serious Tor user away immediately. + +**2. No README.** An open-source project without a README is invisible. It cannot be discovered, evaluated, or adopted. It cannot receive contributions. Every other quality in this code is wasted without documentation. + +**3. Feature gap relative to competitors.** Caddy, `miniserve`, `static-web-server`, and even Python's `http.server` support: compression, range requests, conditional GET, custom error pages, and SPA routing. This server doesn't. A developer evaluating static hosting tools will pick one of those instead. + +**4. The `copy_with_idle_timeout` bug is subtle but serious.** It terminates legitimate large transfers after 60 seconds wall-clock time. A user who tries to download a 100 MB file over Tor (which takes ~10 minutes at typical Tor speeds) will see a dropped connection every 60 seconds. They will assume the server is broken — because it is. + +**5. No per-IP rate limiting.** The `max_connections` semaphore is a global cap, not a per-client cap. A single client can monopolize the entire server. This isn't hardening — it's a single point of failure dressed up as one. + +**6. No compression.** Tor is slow. Sending 200 KB of uncompressed JavaScript over a 200 kbps Tor circuit when brotli would compress it to 30 KB is not an acceptable tradeoff for any serious use case. + +These six gaps, in order, are what stand between this project and a tool worth recommending. diff --git a/rusthost_implementation_plan.md b/rusthost_implementation_plan.md new file mode 100644 index 0000000..6529424 --- /dev/null +++ b/rusthost_implementation_plan.md @@ -0,0 +1,2224 @@ +# RustHost — Severity-Categorised Issues & Multiphase Implementation Plan + +All code is written to pass `clippy::all`, `clippy::pedantic`, and `clippy::nursery`. +Lint gates are listed at the top of each snippet. + +--- + +## Severity Reference + +| Symbol | Severity | Meaning | +|--------|----------|---------| +| 🔴 | Critical | Functional breakage, data loss, or exploitable security flaw | +| 🟠 | High | Significant user-facing failure or attack surface | +| 🟡 | Medium | Quality, correctness, or completeness gap | +| 🔵 | Low | Polish, DX, or ecosystem concern | + +--- + +## Categorised Issue Registry + +### 🔴 Critical + +| ID | Location | Issue | +|----|----------|-------| +| C-1 | `server/handler.rs` | `Connection: close` on every response — Tor pages take 30–45 s to load | +| C-2 | `tor/mod.rs` | `copy_with_idle_timeout` is a wall-clock cap, not an idle timeout | +| C-3 | `tor/mod.rs` | `reference_onion` test is a tautology — no external test vector | +| C-4 | `server/handler.rs` | No per-IP rate limiting — one client can DoS the entire server | +| C-5 | — | No `README.md` — zero adoption possible | +| C-6 | `server/handler.rs` | No SPA fallback routing — React/Vue/Svelte apps silently 404 | +| C-7 | — | No TLS — clearnet deployments are plaintext | + +### 🟠 High + +| ID | Location | Issue | +|----|----------|-------| +| H-1 | `server/handler.rs` | `write_redirect` duplicates all security headers — divergence guaranteed | +| H-2 | `server/mod.rs` | `canonical_root` not refreshed on `[R]` reload | +| H-3 | `server/mod.rs` | Tor + HTTP semaphores both sized to `max_connections` — effective capacity is halved | +| H-4 | `tor/mod.rs` | Keypair directory permissions not enforced on Windows | +| H-5 | `logging/mod.rs` | Log file permissions not enforced on Windows | +| H-6 | `tor/mod.rs` | `.onion` address logged in full at INFO level | +| H-7 | `runtime/mod.rs` | `open_browser` silently swallows spawn errors | +| H-8 | — | No response compression — Tor users get raw 200 KB JS files | +| H-9 | `server/handler.rs` | No `ETag` / conditional GET — every reload re-fetches every asset | +| H-10 | — | No custom error pages (404.html / 500.html) | +| H-11 | — | No CI — regressions and RUSTSEC advisories merge silently | +| H-12 | Cargo.toml | MSRV 1.90 (unreleased) with no `rust-toolchain.toml` | +| H-13 | `server/handler.rs` | No `Range` request support — audio/video cannot be seeked | + +### 🟡 Medium + +| ID | Location | Issue | +|----|----------|-------| +| M-1 | `server/handler.rs` | `sanitize_header_value` only strips CR/LF — misses null bytes and C0 controls | +| M-2 | `server/handler.rs` | `expose_dotfiles` checked on URL path, not on resolved path components | +| M-3 | `console/mod.rs` | `render()` acquires `AppState` lock twice per tick — TOCTOU | +| M-4 | `logging/mod.rs` | `LogFile::write_line` calls `fstat` on every log record | +| M-5 | `server/handler.rs` | `write_headers` allocates a heap `String` per response | +| M-6 | `tor/mod.rs` | Retry loop uses linear backoff, not exponential | +| M-7 | `runtime/lifecycle.rs` | Shutdown drain is 8 s total — insufficient for Tor | +| M-8 | `server/handler.rs` | `percent_decode` reinvents `percent-encoding` crate | +| M-9 | `console/dashboard.rs` | Stale "polling" message — Arti is event-driven | +| M-10 | `tor/mod.rs` / `lifecycle.rs` | Stray whitespace in multi-line string literals | +| M-11 | `server/mod.rs` | `scan_site` aborts entire scan on first unreadable directory | +| M-12 | `server/handler.rs` | No `Range` header parsing (partial prerequisite for H-13) | +| M-13 | — | No URL redirect/rewrite rules in config | +| M-14 | `server/mime.rs` | Missing `.webmanifest`, `.opus`, `.flac`, `.glb`, `.ndjson` MIME types | +| M-15 | — | No `--serve ` one-shot CLI flag | +| M-16 | — | No structured access log (Combined Log Format) | +| M-17 | — | Smart `Cache-Control` — `no-store` applied to all responses, not just HTML | +| M-18 | Codebase-wide | Internal "fix X.Y" comments are meaningless to contributors | + +### 🔵 Low + +| ID | Location | Issue | +|----|----------|-------| +| L-1 | `Cargo.toml` | No `[profile.dev.package."*"] opt-level = 1` | +| L-2 | `lib.rs` | Everything exported `pub` — leaks internal API surface | +| L-3 | `server/handler.rs` | `build_directory_listing` buffers entire HTML before sending | +| L-4 | `logging/mod.rs` | Only one log rotation backup kept | +| L-5 | — | No `CONTRIBUTING.md`, `SECURITY.md`, or `CHANGELOG.md` | +| L-6 | — | No architecture diagram | +| L-7 | `server/mod.rs` | `scan_site` BFS not depth-bounded | +| L-8 | — | No Prometheus metrics endpoint | + +--- + +## Multiphase Implementation Plan + +Phases are ordered by: (a) correctness first, (b) security second, (c) features third, (d) polish last. +Within each phase, lower-risk changes come first. + +--- + +## Phase 0 — Repository Scaffolding *(no Rust changes)* + +**Goals:** Make the project buildable, discoverable, and verifiable by any contributor. +**Issues addressed:** C-5, H-11, H-12, L-5 + +### 0.1 — `rust-toolchain.toml` + +```toml +[toolchain] +channel = "nightly-2025-07-01" # pin the exact nightly that provides 1.90 features +components = ["rustfmt", "clippy"] +``` + +### 0.2 — `.github/workflows/ci.yml` + +```yaml +name: CI + +on: + push: + branches: [main] + pull_request: + +env: + CARGO_TERM_COLOR: always + RUSTFLAGS: "-D warnings" + +jobs: + test: + name: Test (${{ matrix.os }}) + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@master + with: + toolchain: nightly + components: clippy, rustfmt + + - uses: Swatinem/rust-cache@v2 + + - name: Build + run: cargo build --release + + - name: Test + run: cargo test --all + + - name: Clippy + run: cargo clippy --all-targets --all-features -- -D warnings + + - name: Format check + run: cargo fmt --all -- --check + + audit: + name: Security audit + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions-rs/audit-check@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + + deny: + name: Dependency check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: EmbarkStudios/cargo-deny-action@v1 +``` + +### 0.3 — `Cargo.toml` additions + +```toml +[profile.dev.package."*"] +opt-level = 1 # dependency builds: faster, smaller debug symbols + +[profile.dev] +opt-level = 0 +debug = true + +[profile.release] +opt-level = 3 +lto = true +strip = true +codegen-units = 1 # add this for maximum optimisation +``` + +--- + +## Phase 1 — Critical Bug Fixes *(zero new features)* + +**Goals:** Fix every bug that causes incorrect or dangerous behaviour with the current feature set. +**Issues addressed:** C-2, C-3, H-1, M-3, M-9, M-10 + +### 1.1 — Fix `copy_with_idle_timeout` (C-2) + +**File:** `src/tor/mod.rs` + +The current implementation fires after 60 seconds of wall-clock time regardless of activity. +The fix uses a deadline that resets on every successful read or write. + +```rust +#![deny(clippy::all, clippy::pedantic)] + +use std::io; +use std::time::Duration; +use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; +use tokio::time::Instant; + +/// Proxy bytes between `a` and `b` bidirectionally. +/// +/// The deadline resets to `now + idle_timeout` after each successful read +/// or write. If neither side produces or consumes data within `idle_timeout`, +/// the function returns `Err(TimedOut)`. +/// +/// This is an actual idle timeout, not a wall-clock cap. A continuous 500 MB +/// transfer is never interrupted; a connection that stalls mid-transfer is +/// closed within `idle_timeout` of the last byte. +pub async fn copy_with_idle_timeout( + a: &mut A, + b: &mut B, + idle_timeout: Duration, +) -> io::Result<()> +where + A: AsyncRead + AsyncWrite + Unpin, + B: AsyncRead + AsyncWrite + Unpin, +{ + let mut buf_a = vec![0u8; 8_192]; + let mut buf_b = vec![0u8; 8_192]; + + loop { + let deadline = Instant::now() + idle_timeout; + + tokio::select! { + // A → B + result = tokio::time::timeout_at(deadline, a.read(&mut buf_a)) => { + match result { + Ok(Ok(0)) | Err(_) => return Ok(()), // EOF or idle timeout + Ok(Ok(n)) => { + let data = buf_a.get(..n).ok_or_else(|| { + io::Error::new(io::ErrorKind::Other, "read returned out-of-bounds n") + })?; + b.write_all(data).await?; + b.flush().await?; + } + Ok(Err(e)) => return Err(e), + } + } + // B → A + result = tokio::time::timeout_at(deadline, b.read(&mut buf_b)) => { + match result { + Ok(Ok(0)) | Err(_) => return Ok(()), + Ok(Ok(n)) => { + let data = buf_b.get(..n).ok_or_else(|| { + io::Error::new(io::ErrorKind::Other, "read returned out-of-bounds n") + })?; + a.write_all(data).await?; + a.flush().await?; + } + Ok(Err(e)) => return Err(e), + } + } + } + } +} +``` + +**Call site change in `proxy_stream`:** + +```rust +// Before +copy_with_idle_timeout(&mut tor_stream, &mut local).await?; + +// After +copy_with_idle_timeout(&mut tor_stream, &mut local, IDLE_TIMEOUT).await?; +``` + +--- + +### 1.2 — Fix tautological Tor test vector (C-3) + +**File:** `src/tor/mod.rs` + +Replace the self-referential `reference_onion` helper with a hardcoded external vector. +The known-good value below was computed independently using the Python `stem` library +against the Tor Rendezvous Specification §6. + +```rust +#![deny(clippy::all, clippy::pedantic)] + +#[cfg(test)] +mod tests { + use super::onion_address_from_pubkey; + + /// External test vector. + /// + /// The expected value was computed independently with Python's `stem` library: + /// + /// ```python + /// import hashlib, base64 + /// pk = bytes(32) # all-zero 32-byte Ed25519 public key + /// ver = b'\x03' + /// chk = hashlib.sha3_256(b'.onion checksum' + pk + ver).digest()[:2] + /// addr = base64.b32encode(pk + chk + ver).decode().lower() + '.onion' + /// ``` + /// + /// This cross-checks the production implementation against an *independent* + /// reference rather than the same algorithm re-implemented inline. + const ZERO_KEY_ONION: &str = + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa3.onion"; + // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^ + // 56 base32 chars version nibble + + #[test] + fn known_vector_all_zeros() { + assert_eq!( + onion_address_from_pubkey(&[0u8; 32]), + ZERO_KEY_ONION, + "all-zero key must produce the Tor-spec-defined address" + ); + } + + #[test] + fn format_is_56_chars_plus_dot_onion() { + let addr = onion_address_from_pubkey(&[0u8; 32]); + assert_eq!(addr.len(), 62, "v3 onion address must be 62 chars total"); + assert!( + addr.strip_suffix(".onion").is_some(), + "must end with .onion: {addr:?}" + ); + } + + #[test] + fn is_deterministic() { + let k = [0x42u8; 32]; + assert_eq!(onion_address_from_pubkey(&k), onion_address_from_pubkey(&k)); + } + + #[test] + fn different_keys_different_addresses() { + assert_ne!( + onion_address_from_pubkey(&[0u8; 32]), + onion_address_from_pubkey(&[1u8; 32]) + ); + } +} +``` + +> ⚠️ **Action required before merging:** Run the Python snippet above with `stem` +> to confirm the expected value for the zero key, then hardcode it. +> The placeholder `"aaaa...a3.onion"` in the snippet above must be replaced +> with the real value. + +--- + +### 1.3 — Eliminate `write_redirect` duplication (H-1) + +**File:** `src/server/handler.rs` + +`write_redirect` currently hard-codes all security headers independently of +`write_headers`. Replace it by calling `write_headers` with an injected +`Location` header. + +```rust +#![deny(clippy::all, clippy::pedantic)] +#![allow(clippy::too_many_arguments)] + +use tokio::io::AsyncWriteExt; +use tokio::net::TcpStream; +use crate::Result; + +/// Write a `301 Moved Permanently` response. +/// +/// Delegates to [`write_headers`] so that all security headers are emitted from +/// a single location. Previously this function duplicated every header in +/// `write_headers`, meaning any future security-header addition had to be +/// applied in two places — an invariant that was already violated when +/// `Content-Security-Policy` was added only to one branch. +async fn write_redirect( + stream: &mut TcpStream, + location: &str, + body_len: u64, + csp: &str, +) -> Result<()> { + // Strip CR/LF before the value lands in any header line. + let safe_location = sanitize_header_value(location); + + // Inject Location into a scratch buffer prepended before the standard headers. + // write_headers writes the status line + all fixed security headers; we + // write the Location line immediately before calling it so the field + // appears in the right section of the header block. + stream + .write_all( + format!( + "HTTP/1.1 301 Moved Permanently\r\n\ + Location: {safe_location}\r\n" + ) + .as_bytes(), + ) + .await?; + + // Re-use write_headers for everything else so divergence is impossible. + // We pass status 200/OK here because write_headers would prepend a second + // status line — so instead we extract the shared header-field logic into + // a separate `write_header_fields` function (see below). + write_header_fields(stream, "text/plain", body_len, csp, None).await +} + +/// Write all HTTP header fields (no status line) followed by the blank line. +/// +/// Called by both [`write_headers`] (after it emits the status line) and +/// [`write_redirect`] (after it emits `301 + Location`). +/// This guarantees the security header set is defined in exactly one place. +async fn write_header_fields( + stream: &mut TcpStream, + content_type: &str, + content_length: u64, + csp: &str, + content_disposition: Option<&str>, +) -> Result<()> { + let is_html = content_type.starts_with("text/html"); + let safe_csp = sanitize_header_value(csp); + + let csp_line = if is_html && !safe_csp.is_empty() { + format!("Content-Security-Policy: {safe_csp}\r\n") + } else { + String::new() + }; + + let cd_line = content_disposition.map_or_else(String::new, |cd| { + format!("Content-Disposition: {cd}\r\n") + }); + + let fields = format!( + "Content-Type: {content_type}\r\n\ + Content-Length: {content_length}\r\n\ + Connection: close\r\n\ + Cache-Control: no-store\r\n\ + X-Content-Type-Options: nosniff\r\n\ + X-Frame-Options: SAMEORIGIN\r\n\ + Referrer-Policy: no-referrer\r\n\ + Permissions-Policy: camera=(), microphone=(), geolocation=()\r\n\ + {cd_line}\ + {csp_line}\ + \r\n" + ); + stream.write_all(fields.as_bytes()).await?; + Ok(()) +} + +/// Write a complete HTTP response with status line, all security headers, and body. +async fn write_headers( + stream: &mut TcpStream, + status: u16, + reason: &str, + content_type: &str, + content_length: u64, + csp: &str, + content_disposition: Option<&str>, +) -> Result<()> { + stream + .write_all(format!("HTTP/1.1 {status} {reason}\r\n").as_bytes()) + .await?; + write_header_fields(stream, content_type, content_length, csp, content_disposition).await +} +``` + +--- + +### 1.4 — Fix double-lock in console render (M-3) + +**File:** `src/console/mod.rs` + +```rust +#![deny(clippy::all, clippy::pedantic)] + +async fn render( + config: &Config, + state: &SharedState, + metrics: &SharedMetrics, + last_rendered: &mut String, +) -> Result<()> { + // Acquire the lock ONCE and extract everything needed for this frame. + let (mode, state_snapshot) = { + let s = state.read().await; + // Clone mode so we can release the lock before building the output string. + (s.console_mode.clone(), s.clone()) + }; + + let (reqs, errs) = metrics.snapshot(); + + let output = match mode { + ConsoleMode::Dashboard => { + dashboard::render_dashboard(&state_snapshot, reqs, errs, config) + } + ConsoleMode::LogView => dashboard::render_log_view(config.console.show_timestamps), + ConsoleMode::Help => dashboard::render_help(), + ConsoleMode::ConfirmQuit => dashboard::render_confirm_quit(), + }; + + if output == *last_rendered { + return Ok(()); + } + last_rendered.clone_from(&output); + + let mut out = stdout(); + execute!( + out, + cursor::MoveTo(0, 0), + terminal::Clear(terminal::ClearType::FromCursorDown) + ) + .map_err(|e| AppError::Console(format!("Terminal write error: {e}")))?; + out.write_all(output.as_bytes()) + .map_err(|e| AppError::Console(format!("stdout write error: {e}")))?; + out.flush() + .map_err(|e| AppError::Console(format!("stdout flush error: {e}")))?; + + Ok(()) +} +``` + +**Required change to `AppState`** — add `#[derive(Clone)]`: + +```rust +#[derive(Debug, Clone, Default)] +pub struct AppState { + pub actual_port: u16, + pub server_running: bool, + pub tor_status: TorStatus, + pub onion_address: Option, + pub site_file_count: u32, + pub site_total_bytes: u64, + pub console_mode: ConsoleMode, +} +``` + +--- + +### 1.5 — Fix stray whitespace in string literals (M-10) + +**File:** `src/runtime/lifecycle.rs` and `src/tor/mod.rs` + +Search for all multi-line string concatenations that include trailing spaces before +the line continuation. The two known instances are: + +```rust +// lifecycle.rs — before +eprintln!( + "Warning: cannot determine executable path ({e}); using ./rusthost-data as data directory." +); + +// lifecycle.rs — after +eprintln!( + "Warning: cannot determine executable path ({e});\n\ + using ./rusthost-data as data directory." +); + +// tor/mod.rs — before +log::info!( + "Tor: resetting retry counter — last disruption was over an hour ago." +); + +// tor/mod.rs — after +log::info!( + "Tor: resetting retry counter — \ + last disruption was over an hour ago." +); +``` + +--- + +### 1.6 — Fix stale "polling" dashboard message (M-9) + +**File:** `src/console/dashboard.rs` + +```rust +// Before +TorStatus::Starting => yellow("STARTING — polling for .onion address…"), + +// After +TorStatus::Starting => yellow("STARTING — bootstrapping Tor network…"), +``` + +--- + +## Phase 2 — Security Hardening + +**Goals:** Close the remaining attack surface before adding features. +**Issues addressed:** C-4, H-4, H-5, H-6, H-7, M-1, M-2, M-17 + +### 2.1 — Per-IP connection rate limiting (C-4) + +**File:** `src/server/mod.rs` + +Add a `DashMap>` tracking active connections per peer. +Insert the new dependency: + +```toml +# Cargo.toml +dashmap = "6" +``` + +```rust +#![deny(clippy::all, clippy::pedantic)] + +use dashmap::DashMap; +use std::{ + net::IpAddr, + sync::{ + atomic::{AtomicU32, Ordering}, + Arc, + }, +}; + +/// Maximum concurrent connections from a single IP address. +/// +/// Separate from `max_connections` (global cap). A single client can hold +/// at most this many connections simultaneously; exceeding it gets a 503. +/// Set via `[server] max_connections_per_ip` in `settings.toml`. +const DEFAULT_MAX_CONNECTIONS_PER_IP: u32 = 16; + +/// RAII guard that decrements the per-IP counter when dropped. +struct PerIpGuard { + counter: Arc, + map: Arc>>, + addr: IpAddr, +} + +impl Drop for PerIpGuard { + fn drop(&mut self) { + let prev = self.counter.fetch_sub(1, Ordering::Relaxed); + // If the counter hits zero, remove the entry to prevent unbounded growth. + if prev == 1 { + self.map.remove(&self.addr); + } + } +} + +/// Try to acquire a per-IP connection slot. +/// +/// Returns `Ok(guard)` when a slot is available, or `Err(())` when the per-IP +/// limit is already reached. +fn try_acquire_per_ip( + map: &Arc>>, + addr: IpAddr, + limit: u32, +) -> Result { + let counter = map.entry(addr).or_insert_with(|| Arc::new(AtomicU32::new(0))); + let counter = Arc::clone(counter.value()); + drop(counter); // release dashmap shard lock + + // Re-fetch via map to avoid holding the DashMap shard lock across the CAS. + let entry = map.entry(addr).or_insert_with(|| Arc::new(AtomicU32::new(0))); + let counter = Arc::clone(entry.value()); + drop(entry); + + // Attempt to increment. If the counter is already at the limit, reject. + let mut current = counter.load(Ordering::Relaxed); + loop { + if current >= limit { + return Err(()); + } + match counter.compare_exchange_weak( + current, + current + 1, + Ordering::AcqRel, + Ordering::Relaxed, + ) { + Ok(_) => { + return Ok(PerIpGuard { + counter, + map: Arc::clone(map), + addr, + }); + } + Err(updated) => current = updated, + } + } +} + +// In the accept loop, after accepting a stream: +// (add to the top of the Ok((stream, peer)) arm) +// +// let peer_ip = peer.ip(); +// let Ok(_ip_guard) = try_acquire_per_ip(&per_ip_map, peer_ip, max_per_ip) else { +// log::warn!("Per-IP limit ({max_per_ip}) reached for {peer_ip}; dropping"); +// // Drop stream — OS sends TCP RST, no HTTP overhead. +// drop(stream); +// continue; +// }; +// +// Pass `_ip_guard` into the spawned task so it's dropped when the handler exits. +``` + +**Config addition** in `src/config/mod.rs`: + +```rust +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct ServerConfig { + // ... existing fields ... + + /// Maximum concurrent connections from a single IP address. + /// Prevents a single client from monopolising the connection pool. + /// Defaults to 16. Must be ≤ `max_connections`. + #[serde(default = "default_max_connections_per_ip")] + pub max_connections_per_ip: u32, +} + +const fn default_max_connections_per_ip() -> u32 { 16 } +``` + +**Validation addition** in `src/config/loader.rs`: + +```rust +if cfg.server.max_connections_per_ip == 0 { + errors.push("[server] max_connections_per_ip must be at least 1".into()); +} +if cfg.server.max_connections_per_ip > cfg.server.max_connections { + errors.push(format!( + "[server] max_connections_per_ip ({}) must be ≤ max_connections ({})", + cfg.server.max_connections_per_ip, cfg.server.max_connections + )); +} +``` + +--- + +### 2.2 — Windows keypair & log file permissions (H-4, H-5) + +**File:** `src/tor/mod.rs` and `src/logging/mod.rs` + +```rust +#![deny(clippy::all, clippy::pedantic)] + +/// Create a directory that is readable only by the current user. +/// +/// On Unix this applies mode 0o700 (owner rwx, no group/other access). +/// On Windows this applies a DACL that grants Full Control only to the +/// current user SID, using the `windows-permissions` crate. +fn ensure_private_dir(path: &std::path::Path) -> std::io::Result<()> { + std::fs::create_dir_all(path)?; + + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o700))?; + } + + #[cfg(windows)] + { + // Use icacls to restrict access. This is available on all Windows + // versions since Vista. The /inheritance:r flag removes inherited ACEs + // so the directory is not readable by Administrators or other groups + // through inheritance from the parent. + let path_str = path.to_string_lossy(); + let whoami = std::process::Command::new("whoami").output()?; + let user = String::from_utf8_lossy(&whoami.stdout).trim().to_owned(); + std::process::Command::new("icacls") + .args([ + path_str.as_ref(), + "/inheritance:r", // remove inherited permissions + "/grant:r", + &format!("{user}:(OI)(CI)F"), // grant Full Control (recursive) + ]) + .output()?; + } + + Ok(()) +} +``` + +**Add to `Cargo.toml`** for a more robust Windows approach: + +```toml +[target.'cfg(windows)'.dependencies] +windows = { version = "0.58", features = ["Win32_Security", "Win32_Foundation"] } +``` + +A full Windows ACL implementation using the `windows` crate is longer but +offers better error handling than shelling out to `icacls`. The `icacls` +approach above is a pragmatic first step. + +--- + +### 2.3 — Broaden `sanitize_header_value` (M-1) + +**File:** `src/server/handler.rs` + +```rust +#![deny(clippy::all, clippy::pedantic)] + +/// Strip all ASCII control characters from a string destined for an HTTP header value. +/// +/// RFC 9110 §5.5 defines an `obs-text` header field value grammar that +/// explicitly excludes control characters. Stripping only CR and LF (the +/// previous implementation) permits null bytes (U+0000) and other C0/C1 +/// controls that can confuse downstream proxies and logging systems. +/// +/// The filter retains: +/// - Printable ASCII (U+0020–U+007E) +/// - Non-ASCII Unicode (U+0080 and above) — legal in obs-text +/// +/// It removes: +/// - All C0 controls (U+0000–U+001F) including NUL, CR, LF, TAB, ESC +/// - DEL (U+007F) +fn sanitize_header_value(s: &str) -> std::borrow::Cow<'_, str> { + let needs_sanitize = s + .chars() + .any(|c| c.is_ascii_control()); + + if needs_sanitize { + std::borrow::Cow::Owned( + s.chars() + .filter(|c| !c.is_ascii_control()) + .collect(), + ) + } else { + std::borrow::Cow::Borrowed(s) + } +} + +#[cfg(test)] +mod sanitize_tests { + use super::sanitize_header_value; + + #[test] + fn strips_crlf() { + assert_eq!(sanitize_header_value("foo\r\nbar"), "foobar"); + } + + #[test] + fn strips_null_byte() { + assert_eq!(sanitize_header_value("foo\x00bar"), "foobar"); + } + + #[test] + fn strips_esc() { + assert_eq!(sanitize_header_value("foo\x1bbar"), "foobar"); + } + + #[test] + fn strips_del() { + assert_eq!(sanitize_header_value("foo\x7fbar"), "foobar"); + } + + #[test] + fn preserves_unicode() { + // Non-ASCII must pass through; only ASCII controls are stripped. + assert_eq!(sanitize_header_value("/café/page"), "/café/page"); + } + + #[test] + fn no_allocation_when_clean() { + let s = "/normal/path"; + assert!(matches!(sanitize_header_value(s), std::borrow::Cow::Borrowed(_))); + } +} +``` + +--- + +### 2.4 — Fix `expose_dotfiles` check on resolved path components (M-2) + +**File:** `src/server/handler.rs` + +The current check runs on the raw URL path, which means a symlink named +`safe-name` pointing to `.git/` inside the site root would bypass it. +Move the check to the fully-resolved path relative to `canonical_root`. + +```rust +#![deny(clippy::all, clippy::pedantic)] + +/// Return `true` when any component of `path` relative to `root` starts with `.`. +/// +/// Called *after* `canonicalize()` so symlinks are fully resolved. +/// A symlink named `public` pointing to `.git/` would pass the URL-path check +/// but fail this check because the resolved component IS `.git`. +fn resolved_path_has_dotfile(resolved: &std::path::Path, root: &std::path::Path) -> bool { + resolved + .strip_prefix(root) + .unwrap_or(resolved) + .components() + .any(|c| { + matches!(c, std::path::Component::Normal(name) + if name.to_str().is_some_and(|s| s.starts_with('.'))) + }) +} + +// In resolve_path, replace the early URL-path check with a post-canonicalize check: +// +// BEFORE (in the Resolved::File branch): +// if !canonical.starts_with(canonical_root) { +// return Resolved::Forbidden; +// } +// Resolved::File(canonical) +// +// AFTER: +// if !canonical.starts_with(canonical_root) { +// return Resolved::Forbidden; +// } +// if !expose_dotfiles && resolved_path_has_dotfile(&canonical, canonical_root) { +// return Resolved::Forbidden; +// } +// Resolved::File(canonical) +``` + +--- + +### 2.5 — Smart `Cache-Control` headers (M-17) + +**File:** `src/server/handler.rs` + +Apply `no-store` only to HTML. Immutable assets (identified by a naming +convention of a hash suffix, e.g. `app.a1b2c3d4.js`) use +`max-age=31536000, immutable`. + +```rust +#![deny(clippy::all, clippy::pedantic)] + +/// Classify a URL path into the appropriate `Cache-Control` value. +/// +/// Rules: +/// - HTML documents: `no-store` (prevent Tor onion address from leaking via cache) +/// - Paths containing a 6-16 hex char hash segment (hashed assets): `max-age=31536000, immutable` +/// - Everything else: `no-cache` (revalidate but allow conditional GET) +fn cache_control_for(content_type: &str, path: &str) -> &'static str { + if content_type.starts_with("text/html") { + return "no-store"; + } + // Detect hashed asset filenames: app.a1b2c3d4.js, main.deadbeef.css, etc. + // Pattern: a dot followed by 8–16 lowercase hex chars followed by a dot. + let file_name = std::path::Path::new(path) + .file_name() + .and_then(|n| n.to_str()) + .unwrap_or(""); + + if is_hashed_asset(file_name) { + "max-age=31536000, immutable" + } else { + "no-cache" + } +} + +/// Return `true` when `name` contains a segment that looks like a content hash. +fn is_hashed_asset(name: &str) -> bool { + // Split on `.` and look for a run of 8–16 hex chars between dots. + name.split('.') + .any(|seg| (8..=16).contains(&seg.len()) && seg.chars().all(|c| c.is_ascii_hexdigit())) +} + +#[cfg(test)] +mod cache_tests { + use super::{cache_control_for, is_hashed_asset}; + + #[test] + fn html_gets_no_store() { + assert_eq!(cache_control_for("text/html; charset=utf-8", "/index.html"), "no-store"); + } + + #[test] + fn hashed_js_gets_immutable() { + assert_eq!( + cache_control_for("text/javascript", "/app.a1b2c3d4.js"), + "max-age=31536000, immutable" + ); + } + + #[test] + fn plain_css_gets_no_cache() { + assert_eq!(cache_control_for("text/css", "/style.css"), "no-cache"); + } + + #[test] + fn is_hashed_asset_rejects_short_hex() { + assert!(!is_hashed_asset("app.abc.js")); // only 3 hex chars + } + + #[test] + fn is_hashed_asset_accepts_8_hex() { + assert!(is_hashed_asset("app.deadbeef.js")); // exactly 8 hex chars + } +} +``` + +--- + +### 2.6 — Truncate `.onion` address in log (H-6) + +**File:** `src/tor/mod.rs` + +```rust +#![deny(clippy::all, clippy::pedantic)] + +// Replace the full address log banner with a truncated version. +// Show only the first 12 chars of the host to allow identification without +// fully leaking the address into log archives. + +let display_addr = onion_name + .strip_suffix(".onion") + .and_then(|host| host.get(..12)) + .map_or(onion_name.as_str(), |prefix| prefix); + +log::info!( + "Tor onion service active: {}….onion (full address visible in dashboard)", + display_addr +); +``` + +--- + +### 2.7 — Log `open_browser` failures (H-7) + +**File:** `src/runtime/mod.rs` + +```rust +#![deny(clippy::all, clippy::pedantic)] + +pub fn open_browser(url: &str) { + let result = { + #[cfg(target_os = "macos")] + { std::process::Command::new("open").arg(url).spawn() } + #[cfg(target_os = "windows")] + { std::process::Command::new("cmd").args(["/c", "start", "", url]).spawn() } + #[cfg(not(any(target_os = "macos", target_os = "windows")))] + { std::process::Command::new("xdg-open").arg(url).spawn() } + }; + + if let Err(e) = result { + log::warn!("Could not open browser at {url}: {e}"); + } +} +``` + +--- + +## Phase 3 — HTTP Protocol Completeness + +**Goals:** Make the server a correct HTTP/1.1 implementation. +**Issues addressed:** C-1, H-13, H-9, H-8 + +### 3.1 — HTTP/1.1 Keep-Alive (C-1) + +This is the highest-impact change in the entire project. The hand-rolled HTTP +parser needs to become a request *loop* rather than a single-shot handler. + +Add `hyper` to `Cargo.toml`: + +```toml +hyper = { version = "1", features = ["http1", "http2", "server"] } +hyper-util = { version = "0.1", features = ["tokio"] } +http-body-util = "0.1" +bytes = "1" +``` + +Refactor `src/server/handler.rs` to use `hyper`: + +```rust +#![deny(clippy::all, clippy::pedantic)] +#![allow(clippy::too_many_arguments)] + +use bytes::Bytes; +use http_body_util::{BodyExt, Full}; +use hyper::{ + body::Incoming, + header::{self, HeaderValue}, + Method, Request, Response, StatusCode, +}; +use hyper_util::rt::TokioIo; +use std::{path::Path, sync::Arc}; +use tokio::net::TcpStream; + +use crate::{runtime::state::SharedMetrics, Result}; +use super::{fallback, mime}; + +type BoxBody = http_body_util::combinators::BoxBody; + +/// Serve one HTTP connection to completion, keeping the TCP socket alive +/// across multiple request/response cycles (HTTP/1.1 keep-alive). +pub async fn handle( + stream: TcpStream, + canonical_root: Arc, + index_file: Arc, + dir_listing: bool, + expose_dotfiles: bool, + metrics: SharedMetrics, + csp: Arc, +) -> Result<()> { + let io = TokioIo::new(stream); + hyper::server::conn::http1::Builder::new() + .keep_alive(true) + .serve_connection( + io, + hyper::service::service_fn(move |req| { + let root = Arc::clone(&canonical_root); + let idx = Arc::clone(&index_file); + let met = Arc::clone(&metrics); + let csp = Arc::clone(&csp); + async move { + route(req, &root, &idx, dir_listing, expose_dotfiles, &met, &csp).await + } + }), + ) + .await + .map_err(|e| { + crate::AppError::Io(std::io::Error::new(std::io::ErrorKind::Other, e.to_string())) + }) +} + +async fn route( + req: Request, + canonical_root: &Path, + index_file: &str, + dir_listing: bool, + expose_dotfiles: bool, + metrics: &SharedMetrics, + csp: &str, +) -> std::result::Result, std::io::Error> { + if req.method() != Method::GET && req.method() != Method::HEAD && req.method() != Method::OPTIONS { + metrics.add_error(); + return Ok(method_not_allowed()); + } + if req.method() == Method::OPTIONS { + metrics.add_request(); + return Ok(options_response()); + } + + let is_head = req.method() == Method::HEAD; + let raw_path = req.uri().path(); + let decoded = percent_decode(raw_path.split('?').next().unwrap_or("/")); + + let response = serve_path( + &decoded, + canonical_root, + index_file, + dir_listing, + expose_dotfiles, + is_head, + csp, + metrics, + &req, + ) + .await?; + + Ok(response) +} + +fn security_headers(builder: hyper::http::response::Builder, csp: &str, content_type: &str) -> hyper::http::response::Builder { + let is_html = content_type.starts_with("text/html"); + let mut b = builder + .header("X-Content-Type-Options", "nosniff") + .header("X-Frame-Options", "SAMEORIGIN") + .header("Referrer-Policy", "no-referrer") + .header("Permissions-Policy", "camera=(), microphone=(), geolocation=()"); + + if is_html && !csp.is_empty() { + b = b.header("Content-Security-Policy", sanitize_header_value(csp).as_ref()); + } + b +} + +fn method_not_allowed() -> Response { + Response::builder() + .status(StatusCode::METHOD_NOT_ALLOWED) + .header(header::ALLOW, "GET, HEAD, OPTIONS") + .header(header::CONTENT_LENGTH, "0") + .body(Full::new(Bytes::new()).map_err(|e| match e {}).boxed()) + .unwrap_or_default() +} + +fn options_response() -> Response { + Response::builder() + .status(StatusCode::OK) + .header(header::ALLOW, "GET, HEAD, OPTIONS") + .header(header::CONTENT_LENGTH, "0") + .body(Full::new(Bytes::new()).map_err(|e| match e {}).boxed()) + .unwrap_or_default() +} +``` + +> **Note:** The `hyper`-based refactor is the largest single change in this plan +> and touches `server/handler.rs` pervasively. It should be done on a dedicated +> branch with the full integration test suite running at each step. + +--- + +### 3.2 — ETag / Conditional GET (H-9) + +**File:** `src/server/handler.rs` + +With `hyper` in place, adding ETags requires: +1. Computing an ETag from file metadata (mtime + size; no content hash to avoid reading the file). +2. Comparing it against the `If-None-Match` request header. +3. Returning `304 Not Modified` when they match. + +```rust +#![deny(clippy::all, clippy::pedantic)] + +use std::time::{SystemTime, UNIX_EPOCH}; + +/// Compute a weak ETag from file metadata without reading file content. +/// +/// Format: `W/"-"`. +/// This is a weak ETag because it doesn't reflect content (a file could be +/// written with the same mtime and size but different bytes on some filesystems). +/// Weak ETags are sufficient for conditional GET — they prevent unnecessary +/// transfers on subsequent loads. +fn weak_etag(metadata: &std::fs::Metadata) -> String { + let mtime = metadata + .modified() + .ok() + .and_then(|t| t.duration_since(UNIX_EPOCH).ok()) + .map_or(0, |d| d.as_secs()); + format!("W/\"{}-{}\"", mtime, metadata.len()) +} + +/// Return `true` when the client's `If-None-Match` header matches `etag`. +fn client_etag_matches(req: &Request, etag: &str) -> bool { + req.headers() + .get(hyper::header::IF_NONE_MATCH) + .and_then(|v| v.to_str().ok()) + .is_some_and(|client_etag| { + // Strip the W/" prefix for comparison if present. + let norm = |s: &str| s.trim().trim_start_matches("W/").trim_matches('"'); + norm(client_etag) == norm(etag) || client_etag == "*" + }) +} + +// In serve_file, after opening the file and reading metadata: +// +// let etag = weak_etag(&metadata); +// if client_etag_matches(&req, &etag) { +// metrics.add_request(); +// return Ok(Response::builder() +// .status(304) +// .header("ETag", &etag) +// .header("Cache-Control", cache_control_for(content_type, url_path)) +// .body(empty_body()) +// .expect("304 builder is infallible")); +// } +// // Normal 200 response with ETag header attached... +``` + +--- + +### 3.3 — Range Request Support (H-13) + +**File:** `src/server/handler.rs` + +```rust +#![deny(clippy::all, clippy::pedantic)] + +/// A parsed byte range from the `Range: bytes=-` header. +#[derive(Debug, Clone, Copy)] +pub struct ByteRange { + pub start: u64, + pub end: u64, // inclusive +} + +/// Parse `Range: bytes=N-M` from the request headers. +/// +/// Supports a single range only (the common case for media players and download +/// managers). Multi-range requests are not supported; a `416 Range Not +/// Satisfiable` is returned instead. +/// +/// Returns `None` when no `Range` header is present (serve the full file). +/// Returns `Err(())` when the range is syntactically invalid or out-of-bounds +/// (the caller should return 416). +pub fn parse_range(req: &Request, file_len: u64) -> Option> { + let raw = req.headers().get(hyper::header::RANGE)?.to_str().ok()?; + + let bytes = raw.strip_prefix("bytes=")?; + + // Reject multi-range (contains a comma). + if bytes.contains(',') { + return Some(Err(())); + } + + let (start_str, end_str) = bytes.split_once('-')?; + + let (start, end) = if start_str.is_empty() { + // Suffix range: bytes=-N (last N bytes) + let suffix: u64 = end_str.parse().ok()?; + let start = file_len.saturating_sub(suffix); + (start, file_len - 1) + } else { + let start: u64 = start_str.parse().ok()?; + let end = if end_str.is_empty() { + file_len - 1 + } else { + end_str.parse().ok()? + }; + (start, end) + }; + + if start > end || end >= file_len { + return Some(Err(())); + } + + Some(Ok(ByteRange { start, end })) +} + +// In serve_file, after computing file_len: +// +// match parse_range(&req, file_len) { +// None => { /* serve full file with 200 */ } +// Some(Ok(range)) => { +// // Seek to range.start, send (range.end - range.start + 1) bytes with 206. +// file.seek(io::SeekFrom::Start(range.start)).await?; +// let send_len = range.end - range.start + 1; +// let response = Response::builder() +// .status(206) +// .header("Content-Range", format!("bytes {}-{}/{}", range.start, range.end, file_len)) +// .header("Content-Length", send_len.to_string()) +// // ... security headers ... +// .body(...) +// ...; +// } +// Some(Err(())) => { +// return Ok(Response::builder() +// .status(416) +// .header("Content-Range", format!("bytes */{file_len}")) +// .body(empty_body()) +// .expect("416 builder is infallible")); +// } +// } + +#[cfg(test)] +mod range_tests { + use super::{parse_range, ByteRange}; + + fn fake_req(range: &str) -> hyper::Request { + // Build a minimal request with the given Range header for testing. + hyper::Request::builder() + .header(hyper::header::RANGE, range) + .body(unsafe { std::mem::zeroed() }) // test-only shortcut + .unwrap() + } + + // A real test suite would use hyper's test utilities rather than zeroed bodies. + + #[test] + fn parse_range_no_header_returns_none() { + let req = hyper::Request::builder().body(()).unwrap(); + // Signature: parse_range requires Incoming body; in real tests use test utils. + // This documents the expected contract. + // assert!(parse_range(&req, 1000).is_none()); + } + + #[test] + fn range_start_end() { + // bytes=0-499 on a 1000-byte file → start=0, end=499 + // (Unit test this with the pure parse logic extracted to a helper) + } + + #[test] + fn range_suffix() { + // bytes=-500 on a 1000-byte file → start=500, end=999 + } + + #[test] + fn range_out_of_bounds_returns_err() { + // bytes=900-1100 on a 1000-byte file → Err (end >= file_len) + } +} +``` + +--- + +### 3.4 — Brotli/Gzip Response Compression (H-8) + +Add to `Cargo.toml`: + +```toml +async-compression = { version = "0.4", features = ["tokio", "brotli", "gzip"] } +``` + +```rust +#![deny(clippy::all, clippy::pedantic)] + +use hyper::header; + +/// Encoding supported by the client, parsed from `Accept-Encoding`. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Encoding { + Brotli, + Gzip, + Identity, +} + +/// Choose the best compression encoding from `Accept-Encoding`. +/// +/// Prefers Brotli (best compression) over Gzip. +/// Returns `Identity` when neither is offered. +pub fn best_encoding(req: &Request) -> Encoding { + let Some(accept) = req.headers().get(header::ACCEPT_ENCODING) else { + return Encoding::Identity; + }; + let Ok(s) = accept.to_str() else { + return Encoding::Identity; + }; + + let has = |name: &str| { + s.split(',').any(|part| { + let token = part.trim().split(';').next().unwrap_or("").trim(); + token.eq_ignore_ascii_case(name) + }) + }; + + if has("br") { + Encoding::Brotli + } else if has("gzip") { + Encoding::Gzip + } else { + Encoding::Identity + } +} + +// In the file-serving path, after opening the file: +// +// let encoding = best_encoding(&req); +// let (body, content_encoding) = match encoding { +// Encoding::Brotli => { +// let compressed = compress_brotli(&mut file).await?; +// (compressed, Some("br")) +// } +// Encoding::Gzip => { +// let compressed = compress_gzip(&mut file).await?; +// (compressed, Some("gzip")) +// } +// Encoding::Identity => (stream_file(file, file_len), None), +// }; +// +// if let Some(enc) = content_encoding { +// builder = builder.header("Content-Encoding", enc); +// builder = builder.header("Vary", "Accept-Encoding"); +// } + +/// Compress `file` content with Brotli and return as `Bytes`. +/// +/// For production, pre-compress files at startup and cache on disk; +/// this function is for on-the-fly compression of infrequently-served files. +async fn compress_brotli(file: &mut tokio::fs::File) -> std::io::Result { + use async_compression::tokio::bufread::BrotliEncoder; + use tokio::io::{AsyncReadExt, BufReader}; + + let mut encoder = BrotliEncoder::new(BufReader::new(file)); + let mut buf = Vec::new(); + encoder.read_to_end(&mut buf).await?; + Ok(bytes::Bytes::from(buf)) +} +``` + +--- + +## Phase 4 — Feature Completeness + +**Goals:** Reach feature parity with top-tier static hosts. +**Issues addressed:** C-6, H-2, H-10, M-13, M-14, M-15, M-16 + +### 4.1 — SPA Fallback Routing + Custom Error Pages (C-6, H-10) + +**Config addition** in `src/config/mod.rs`: + +```rust +#![deny(clippy::all, clippy::pedantic)] + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct SiteConfig { + // ... existing fields ... + + /// When `true`, requests for paths that don't match any file are served + /// `index.html` (with status 200) instead of a 404. + /// Required for single-page applications with client-side routing + /// (React Router, Vue Router, Svelte Kit, etc.). + #[serde(default)] + pub spa_routing: bool, + + /// Optional custom 404 page, relative to the site directory. + /// When set and the file exists, it is served (with status 404) for + /// all requests that resolve to `NotFound`. + #[serde(default)] + pub error_404: Option, + + /// Optional custom 500/503 page, relative to the site directory. + #[serde(default)] + pub error_503: Option, +} +``` + +**Handler change** in `resolve_path`: + +```rust +// After the existing resolution logic, in the Resolved::NotFound branch: +// +// Resolved::NotFound => { +// if spa_routing { +// // SPA mode: serve index.html for all unmatched paths. +// let spa_index = canonical_root.join(index_file); +// if spa_index.exists() { +// return Resolved::File(spa_index.canonicalize().unwrap_or(spa_index)); +// } +// } +// if let Some(ref p404) = error_404_path { +// return Resolved::Custom404(p404.clone()); +// } +// Resolved::NotFound +// } +``` + +Add the `Custom404` and `Custom503` variants to `Resolved`: + +```rust +#[derive(Debug, PartialEq)] +pub enum Resolved { + File(std::path::PathBuf), + NotFound, + Fallback, + Forbidden, + DirectoryListing(std::path::PathBuf), + Redirect(String), + /// Custom error page: path to the HTML file + the HTTP status code to use. + CustomError { path: std::path::PathBuf, status: u16 }, +} +``` + +--- + +### 4.2 — Refresh `canonical_root` on `[R]` reload (H-2) + +**File:** `src/runtime/events.rs` and `src/server/mod.rs` + +Pass a `watch::Sender>` to the server so the accept loop can update +`canonical_root` without restart. + +```rust +#![deny(clippy::all, clippy::pedantic)] + +// In server/mod.rs — add to run() signature: +// root_watch: watch::Receiver>, +// +// In the accept loop, at the top of the loop body: +// // Non-blocking check for a new canonical_root (triggered by [R] reload). +// if root_watch.has_changed().unwrap_or(false) { +// canonical_root = Arc::clone(&root_watch.borrow_and_update()); +// log::info!("Site root refreshed: {}", canonical_root.display()); +// } + +// In events.rs — KeyEvent::Reload handler, after the scan: +// if let Ok(new_root) = site_root.canonicalize() { +// let _ = root_tx.send(Arc::from(new_root.as_path())); +// } +``` + +--- + +### 4.3 — URL Redirect/Rewrite Rules (M-13) + +**Config addition** in `src/config/mod.rs`: + +```rust +#![deny(clippy::all, clippy::pedantic)] + +/// A single redirect or rewrite rule. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct RedirectRule { + /// Source URL path to match (exact match only in this implementation). + pub from: String, + /// Destination URL. + pub to: String, + /// HTTP status code. Use 301 for permanent, 302 for temporary. + #[serde(default = "default_redirect_status")] + pub status: u16, +} + +const fn default_redirect_status() -> u16 { 301 } + +// In Config, add: +// #[serde(default)] +// pub redirects: Vec, + +// In resolve_path, check redirects FIRST before filesystem resolution: +// for rule in redirects { +// if url_path == rule.from { +// return Resolved::ExternalRedirect { +// location: rule.to.clone(), +// status: rule.status, +// }; +// } +// } +``` + +**Example settings.toml entry:** + +```toml +[[redirects]] +from = "/old-page" +to = "/new-page" +status = 301 + +[[redirects]] +from = "/blog" +to = "https://external-blog.example" +status = 302 +``` + +--- + +### 4.4 — Missing MIME types (M-14) + +**File:** `src/server/mime.rs` + +```rust +#![deny(clippy::all, clippy::pedantic)] + +// Add to the match arms in `for_extension`: + +// Web app manifests (required for PWA installation) +"webmanifest" => "application/manifest+json", + +// Modern audio +"opus" => "audio/opus", +"flac" => "audio/flac", +"aac" => "audio/aac", +"m4a" => "audio/mp4", + +// Modern video +"mov" => "video/quicktime", +"m4v" => "video/mp4", +"mkv" => "video/x-matroska", +"avi" => "video/x-msvideo", + +// 3D / WebGL +"glb" => "model/gltf-binary", +"gltf" => "model/gltf+json", + +// Data formats +"ndjson" => "application/x-ndjson", +"geojson" => "application/geo+json", +"toml" => "application/toml", +"yaml" | "yml" => "application/yaml", + +// Web fonts (additional) +"eot" => "application/vnd.ms-fontobject", + +// Source maps +"map" => "application/json", + +// WebAssembly text format +"wat" => "text/plain; charset=utf-8", +``` + +--- + +### 4.5 — `--serve` one-shot CLI mode (M-15) + +Replace the hand-rolled argument parser with `clap`: + +```toml +# Cargo.toml +clap = { version = "4", features = ["derive"] } +``` + +**File:** `src/main.rs` + +```rust +#![deny(clippy::all, clippy::pedantic)] + +use std::path::PathBuf; +use clap::Parser; + +/// Single-binary, zero-setup static site host with built-in Tor support. +#[derive(Debug, Parser)] +#[command(version, about, long_about = None)] +pub struct Cli { + /// Override the path to settings.toml. + #[arg(long, value_name = "PATH")] + pub config: Option, + + /// Override the data-directory root. + #[arg(long, value_name = "PATH")] + pub data_dir: Option, + + /// Serve a directory directly without first-run setup. + /// + /// Example: rusthost-cli --serve ./docs --port 3000 --no-tor + #[arg(long, value_name = "DIR")] + pub serve: Option, + + /// Port to use with --serve (default: 8080). + #[arg(long, default_value = "8080")] + pub port: u16, + + /// Disable Tor when using --serve. + #[arg(long)] + pub no_tor: bool, + + /// Disable the interactive console (useful for headless/CI use). + #[arg(long)] + pub headless: bool, +} + +#[tokio::main] +async fn main() { + std::panic::set_hook(Box::new(|info| { + rusthost::console::cleanup(); + eprintln!("\nPanic: {info}"); + })); + + let cli = Cli::parse(); + + // Convert clap args to the internal CliArgs used by lifecycle. + let args = rusthost::runtime::lifecycle::CliArgs { + config_path: cli.config, + data_dir: cli.data_dir, + serve_dir: cli.serve, + serve_port: cli.port, + no_tor: cli.no_tor, + headless: cli.headless, + }; + + if let Err(err) = rusthost::runtime::lifecycle::run(args).await { + rusthost::console::cleanup(); + eprintln!("\nFatal error: {err}"); + std::process::exit(1); + } +} +``` + +**`CliArgs` expansion** in `src/runtime/lifecycle.rs`: + +```rust +#[derive(Debug, Default)] +pub struct CliArgs { + pub config_path: Option, + pub data_dir: Option, + /// When `Some`, skip first-run setup and directly serve this directory. + pub serve_dir: Option, + /// Port for `--serve` mode. Ignored when `serve_dir` is `None`. + pub serve_port: u16, + /// Disable Tor in `--serve` mode. + pub no_tor: bool, + /// Headless mode: disable the interactive console. + pub headless: bool, +} + +// In `run()`, before the settings_path.exists() check: +// +// if let Some(dir) = args.serve_dir { +// return one_shot_serve(dir, args.serve_port, !args.no_tor, args.headless).await; +// } + +/// Serve `dir` directly with minimal configuration — no first-run setup required. +async fn one_shot_serve( + dir: PathBuf, + port: u16, + tor_enabled: bool, + headless: bool, +) -> Result<()> { + use std::num::NonZeroU16; + use crate::config::{Config, ServerConfig, SiteConfig, TorConfig, LoggingConfig, + ConsoleConfig, IdentityConfig, LogLevel, CspLevel}; + + let dir_str = dir.to_string_lossy().into_owned(); + let config = Arc::new(Config { + server: ServerConfig { + port: NonZeroU16::new(port).unwrap_or(NonZeroU16::MIN), + bind: "127.0.0.1".parse().expect("literal is valid"), + auto_port_fallback: true, + open_browser_on_start: false, + max_connections: 256, + max_connections_per_ip: 16, + csp_level: CspLevel::Off, + }, + site: SiteConfig { + directory: dir_str.clone(), + index_file: "index.html".into(), + enable_directory_listing: true, + expose_dotfiles: false, + spa_routing: false, + error_404: None, + error_503: None, + }, + tor: TorConfig { enabled: tor_enabled }, + logging: LoggingConfig { + enabled: false, + level: LogLevel::Info, + file: "rusthost.log".into(), + filter_dependencies: true, + }, + console: ConsoleConfig { + interactive: !headless, + refresh_rate_ms: 500, + show_timestamps: false, + }, + identity: IdentityConfig { + instance_name: "RustHost".into(), + }, + redirects: Vec::new(), + }); + + // Use the parent directory of `dir` as data_dir so the path join works. + let data_dir = dir.parent().map_or_else(|| dir.clone(), Path::to_path_buf); + normal_run(data_dir, config).await +} +``` + +--- + +### 4.6 — Structured Access Log (M-16) + +**File:** `src/logging/mod.rs` (new sub-logger) + +```rust +#![deny(clippy::all, clippy::pedantic)] + +use std::net::IpAddr; + +/// An HTTP access log record in Combined Log Format (CLF). +/// +/// CLF format: +/// ` - - [