diff --git a/Textream/Info.plist b/Textream/Info.plist
index 41c3c5c..ece44f0 100644
--- a/Textream/Info.plist
+++ b/Textream/Info.plist
@@ -69,5 +69,7 @@
+ <key>NSCameraUsageDescription</key>
+ <string>Textream uses the camera to detect hand gestures for hands-free script control.</string>
diff --git a/Textream/Textream/HandGestureController.swift b/Textream/Textream/HandGestureController.swift
new file mode 100644
index 0000000..225391d
--- /dev/null
+++ b/Textream/Textream/HandGestureController.swift
@@ -0,0 +1,196 @@
+import AVFoundation
+import Vision
+import AppKit
+
+@Observable
+class HandGestureController: NSObject {
+ private static let logFile: FileHandle? = {
+ let path = "/tmp/textream_hand.log"
+ FileManager.default.createFile(atPath: path, contents: nil)
+ return FileHandle(forWritingAtPath: path)
+ }()
+
+ static func log(_ msg: String) {
+ let line = "\(Date()): \(msg)\n"
+ logFile?.seekToEndOfFile()
+ logFile?.write(line.data(using: .utf8)!)
+ }
+ var isHandRaised: Bool = false {
+ didSet {
+ if isHandRaised != oldValue {
+ onHandStateChanged?(isHandRaised, handHeight)
+ }
+ }
+ }
+ var handHeight: Float = 0.0 // 0.0 = just above threshold, 1.0 = top of frame
+
+ /// Called on main thread when hand raise state changes. (raised, height)
+ var onHandStateChanged: ((Bool, Float) -> Void)?
+
+ private var captureSession: AVCaptureSession?
+ private var videoOutput = AVCaptureVideoDataOutput()
+ private let processingQueue = DispatchQueue(label: "com.textream.handgesture", qos: .userInteractive)
+ private let handPoseRequest = VNDetectHumanHandPoseRequest()
+
+ private let raiseThreshold: Float = 0.25 // wrist Y must exceed this to trigger raise
+ private let lowerThreshold: Float = 0.20 // wrist Y must drop below this to trigger lower (hysteresis)
+ private var recentWristY: [Float] = [] // rolling buffer for smoothing
+ private let smoothingWindow = 4
+
+ private var isRunning = false
+ private var frameCount = 0
+
+ override init() {
+ super.init()
+ handPoseRequest.maximumHandCount = 2
+ Self.log("[HandGesture] init()")
+ }
+
+ func start() {
+ guard !isRunning else {
+ Self.log("[HandGesture] start() skipped — already running")
+ return
+ }
+
+ let status = AVCaptureDevice.authorizationStatus(for: .video)
+ Self.log("[HandGesture] start() called, auth status=\(status.rawValue)")
+ switch status {
+ case .authorized:
+ setupAndStart()
+ case .notDetermined:
+ AVCaptureDevice.requestAccess(for: .video) { [weak self] granted in
+ Self.log("[HandGesture] camera permission granted=\(granted)")
+ if granted {
+ DispatchQueue.main.async { self?.setupAndStart() }
+ }
+ }
+ default:
+ Self.log("[HandGesture] camera permission denied/restricted")
+ return
+ }
+ }
+
+ func stop() {
+ guard isRunning else { return }
+ Self.log("[HandGesture] stop()")
+ // Clear callback first to prevent triggering rewind logic during teardown
+ let savedCallback = onHandStateChanged
+ onHandStateChanged = nil
+
+ captureSession?.stopRunning()
+ captureSession = nil // release session so videoOutput can be re-added later
+ isRunning = false
+ isHandRaised = false
+ handHeight = 0.0
+ recentWristY = []
+
+ // Restore callback for next start
+ onHandStateChanged = savedCallback
+ }
+
+ private func setupAndStart() {
+ Self.log("[HandGesture] setupAndStart()")
+ let session = AVCaptureSession()
+ session.sessionPreset = .low
+
+ guard let camera = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .front)
+ ?? AVCaptureDevice.default(for: .video) else {
+ Self.log("[HandGesture] No camera found")
+ return
+ }
+ Self.log("[HandGesture] Using camera: \(camera.localizedName)")
+
+ guard let input = try? AVCaptureDeviceInput(device: camera) else {
+ Self.log("[HandGesture] Failed to create camera input")
+ return
+ }
+ guard session.canAddInput(input) else {
+ Self.log("[HandGesture] Cannot add input to session")
+ return
+ }
+ session.addInput(input)
+
+ videoOutput = AVCaptureVideoDataOutput()
+ videoOutput.setSampleBufferDelegate(self, queue: processingQueue)
+ videoOutput.alwaysDiscardsLateVideoFrames = true
+ guard session.canAddOutput(videoOutput) else {
+ Self.log("[HandGesture] Cannot add output to session")
+ return
+ }
+ session.addOutput(videoOutput)
+
+ captureSession = session
+
+ processingQueue.async {
+ session.startRunning()
+ Self.log("[HandGesture] session.startRunning() completed")
+ }
+ isRunning = true
+ }
+}
+
+extension HandGestureController: AVCaptureVideoDataOutputSampleBufferDelegate {
+ func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
+ frameCount += 1
+ if frameCount % 30 == 1 {
+ Self.log("[HandGesture] frame \(frameCount) received")
+ }
+ guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else { return }
+
+ let handler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, orientation: .up, options: [:])
+ try? handler.perform([handPoseRequest])
+
+ guard let results = handPoseRequest.results, !results.isEmpty else {
+ DispatchQueue.main.async {
+ self.updateWristPosition(nil)
+ }
+ return
+ }
+
+ // Find the hand with the highest wrist Y
+ var highestWristY: Float = 0
+ for hand in results {
+ if let wrist = try? hand.recognizedPoint(.wrist),
+ wrist.confidence > 0.3 {
+ let y = Float(wrist.location.y)
+ Self.log("[HandGesture] wrist y=\(String(format: "%.3f", y)) conf=\(String(format: "%.2f", wrist.confidence))")
+ if y > highestWristY {
+ highestWristY = y
+ }
+ }
+ }
+
+ DispatchQueue.main.async {
+ self.updateWristPosition(highestWristY > 0 ? highestWristY : nil)
+ }
+ }
+
+ private func updateWristPosition(_ wristY: Float?) {
+ guard let y = wristY else {
+ recentWristY = []
+ handHeight = 0.0
+ isHandRaised = false // set after height so callback has correct height
+ return
+ }
+
+ recentWristY.append(y)
+ if recentWristY.count > smoothingWindow {
+ recentWristY.removeFirst()
+ }
+ let smoothed = recentWristY.reduce(0, +) / Float(recentWristY.count)
+
+ // Hysteresis: raise at raiseThreshold, lower at lowerThreshold
+ if !isHandRaised && smoothed > raiseThreshold {
+ Self.log("[HandGesture] HAND RAISED (smoothed=\(String(format: "%.3f", smoothed)))")
+ handHeight = min(1.0, (smoothed - lowerThreshold) / (1.0 - lowerThreshold))
+ isHandRaised = true
+ } else if isHandRaised && smoothed < lowerThreshold {
+ Self.log("[HandGesture] HAND LOWERED (smoothed=\(String(format: "%.3f", smoothed)))")
+ handHeight = 0.0
+ isHandRaised = false
+ } else if isHandRaised {
+ // Update height while raised (for speed control)
+ handHeight = min(1.0, (smoothed - lowerThreshold) / (1.0 - lowerThreshold))
+ }
+ }
+}
diff --git a/Textream/Textream/NotchOverlayController.swift b/Textream/Textream/NotchOverlayController.swift
index 330d202..b292c95 100644
--- a/Textream/Textream/NotchOverlayController.swift
+++ b/Textream/Textream/NotchOverlayController.swift
@@ -47,6 +47,10 @@ class OverlayContent {
class NotchOverlayController: NSObject {
private var panel: NSPanel?
let speechRecognizer = SpeechRecognizer()
+ let handGestureController = HandGestureController()
+ private var rewindTimer: Timer?
+ private var indicatorWindow: NSWindow?
+ private var indicatorView: NSHostingView?
let overlayContent = OverlayContent()
var onComplete: (() -> Void)?
var onNextPage: (() -> Void)?
@@ -117,6 +121,90 @@ class NotchOverlayController: NSObject {
if settings.listeningMode != .classic {
speechRecognizer.start(with: text)
}
+
+ if settings.handGestureRewind {
+ handGestureController.onHandStateChanged = { [weak self] raised, height in
+ self?.handleHandGesture(raised: raised, height: height)
+ }
+ handGestureController.start()
+ }
+ }
+
+ private func handleHandGesture(raised: Bool, height: Float) {
+ let settings = NotchSettings.shared
+ HandGestureController.log("[Controller] handleHandGesture raised=\(raised) height=\(height) mode=\(settings.listeningMode.rawValue)")
+
+ if raised {
+ showHandIndicator()
+
+ // Pause current mode
+ speechRecognizer.pauseForRewind()
+
+ // Start rewind timer
+ rewindTimer?.invalidate()
+ rewindTimer = Timer.scheduledTimer(withTimeInterval: 0.25, repeats: true) { [weak self] _ in
+ guard let self else { return }
+ let h = self.handGestureController.handHeight
+ let words: Int
+ if h < 0.3 { words = 1 }
+ else if h < 0.7 { words = 2 }
+ else { words = 4 }
+
+ self.speechRecognizer.rewindByWords(words)
+ }
+ } else {
+ HandGestureController.log("[Controller] hiding indicator, window=\(indicatorWindow != nil)")
+ hideHandIndicator()
+
+ // Stop rewind
+ rewindTimer?.invalidate()
+ rewindTimer = nil
+
+ HandGestureController.log("[Controller] calling resumeAfterRewind, isListening=\(speechRecognizer.isListening)")
+ switch settings.listeningMode {
+ case .wordTracking:
+ speechRecognizer.resumeAfterRewind()
+ HandGestureController.log("[Controller] resumeAfterRewind called, isListening=\(speechRecognizer.isListening)")
+ case .classic, .silencePaused:
+ DispatchQueue.main.asyncAfter(deadline: .now() + 1.5) { [weak self] in
+ self?.speechRecognizer.resumeAfterRewind()
+ }
+ }
+ }
+ }
+
+ private func showHandIndicator() {
+ guard indicatorWindow == nil else { return }
+ guard let screen = NSScreen.main ?? NSScreen.screens.first else { return }
+
+ let size: CGFloat = 60
+ let margin: CGFloat = 20
+ let frame = NSRect(
+ x: screen.frame.maxX - size - margin,
+ y: screen.frame.maxY - size - margin - 30, // below menu bar
+ width: size,
+ height: size
+ )
+
+ let window = NSWindow(contentRect: frame, styleMask: .borderless, backing: .buffered, defer: false)
+ window.isOpaque = false
+ window.backgroundColor = .clear
+ window.level = NSWindow.Level(Int(CGShieldingWindowLevel()) + 1)
+ window.ignoresMouseEvents = true
+ window.hasShadow = false
+
+ let hostView = NSHostingView(rootView: HandIndicatorView(isRewinding: true))
+ window.contentView = hostView
+ window.orderFront(nil)
+
+ indicatorWindow = window
+ indicatorView = hostView
+ }
+
+ private func hideHandIndicator() {
+ indicatorWindow?.orderOut(nil)
+ indicatorWindow = nil
+ indicatorView = nil
}
func updateContent(text: String, hasNextPage: Bool) {
@@ -136,6 +224,10 @@ class NotchOverlayController: NSObject {
if settings.listeningMode != .classic {
speechRecognizer.start(with: text)
}
+
+ if NotchSettings.shared.handGestureRewind {
+ handGestureController.start()
+ }
}
private func screenUnderMouse() -> NSScreen? {
@@ -232,7 +324,7 @@ class NotchOverlayController: NSObject {
self.frameTracker = tracker
self.currentScreenID = screen.displayID
- let overlayView = NotchOverlayView(content: overlayContent, speechRecognizer: speechRecognizer, menuBarHeight: menuBarHeight, baseTextHeight: textAreaHeight, maxExtraHeight: maxExtraHeight, frameTracker: tracker)
+ let overlayView = NotchOverlayView(content: overlayContent, speechRecognizer: speechRecognizer, handGesture: handGestureController, menuBarHeight: menuBarHeight, baseTextHeight: textAreaHeight, maxExtraHeight: maxExtraHeight, frameTracker: tracker)
let contentView = NSHostingView(rootView: overlayView)
// Start panel at full target size (SwiftUI animates the notch shape inside)
@@ -277,6 +369,7 @@ class NotchOverlayController: NSObject {
let floatingView = FloatingOverlayView(
content: overlayContent,
speechRecognizer: speechRecognizer,
+ handGesture: handGestureController,
baseHeight: panelHeight,
followingCursor: true
)
@@ -345,6 +438,7 @@ class NotchOverlayController: NSObject {
let floatingView = FloatingOverlayView(
content: overlayContent,
speechRecognizer: speechRecognizer,
+ handGesture: handGestureController,
baseHeight: panelHeight
)
let contentView = NSHostingView(rootView: floatingView)
@@ -377,6 +471,10 @@ class NotchOverlayController: NSObject {
// Trigger the shrink animation
speechRecognizer.shouldDismiss = true
speechRecognizer.forceStop()
+ handGestureController.stop()
+ hideHandIndicator()
+ rewindTimer?.invalidate()
+ rewindTimer = nil
// Wait for animation, then remove panel
DispatchQueue.main.asyncAfter(deadline: .now() + 0.4) { [weak self] in
@@ -415,6 +513,10 @@ class NotchOverlayController: NSObject {
removeEscMonitor()
cancellables.removeAll()
speechRecognizer.forceStop()
+ handGestureController.stop()
+ hideHandIndicator()
+ rewindTimer?.invalidate()
+ rewindTimer = nil
speechRecognizer.recognizedCharCount = 0
panel?.orderOut(nil)
panel = nil
@@ -600,11 +702,49 @@ struct DynamicIslandShape: Shape {
}
}
+// MARK: - Hand Gesture Indicator
+
+struct HandIndicatorView: View {
+ let isRewinding: Bool
+
+ @State private var rotation: Double = 0
+
+ var body: some View {
+ ZStack {
+ // Background circle
+ Circle()
+ .stroke(Color.white.opacity(0.3), lineWidth: 3)
+ .frame(width: 44, height: 44)
+
+ // Animated arc
+ Circle()
+ .trim(from: 0, to: 0.7)
+ .stroke(Color.green, style: StrokeStyle(lineWidth: 3, lineCap: .round))
+ .frame(width: 44, height: 44)
+ .rotationEffect(.degrees(rotation))
+
+ // Rewind icon
+ Image(systemName: "backward.fill")
+ .font(.system(size: 16, weight: .bold))
+ .foregroundColor(.green)
+ }
+ .frame(width: 60, height: 60)
+ .background(Color.black.opacity(0.6))
+ .clipShape(Circle())
+ .onAppear {
+ withAnimation(.linear(duration: 1).repeatForever(autoreverses: false)) {
+ rotation = -360
+ }
+ }
+ }
+}
+
// MARK: - Overlay SwiftUI View
struct NotchOverlayView: View {
@Bindable var content: OverlayContent
@Bindable var speechRecognizer: SpeechRecognizer
+ var handGesture: HandGestureController
let menuBarHeight: CGFloat
let baseTextHeight: CGFloat
let maxExtraHeight: CGFloat
@@ -627,6 +767,8 @@ struct NotchOverlayView: View {
@State private var isUserScrolling: Bool = false
private let scrollTimer = Timer.publish(every: 0.05, on: .main, in: .common).autoconnect()
+ // Hand-gesture rewind state
+
// Auto next page countdown
@State private var countdownRemaining: Int = 0
@State private var countdownTimer: Timer? = nil
@@ -642,6 +784,7 @@ struct NotchOverlayView: View {
NotchSettings.shared.listeningMode
}
+
/// Convert fractional word index to char offset using actual word lengths
private func charOffsetForWordProgress(_ progress: Double) -> Int {
let wholeWord = Int(progress)
@@ -1136,6 +1279,7 @@ struct GlassEffectView: NSViewRepresentable {
struct FloatingOverlayView: View {
@Bindable var content: OverlayContent
@Bindable var speechRecognizer: SpeechRecognizer
+ var handGesture: HandGestureController
let baseHeight: CGFloat
var followingCursor: Bool = false
@@ -1155,10 +1299,13 @@ struct FloatingOverlayView: View {
@State private var isUserScrolling: Bool = false
private let scrollTimer = Timer.publish(every: 0.05, on: .main, in: .common).autoconnect()
+ // Hand-gesture rewind state
+
private var listeningMode: ListeningMode {
NotchSettings.shared.listeningMode
}
+
/// Convert fractional word index to char offset using actual word lengths
private func charOffsetForWordProgress(_ progress: Double) -> Int {
let wholeWord = Int(progress)
diff --git a/Textream/Textream/NotchSettings.swift b/Textream/Textream/NotchSettings.swift
index 1a02c9d..9b4f7a0 100644
--- a/Textream/Textream/NotchSettings.swift
+++ b/Textream/Textream/NotchSettings.swift
@@ -407,6 +407,10 @@ class NotchSettings {
didSet { UserDefaults.standard.set(selectedMicUID, forKey: "selectedMicUID") }
}
+ var handGestureRewind: Bool {
+ didSet { UserDefaults.standard.set(handGestureRewind, forKey: "handGestureRewind") }
+ }
+
var autoNextPage: Bool {
didSet { UserDefaults.standard.set(autoNextPage, forKey: "autoNextPage") }
}
@@ -483,6 +487,7 @@ class NotchSettings {
self.hideFromScreenShare = UserDefaults.standard.object(forKey: "hideFromScreenShare") as? Bool ?? true
self.showElapsedTime = UserDefaults.standard.object(forKey: "showElapsedTime") as? Bool ?? true
self.selectedMicUID = UserDefaults.standard.string(forKey: "selectedMicUID") ?? ""
+ self.handGestureRewind = UserDefaults.standard.object(forKey: "handGestureRewind") as? Bool ?? true
self.autoNextPage = UserDefaults.standard.object(forKey: "autoNextPage") as? Bool ?? false
let savedDelay = UserDefaults.standard.integer(forKey: "autoNextPageDelay")
self.autoNextPageDelay = savedDelay > 0 ? savedDelay : 3
diff --git a/Textream/Textream/SettingsView.swift b/Textream/Textream/SettingsView.swift
index 581526b..727a3c1 100644
--- a/Textream/Textream/SettingsView.swift
+++ b/Textream/Textream/SettingsView.swift
@@ -743,6 +743,19 @@ struct SettingsView: View {
}
}
+ Divider()
+
+ Toggle(isOn: $settings.handGestureRewind) {
+ VStack(alignment: .leading, spacing: 2) {
+ Text("Hand Gesture Rewind")
+ .font(.system(size: 13, weight: .medium))
+ Text("Raise your hand to pause and rewind. The higher you raise, the faster it rewinds. Lower your hand to resume. Uses your camera to detect hand position.")
+ .font(.system(size: 11))
+ .foregroundStyle(.secondary)
+ }
+ }
+ .toggleStyle(.checkbox)
+
Spacer()
}
.padding(16)
diff --git a/Textream/Textream/SpeechRecognizer.swift b/Textream/Textream/SpeechRecognizer.swift
index 0730b1c..9e01414 100644
--- a/Textream/Textream/SpeechRecognizer.swift
+++ b/Textream/Textream/SpeechRecognizer.swift
@@ -209,6 +209,41 @@ class SpeechRecognizer {
beginRecognition()
}
+ /// Pause speech recognition for gesture rewind without changing isListening state.
+ func pauseForRewind() {
+ cleanupRecognition()
+ }
+
+ /// Move recognizedCharCount backward by N words. Used during gesture rewind.
+ func rewindByWords(_ count: Int) {
+ let chars = Array(sourceText)
+ var remaining = count
+ var offset = recognizedCharCount
+
+ while remaining > 0 && offset > 0 {
+ // Skip any spaces at current position
+ while offset > 0 && chars[offset - 1] == " " {
+ offset -= 1
+ }
+ // Skip to start of current word
+ while offset > 0 && chars[offset - 1] != " " {
+ offset -= 1
+ }
+ remaining -= 1
+ }
+
+ recognizedCharCount = max(0, offset)
+ matchStartOffset = recognizedCharCount
+ }
+
+ /// Resume speech recognition after gesture rewind from current position.
+ func resumeAfterRewind() {
+ matchStartOffset = recognizedCharCount
+ retryCount = 0
+ isListening = true
+ beginRecognition()
+ }
+
private func cleanupRecognition() {
// Cancel any pending restart to prevent overlapping beginRecognition calls
pendingRestart?.cancel()
diff --git a/Textream/Textream/Textream.entitlements b/Textream/Textream/Textream.entitlements
index e2726b6..7c2b752 100644
--- a/Textream/Textream/Textream.entitlements
+++ b/Textream/Textream/Textream.entitlements
@@ -6,6 +6,8 @@
com.apple.security.device.audio-input
+ <key>com.apple.security.device.camera</key>
+ <true/>
com.apple.security.files.user-selected.read-write
com.apple.security.network.client
diff --git a/docs/superpowers/plans/2026-03-22-hand-gesture-rewind.md b/docs/superpowers/plans/2026-03-22-hand-gesture-rewind.md
new file mode 100644
index 0000000..db29b4e
--- /dev/null
+++ b/docs/superpowers/plans/2026-03-22-hand-gesture-rewind.md
@@ -0,0 +1,501 @@
+# Hand Gesture Rewind Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Add hands-free script rewind via raised hand detection using the Mac's front-facing camera and Apple Vision framework.
+
+**Architecture:** A new `HandGestureController` owns camera capture and Vision hand pose detection, publishing `isHandRaised` and `handHeight`. The overlay views observe these values and dispatch rewind to the appropriate scroll state (`recognizedCharCount` for wordTracking, `timerWordProgress` for classic/silencePaused). New methods on `SpeechRecognizer` handle pause/rewind/resume for wordTracking mode.
+
+**Tech Stack:** Swift, AVFoundation (camera capture), Vision framework (VNDetectHumanHandPoseRequest)
+
+**Spec:** `docs/superpowers/specs/2026-03-22-hand-gesture-rewind-design.md`
+
+**Note:** This project has no test target. All Swift files live in `Textream/Textream/`. The project builds via `xcodebuild` from `Textream/Textream.xcodeproj`. Use `CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY=""` when building.
+
+**Important — Adding files to Xcode project:** This is a pure Xcode project (no SPM). New `.swift` files are auto-discovered by the build system. Bundle resources and entitlements changes need manual verification.
+
+---
+
+## File Map
+
+### New Files
+
+| File | Responsibility |
+|------|---------------|
+| `Textream/Textream/HandGestureController.swift` | AVCaptureSession setup, VNDetectHumanHandPoseRequest processing, wrist Y smoothing, publishes `isHandRaised` and `handHeight` |
+
+### Modified Files
+
+| File | Change |
+|------|--------|
+| `Textream/Textream/SpeechRecognizer.swift` | Add `pauseForRewind()`, `rewindByWords(_:)`, `resumeAfterRewind()` methods |
+| `Textream/Textream/NotchOverlayController.swift` | Create `HandGestureController`, observe hand state in both overlay views, manage rewind timer, dispatch to correct scroll state per mode |
+| `Textream/Info.plist` | Add `NSCameraUsageDescription` |
+| `Textream/Textream/Textream.entitlements` | Add `com.apple.security.device.camera` |
+
+---
+
+### Task 1: Camera Permission and Entitlements
+
+**Files:**
+- Modify: `Textream/Info.plist`
+- Modify: `Textream/Textream/Textream.entitlements`
+
+- [ ] **Step 1: Add camera usage description to Info.plist**
+
+Add the following key/value pair inside the top-level `<dict>` in `Textream/Info.plist`, after the existing `NSServices` block:
+
+```xml
+<key>NSCameraUsageDescription</key>
+<string>Textream uses the camera to detect hand gestures for hands-free script control.</string>
+```
+
+- [ ] **Step 2: Add camera entitlement**
+
+Add the following key/value pair inside the `<dict>` in `Textream/Textream/Textream.entitlements`, after the existing `com.apple.security.device.audio-input` entry:
+
+```xml
+<key>com.apple.security.device.camera</key>
+<true/>
+```
+
+- [ ] **Step 3: Verify build**
+
+```bash
+cd Textream && xcodebuild -scheme Textream -configuration Debug CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY="" build 2>&1 | tail -5
+```
+
+Expected: BUILD SUCCEEDED
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add Textream/Info.plist Textream/Textream/Textream.entitlements
+git commit -m "feat: add camera permission and entitlement for hand gesture rewind"
+```
+
+---
+
+### Task 2: HandGestureController
+
+**Files:**
+- Create: `Textream/Textream/HandGestureController.swift`
+
+This is the core camera + Vision processing class. It owns the capture session, runs hand pose detection on each frame, smooths the wrist position, and publishes state.
+
+- [ ] **Step 1: Implement HandGestureController**
+
+```swift
+import AVFoundation
+import Vision
+import AppKit
+
+@Observable
+class HandGestureController: NSObject {
+ var isHandRaised: Bool = false
+ var handHeight: Float = 0.0 // 0.0 = just above threshold, 1.0 = top of frame
+
+ private var captureSession: AVCaptureSession?
+ private let videoOutput = AVCaptureVideoDataOutput()
+ private let processingQueue = DispatchQueue(label: "com.textream.handgesture", qos: .userInteractive)
+ private let handPoseRequest = VNDetectHumanHandPoseRequest()
+
+ private let raiseThreshold: Float = 0.6 // wrist Y must exceed this to count as raised
+ private var recentWristY: [Float] = [] // rolling buffer for smoothing
+ private let smoothingWindow = 4
+
+ private var isRunning = false
+
+ override init() {
+ super.init()
+ handPoseRequest.maximumHandCount = 2
+ }
+
+ func start() {
+ guard !isRunning else { return }
+
+ // Check camera permission
+ switch AVCaptureDevice.authorizationStatus(for: .video) {
+ case .authorized:
+ setupAndStart()
+ case .notDetermined:
+ AVCaptureDevice.requestAccess(for: .video) { [weak self] granted in
+ if granted {
+ DispatchQueue.main.async { self?.setupAndStart() }
+ }
+ }
+ default:
+ // Permission denied or restricted — silently disable
+ return
+ }
+ }
+
+ func stop() {
+ guard isRunning else { return }
+ captureSession?.stopRunning()
+ isRunning = false
+ isHandRaised = false
+ handHeight = 0.0
+ recentWristY = []
+ }
+
+ private func setupAndStart() {
+ let session = AVCaptureSession()
+ session.sessionPreset = .low // ~640x480, minimal resource usage
+
+ // Find front-facing camera
+ guard let camera = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .front)
+ ?? AVCaptureDevice.default(for: .video) else {
+ // No camera available — silently disable
+ return
+ }
+
+ guard let input = try? AVCaptureDeviceInput(device: camera) else { return }
+ guard session.canAddInput(input) else { return }
+ session.addInput(input)
+
+ videoOutput.setSampleBufferDelegate(self, queue: processingQueue)
+ videoOutput.alwaysDiscardsLateVideoFrames = true
+ guard session.canAddOutput(videoOutput) else { return }
+ session.addOutput(videoOutput)
+
+ // Limit frame rate to ~15fps to save CPU
+ if let connection = videoOutput.connection(with: .video) {
+ connection.isEnabled = true
+ }
+ try? camera.lockForConfiguration()
+ camera.activeVideoMinFrameDuration = CMTime(value: 1, timescale: 15)
+ camera.activeVideoMaxFrameDuration = CMTime(value: 1, timescale: 15)
+ camera.unlockForConfiguration()
+
+ captureSession = session
+
+ processingQueue.async {
+ session.startRunning()
+ }
+ isRunning = true
+ }
+}
+
+extension HandGestureController: AVCaptureVideoDataOutputSampleBufferDelegate {
+ func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
+ guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else { return }
+
+ let handler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, orientation: .up, options: [:])
+ try? handler.perform([handPoseRequest])
+
+ guard let results = handPoseRequest.results, !results.isEmpty else {
+ DispatchQueue.main.async {
+ self.updateWristPosition(nil)
+ }
+ return
+ }
+
+ // Find the hand with the highest wrist Y
+ var highestWristY: Float = 0
+ for hand in results {
+ if let wrist = try? hand.recognizedPoint(.wrist),
+ wrist.confidence > 0.3 {
+ let y = Float(wrist.location.y) // Vision coords: 0=bottom, 1=top
+ if y > highestWristY {
+ highestWristY = y
+ }
+ }
+ }
+
+ DispatchQueue.main.async {
+ self.updateWristPosition(highestWristY > 0 ? highestWristY : nil)
+ }
+ }
+
+ private func updateWristPosition(_ wristY: Float?) {
+ guard let y = wristY else {
+ // No hand detected — decay smoothly
+ recentWristY = []
+ isHandRaised = false
+ handHeight = 0.0
+ return
+ }
+
+ // Smooth with rolling average
+ recentWristY.append(y)
+ if recentWristY.count > smoothingWindow {
+ recentWristY.removeFirst()
+ }
+ let smoothed = recentWristY.reduce(0, +) / Float(recentWristY.count)
+
+ if smoothed > raiseThreshold {
+ isHandRaised = true
+ // Map threshold..1.0 → 0.0..1.0
+ handHeight = min(1.0, (smoothed - raiseThreshold) / (1.0 - raiseThreshold))
+ } else {
+ isHandRaised = false
+ handHeight = 0.0
+ }
+ }
+}
+```
+
+- [ ] **Step 2: Verify build**
+
+```bash
+cd Textream && xcodebuild -scheme Textream -configuration Debug CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY="" build 2>&1 | tail -5
+```
+
+Expected: BUILD SUCCEEDED
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add Textream/Textream/HandGestureController.swift
+git commit -m "feat: add HandGestureController with Vision hand pose detection"
+```
+
+---
+
+### Task 3: SpeechRecognizer Rewind Methods
+
+**Files:**
+- Modify: `Textream/Textream/SpeechRecognizer.swift`
+
+Add three new public methods that the overlay views will call during hand gesture rewind. These methods encapsulate access to the private `matchStartOffset`, `sourceText`, `cleanupRecognition()`, and `beginRecognition()`.
+
+- [ ] **Step 1: Add pauseForRewind()**
+
+Add the following method after `resume()` (after line 210 in SpeechRecognizer.swift):
+
+```swift
+ /// Pause speech recognition for gesture rewind without changing isListening state.
+ func pauseForRewind() {
+ cleanupRecognition()
+ }
+```
+
+- [ ] **Step 2: Add rewindByWords(\_:)**
+
+Add directly after `pauseForRewind()`:
+
+```swift
+ /// Move recognizedCharCount backward by N words. Used during gesture rewind.
+ func rewindByWords(_ count: Int) {
+ // Work with the string as an array for O(1) indexing
+ let chars = Array(sourceText)
+ var remaining = count
+ var offset = recognizedCharCount
+
+ while remaining > 0 && offset > 0 {
+ // Skip any spaces at current position
+ while offset > 0 && chars[offset - 1] == " " {
+ offset -= 1
+ }
+ // Skip to start of current word
+ while offset > 0 && chars[offset - 1] != " " {
+ offset -= 1
+ }
+ remaining -= 1
+ }
+
+ recognizedCharCount = max(0, offset)
+ matchStartOffset = recognizedCharCount
+ }
+```
+
+- [ ] **Step 3: Add resumeAfterRewind()**
+
+Add directly after `rewindByWords(_:)`:
+
+```swift
+ /// Resume speech recognition after gesture rewind from current position.
+ func resumeAfterRewind() {
+ matchStartOffset = recognizedCharCount
+ retryCount = 0
+ beginRecognition()
+ }
+```
+
+- [ ] **Step 4: Verify build**
+
+```bash
+cd Textream && xcodebuild -scheme Textream -configuration Debug CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY="" build 2>&1 | tail -5
+```
+
+Expected: BUILD SUCCEEDED
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add Textream/Textream/SpeechRecognizer.swift
+git commit -m "feat: add pauseForRewind, rewindByWords, resumeAfterRewind to SpeechRecognizer"
+```
+
+---
+
+### Task 4: Integrate HandGestureController into Overlay Views
+
+**Files:**
+- Modify: `Textream/Textream/NotchOverlayController.swift`
+
+This is the integration task. The `HandGestureController` needs to be:
+1. Created and owned by `NotchOverlayController`
+2. Started/stopped with reading sessions
+3. Observed by both `NotchOverlayView` and `FloatingOverlayView` to drive rewind
+
+**Key context:** `timerWordProgress` is `@State private` on both `NotchOverlayView` (line 625) and `FloatingOverlayView` (line 1153). The rewind logic for classic/silencePaused must live inside these views since they own the state. The `HandGestureController` is passed to both views and observed via `onChange(of:)`.
+
+- [ ] **Step 1: Add HandGestureController to NotchOverlayController**
+
+In the `NotchOverlayController` class (around line 47), add a property:
+
+```swift
+let handGestureController = HandGestureController()
+```
+
+- [ ] **Step 2: Start/stop camera with reading sessions**
+
+Find `show(text:hasNextPage:onComplete:)` (line 62) — after the existing `speechRecognizer.start(with:)` call (line 118), add:
+
+```swift
+handGestureController.start()
+```
+
+Find `updateContent(text:hasNextPage:)` (line 122) — similarly add `handGestureController.start()` after the recognizer start.
+
+Find `dismiss()` (line 376) and `forceClose()` (line 411) — add to both:
+
+```swift
+handGestureController.stop()
+```
+
+- [ ] **Step 3: Pass HandGestureController to NotchOverlayView**
+
+The `NotchOverlayView` needs access to `handGestureController`. Add it as a parameter to the view's init. Find where `NotchOverlayView` is created in `NotchOverlayController` and pass `handGestureController`.
+
+In `NotchOverlayView`, add a property:
+
+```swift
+var handGesture: HandGestureController
+```
+
+- [ ] **Step 4: Add rewind logic to NotchOverlayView**
+
+Add a rewind timer state and handler to `NotchOverlayView`. Add these properties near the other `@State` declarations (around line 625):
+
+```swift
+@State private var rewindTimer: Timer?
+@State private var resumeDelay: DispatchWorkItem?
+```
+
+Add a helper to compute words-per-tick from hand height:
+
+```swift
+private func rewindWordsPerTick(handHeight: Float) -> Int {
+ if handHeight < 0.3 { return 1 }
+ if handHeight < 0.7 { return 2 }
+ return 4
+}
+```
+
+Add `onChange` handlers in the view body (inside the main container, near the existing `onChange` handlers):
+
+```swift
+.onChange(of: handGesture.isHandRaised) { _, raised in
+ if raised {
+ // Cancel any pending resume delay
+ resumeDelay?.cancel()
+ resumeDelay = nil
+
+ // Pause current mode
+ switch listeningMode {
+ case .wordTracking:
+ speechRecognizer.pauseForRewind()
+ case .classic:
+ isPaused = true
+ case .silencePaused:
+ speechRecognizer.pauseForRewind()
+ isPaused = true // also pause the scroll timer
+ }
+
+ // Start rewind timer
+ rewindTimer = Timer.scheduledTimer(withTimeInterval: 0.25, repeats: true) { _ in
+ let words = rewindWordsPerTick(handHeight: handGesture.handHeight)
+ switch listeningMode {
+ case .wordTracking:
+ speechRecognizer.rewindByWords(words)
+ case .classic, .silencePaused:
+ timerWordProgress = max(0, timerWordProgress - Double(words))
+ }
+ }
+ } else {
+ // Stop rewind timer
+ rewindTimer?.invalidate()
+ rewindTimer = nil
+
+ // Resume based on mode
+ switch listeningMode {
+ case .wordTracking:
+ speechRecognizer.resumeAfterRewind()
+ case .classic:
+ let work = DispatchWorkItem { isPaused = false }
+ resumeDelay = work
+ DispatchQueue.main.asyncAfter(deadline: .now() + 1.5, execute: work)
+ case .silencePaused:
+ speechRecognizer.resumeAfterRewind()
+ let work = DispatchWorkItem { isPaused = false }
+ resumeDelay = work
+ DispatchQueue.main.asyncAfter(deadline: .now() + 1.5, execute: work)
+ }
+ }
+}
+.onDisappear {
+ rewindTimer?.invalidate()
+ rewindTimer = nil
+ resumeDelay?.cancel()
+ resumeDelay = nil
+}
+```
+
+- [ ] **Step 5: Repeat for FloatingOverlayView**
+
+Apply the same changes to `FloatingOverlayView` (starts around line 1136):
+- Add `handGesture: HandGestureController` property
+- Add `rewindTimer`, `resumeDelay` state
+- Add `rewindWordsPerTick` helper
+- Add the same `onChange(of: handGesture.isHandRaised)` handler with `.onDisappear` cleanup
+- Pass `handGestureController` from where `FloatingOverlayView` is created
+
+**Finding all view instantiation sites:** Search `NotchOverlayController.swift` for `NotchOverlayView(` and `FloatingOverlayView(` to find every place these views are created. Each call site must pass the `handGestureController`. There are typically 1-2 sites per view (in `showPinned`, `showFollowCursor`, `showFloating`, etc.).
+
+**Note:** Fullscreen mode uses `ExternalDisplayView` which is not addressed in this plan — gesture rewind in fullscreen is a future enhancement.
+
+- [ ] **Step 6: Verify build**
+
+```bash
+cd Textream && xcodebuild -scheme Textream -configuration Debug CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY="" build 2>&1 | tail -5
+```
+
+Expected: BUILD SUCCEEDED
+
+- [ ] **Step 7: Build and launch the app**
+
+```bash
+cd Textream && xcodebuild -scheme Textream -configuration Debug CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY="" build 2>&1 | tail -5
+```
+
+Then launch the app and test:
+1. Load a script and start reading in Word Tracking mode
+2. Raise your hand above shoulder height — script should start rewinding
+3. Raise hand higher — rewind should speed up
+4. Lower hand — speech recognition should resume from new position
+5. Switch to Classic mode, start auto-scroll, raise hand — should rewind `timerWordProgress`
+6. Lower hand — 1.5s pause, then auto-scroll resumes
+
+- [ ] **Step 8: Commit**
+
+```bash
+git add Textream/Textream/NotchOverlayController.swift
+git commit -m "feat: integrate hand gesture rewind into overlay views
+
+Start/stop camera with reading sessions. Both NotchOverlayView
+and FloatingOverlayView observe HandGestureController to drive
+rewind in all three listening modes."
+```
diff --git a/docs/superpowers/specs/2026-03-22-hand-gesture-rewind-design.md b/docs/superpowers/specs/2026-03-22-hand-gesture-rewind-design.md
new file mode 100644
index 0000000..bd02229
--- /dev/null
+++ b/docs/superpowers/specs/2026-03-22-hand-gesture-rewind-design.md
@@ -0,0 +1,99 @@
+# Hand Gesture Rewind — Design Spec
+
+## Problem
+
+When presenting with Textream, the speaker sometimes needs to go back a few sentences in the script — to re-read a section, correct a mistake, or recover after going off-script. Currently there is no hands-free way to rewind: the speaker would have to step back to the laptop and manually scroll the script to the earlier position.
+
+## Solution
+
+Use the Mac's front-facing camera and Apple's Vision framework to detect a raised hand. While the hand is raised, the script rewinds continuously. Hand height controls rewind speed — higher hand = faster rewind. Lowering the hand resumes normal operation.
+
+## Hand Detection Pipeline
+
+A new `HandGestureController` class owns camera capture and Vision processing:
+
+1. **AVCaptureSession** captures frames from the default front-facing camera at low resolution (~640x480) and low frame rate (~15fps)
+2. Each frame is processed by **VNDetectHumanHandPoseRequest** which returns hand landmark coordinates
+3. The **wrist Y-position** (0.0 = bottom of frame, 1.0 = top in Vision coordinates) is extracted and smoothed with a rolling average of the last 4 frames to reduce jitter
+4. A **raise threshold** on the smoothed wrist Y determines whether the hand is raised. The implementation triggers a raise when wrist Y exceeds 0.25 and a lower when it drops below 0.20 — the gap between the two thresholds provides hysteresis so the state does not flicker near the boundary. Below the threshold, the hand is in the speaker's lap or at their side and is ignored
+5. If multiple hands are detected, use the one with the highest wrist Y-position
+6. The controller publishes two observable values:
+ - `isHandRaised: Bool`
+ - `handHeight: Float` (0.0 = just above threshold, 1.0 = top of frame)
+
+### Lifecycle
+
+- Camera starts when a reading session begins — triggered by the overlay controller's `startReading()` flow, not tied to `SpeechRecognizer.start()` (since classic mode never calls it)
+- Camera stops when the reading session ends
+- Camera is NOT running when the app is idle
+- If no camera is available (e.g. a Mac mini or Mac Pro with no webcam attached), the gesture feature is silently disabled
+
+### Camera Selection
+
+Uses the default front-facing camera. No settings UI for camera selection in this iteration.
+
+### Camera Permission
+
+If camera access is denied, the gesture feature is silently unavailable — no error is shown, no functionality is blocked. The rest of the app works normally. The `NSCameraUsageDescription` key in `Info.plist` provides the permission prompt text.
+
+## Rewind Behavior
+
+The scroll state lives in different places depending on the listening mode:
+- **wordTracking:** `recognizedCharCount` on `SpeechRecognizer`
+- **classic / silencePaused:** `timerWordProgress` on the overlay controller
+
+The `HandGestureController` publishes `isHandRaised` and `handHeight`. The overlay controller observes these and dispatches rewind to the appropriate state.
+
+### When hand is raised (`isHandRaised` becomes true):
+
+**In wordTracking mode:**
+1. Pause speech recognition — call a new `pauseForRewind()` method on `SpeechRecognizer` that stops the audio engine and recognition task without setting `isListening = false`
+2. Start a rewind timer (every 0.25 seconds) that calls a new `rewindByWords(_ count: Int)` method on `SpeechRecognizer`, which moves `recognizedCharCount` backward by N words (finding previous space characters in `sourceText`) and updates `matchStartOffset` to match
+
+**In classic / silencePaused mode:**
+1. Pause the scroll timer
+2. Start a rewind timer (every 0.25 seconds) that decrements `timerWordProgress` by N words
+
+**Speed (all modes):** The `handHeight` value controls how many words per tick:
+- Low hand (0.0–0.3): 1 word per tick (~4 words/sec)
+- Mid hand (0.3–0.7): 2 words per tick (~8 words/sec)
+- High hand (0.7–1.0): 4 words per tick (~16 words/sec)
+
+Position is clamped to never go below 0.
+
+### When hand is lowered (`isHandRaised` becomes false):
+
+**In wordTracking mode:** Call a new `resumeAfterRewind()` method on `SpeechRecognizer` that sets `matchStartOffset = recognizedCharCount` and calls `beginRecognition()` to resume speech tracking from the new position.
+
+**In classic / silencePaused mode:** Wait 1.5 seconds, then resume the scroll timer from the current `timerWordProgress` position.
+
+### Visual Feedback
+
+In wordTracking mode, MarqueeTextView observes `recognizedCharCount` — rewind scrolls backward automatically. In classic/silencePaused modes, the view observes `timerWordProgress` — same effect. No new UI components needed.
+
+## File Organization
+
+### New File
+
+| File | Responsibility |
+|------|---------------|
+| `HandGestureController.swift` | AVCaptureSession setup, VNDetectHumanHandPoseRequest processing, wrist position smoothing, publishes `isHandRaised` and `handHeight` |
+
+### Modified Files
+
+| File | Change |
+|------|--------|
+| `SpeechRecognizer.swift` | Add `pauseForRewind()`, `rewindByWords(_:)`, and `resumeAfterRewind()` methods |
+| `NotchOverlayController.swift` | Create and own `HandGestureController`, observe hand state, dispatch rewind to `SpeechRecognizer` or `timerWordProgress` depending on mode, manage rewind timer |
+| `Info.plist` | Add `NSCameraUsageDescription` |
+
+### Unchanged
+
+MarqueeTextView, NotchSettings, ContentView, SettingsView, BrowserServer, ExternalDisplayController.
+
+## Supported Listening Modes
+
+The gesture rewind works in all three listening modes:
+- **Word Tracking** (speech recognition) — rewinds `recognizedCharCount`
+- **Classic** (constant auto-scroll) — rewinds `timerWordProgress`
+- **Voice-Activated** (silence-paused auto-scroll) — rewinds `timerWordProgress`