Skip to content

Commit bdbaf1c

Browse files
feat: grace period and force-kill for background worker shutdown
On restart/shutdown, background workers receive "stop\n" on the signaling stream and have 5 seconds to exit gracefully. After the grace period, stuck threads get a best-effort force-kill. On Linux ZTS, PHP's own max_execution_time timer is armed cross-thread (via timer_settime on EG(max_execution_timer_timer)), which triggers a "Maximum execution time exceeded" fatal error on the stuck thread. On Windows, CancelSynchronousIo and QueueUserAPC interrupt blocking I/O and alertable waits. On other platforms, stuck threads are abandoned and exit when the blocking call eventually returns.
1 parent 87364fe commit bdbaf1c

5 files changed

Lines changed: 171 additions & 14 deletions

File tree

docs/background-workers.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,15 +36,15 @@ example.com {
3636

3737
- `num` and `max_threads` are accepted but capped at 1 for now (pooling is a future feature). Values > 1 are rejected with a clear error.
3838
- `max_threads` on catch-all workers sets a safety cap for lazy-started instances (defaults to 16).
39-
- `max_consecutive_failures` defaults to -1 (never panic on boot failures).
39+
- `max_consecutive_failures` defaults to 6 (same as HTTP workers).
4040
- `env` and `watch` work the same as HTTP workers.
4141

4242
### Thread reservation
4343

4444
Background workers get dedicated thread slots outside the global `max_threads` budget.
4545
They don't compete with HTTP auto-scaling. For catch-all workers, `max_threads` determines
46-
the reservation (default 16). Named workers with `num 0` (default) are lazy-started and
47-
don't reserve threads.
46+
the reservation (default 16). Named workers with `num 0` (default) are lazy-started but
47+
still reserve 1 thread (`max_threads` defaults to `max(num, 1)`).
4848

4949
Each `php_server` block has its own isolated background worker scope.
5050

@@ -199,7 +199,8 @@ if (function_exists('frankenphp_worker_get_vars')) {
199199
- Background workers also get `$_SERVER['argv']` = `[entrypoint, name]` for CLI compatibility
200200
- Crash recovery: automatic restart with exponential backoff
201201
- Graceful shutdown via `frankenphp_worker_get_signaling_stream()` and `stream_select()`
202-
- Worker restarts stop running background workers; the next `get_vars()` call starts them again
202+
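- Grace period: on restart/shutdown, background workers receive `"stop\n"` on the signaling stream and have 5 seconds to exit gracefully. Workers still blocked after 5 seconds get a best-effort force-kill (Linux ZTS: a "Maximum execution time exceeded" fatal error; Windows: blocking I/O and alertable waits are interrupted). On other platforms they are abandoned, and their threads exit when the blocking call returns.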
203+
- Worker restarts stop and immediately restart background workers (same as HTTP workers)
203204
- Use `error_log()` or `frankenphp_log()` for logging - avoid `echo`
204205

205206
For advanced use cases (amphp, ReactPHP), the signaling stream can be registered directly

frankenphp.c

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,101 @@ __thread int worker_stop_fds[2] = {-1, -1};
9393
__thread php_stream *worker_signaling_stream = NULL;
9494
__thread HashTable *sandboxed_env = NULL;
9595

96+
/* Best-effort force-kill for stuck background workers after grace period.
97+
* - Linux ZTS: arm PHP's per-thread timer -> "max execution time" fatal
98+
* - Windows: CancelSynchronousIo + QueueUserAPC -> interrupts I/O and sleeps
99+
* - macOS/other: no-op (threads abandoned, exit when blocking call returns) */
100+
/* Number of per-thread slots currently tracked. Set by
 * frankenphp_init_force_kill(), reset to 0 by frankenphp_destroy_force_kill(),
 * and used as the upper bound when validating thread indexes. */
static int force_kill_num_threads = 0;
101+
#ifdef ZEND_MAX_EXECUTION_TIMERS
102+
/* Per-slot copy of the owning thread's EG(max_execution_timer_timer). */
static timer_t *thread_php_timers = NULL;
103+
/* Per-slot flag: true once thread_php_timers[slot] has been recorded. */
static bool *thread_php_timer_saved = NULL;
104+
#elif defined(PHP_WIN32)
105+
/* Per-slot duplicated handle of the owning thread; closed on destroy. */
static HANDLE *thread_handles = NULL;
106+
/* Per-slot flag: true once thread_handles[slot] has been recorded. */
static bool *thread_handle_saved = NULL;
107+
/* APC routine that intentionally does nothing; it exists only so that
 * frankenphp_force_kill_thread() has something to queue on the target thread. */
static void CALLBACK frankenphp_noop_apc(ULONG_PTR param) { (void)param; }
108+
#endif
109+
110+
/* Set up the per-thread tables used for best-effort force-kill.
 *
 * num_threads is the number of thread slots to track; thread indexes passed
 * to frankenphp_save_php_timer() and frankenphp_force_kill_thread() must be
 * below it.
 *
 * A non-positive count or an allocation failure leaves force-kill disabled
 * (slot count 0), so the other functions become no-ops instead of touching a
 * missing table. Pair with frankenphp_destroy_force_kill(). */
void frankenphp_init_force_kill(int num_threads) {
  /* The slot count is published only once every table exists; until then the
   * bounds checks in save/force-kill reject every index. */
  force_kill_num_threads = 0;
  if (num_threads <= 0) {
    return;
  }
#ifdef ZEND_MAX_EXECUTION_TIMERS
  thread_php_timers = calloc((size_t)num_threads, sizeof *thread_php_timers);
  thread_php_timer_saved =
      calloc((size_t)num_threads, sizeof *thread_php_timer_saved);
  if (thread_php_timers == NULL || thread_php_timer_saved == NULL) {
    /* Never keep half of the pair: the flags and the timers are always
     * indexed together. */
    free(thread_php_timers);
    free(thread_php_timer_saved);
    thread_php_timers = NULL;
    thread_php_timer_saved = NULL;
    return;
  }
#elif defined(PHP_WIN32)
  thread_handles = calloc((size_t)num_threads, sizeof *thread_handles);
  thread_handle_saved =
      calloc((size_t)num_threads, sizeof *thread_handle_saved);
  if (thread_handles == NULL || thread_handle_saved == NULL) {
    free(thread_handles);
    free(thread_handle_saved);
    thread_handles = NULL;
    thread_handle_saved = NULL;
    return;
  }
#endif
  force_kill_num_threads = num_threads;
}
120+
121+
void frankenphp_save_php_timer(uintptr_t idx) {
122+
if (idx >= (uintptr_t)force_kill_num_threads) {
123+
return;
124+
}
125+
#ifdef ZEND_MAX_EXECUTION_TIMERS
126+
if (thread_php_timers && EG(pid)) {
127+
thread_php_timers[idx] = EG(max_execution_timer_timer);
128+
thread_php_timer_saved[idx] = true;
129+
}
130+
#elif defined(PHP_WIN32)
131+
if (thread_handles) {
132+
DuplicateHandle(GetCurrentProcess(), GetCurrentThread(),
133+
GetCurrentProcess(), &thread_handles[idx], 0, FALSE,
134+
DUPLICATE_SAME_ACCESS);
135+
thread_handle_saved[idx] = true;
136+
}
137+
#endif
138+
(void)idx;
139+
}
140+
141+
void frankenphp_force_kill_thread(uintptr_t idx) {
142+
if (idx >= (uintptr_t)force_kill_num_threads) {
143+
return;
144+
}
145+
#ifdef ZEND_MAX_EXECUTION_TIMERS
146+
if (thread_php_timers && thread_php_timer_saved[idx]) {
147+
struct itimerspec its;
148+
its.it_value.tv_sec = 0;
149+
its.it_value.tv_nsec = 1;
150+
its.it_interval.tv_sec = 0;
151+
its.it_interval.tv_nsec = 0;
152+
timer_settime(thread_php_timers[idx], 0, &its, NULL);
153+
}
154+
#elif defined(PHP_WIN32)
155+
if (thread_handles && thread_handle_saved[idx]) {
156+
CancelSynchronousIo(thread_handles[idx]);
157+
QueueUserAPC((PAPCFUNC)frankenphp_noop_apc, thread_handles[idx], 0);
158+
}
159+
#endif
160+
(void)idx;
161+
}
162+
163+
/* Tear down the force-kill tables and reset the slot count to 0.
 *
 * On Windows builds, every thread handle that was saved is closed before the
 * tables are released. Calling this when nothing was allocated is harmless. */
void frankenphp_destroy_force_kill(void) {
#ifdef ZEND_MAX_EXECUTION_TIMERS
  /* free(NULL) is a no-op, so the pointers need no guard. */
  free(thread_php_timers);
  thread_php_timers = NULL;

  free(thread_php_timer_saved);
  thread_php_timer_saved = NULL;
#elif defined(PHP_WIN32)
  if (thread_handles != NULL && thread_handle_saved != NULL) {
    int slot = 0;
    while (slot < force_kill_num_threads) {
      if (thread_handle_saved[slot]) {
        CloseHandle(thread_handles[slot]);
      }
      slot++;
    }
  }

  free(thread_handles);
  thread_handles = NULL;

  free(thread_handle_saved);
  thread_handle_saved = NULL;
#endif
  force_kill_num_threads = 0;
}
190+
96191
/* Per-thread cache for get_vars results.
97192
* Maps worker name (string) -> {version, cached_zval}.
98193
* When the version matches, the cached zval is returned with a refcount bump,
@@ -1673,6 +1768,9 @@ static void *php_thread(void *arg) {
16731768
#endif
16741769
#endif
16751770

1771+
/* Save PHP's timer handle for best-effort force-kill after grace period */
1772+
frankenphp_save_php_timer(thread_index);
1773+
16761774
// loop until Go signals to stop
16771775
char *scriptName = NULL;
16781776
while ((scriptName = go_frankenphp_before_script_execution(thread_index))) {

frankenphp.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,4 +195,9 @@ int frankenphp_get_current_memory_limit();
195195

196196
void register_extensions(zend_module_entry **m, int len);
197197

198+
/* Allocates the per-thread force-kill tables for num_threads thread slots. */
void frankenphp_init_force_kill(int num_threads);
199+
/* Records, for slot thread_index, the calling thread's PHP execution timer
 * (ZEND_MAX_EXECUTION_TIMERS builds) or a duplicated thread handle (Windows).
 * Out-of-range indexes are ignored. */
void frankenphp_save_php_timer(uintptr_t thread_index);
200+
/* Best-effort interruption of the thread saved in slot thread_index; a no-op
 * when nothing was saved or the platform has no implementation. */
void frankenphp_force_kill_thread(uintptr_t thread_index);
201+
/* Releases the force-kill tables (closing saved Windows thread handles) and
 * resets the slot count to 0. */
void frankenphp_destroy_force_kill(void);
202+
198203
#endif

phpmainthread.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ func initPHPThreads(numThreads int, numMaxThreads int, phpIni map[string]string)
5454
return nil, err
5555
}
5656

57+
// initialize force-kill support for stuck background workers
58+
C.frankenphp_init_force_kill(C.int(mainThread.maxThreads))
59+
5760
// initialize all other threads
5861
phpThreads = make([]*phpThread, mainThread.maxThreads)
5962
phpThreads[0] = initialThread
@@ -95,6 +98,7 @@ func drainPHPThreads() {
9598
}
9699

97100
doneWG.Wait()
101+
C.frankenphp_destroy_force_kill()
98102
mainThread.state.Set(state.Done)
99103
mainThread.state.WaitFor(state.Reserved)
100104
phpThreads = nil

worker.go

Lines changed: 59 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ import (
1616
"github.com/dunglas/frankenphp/internal/state"
1717
)
1818

19+
// backgroundWorkerGracePeriod is the time background workers have to stop
20+
// gracefully after receiving the stop signal before being force-killed.
21+
const backgroundWorkerGracePeriod = 5 * time.Second
22+
1923
// represents a worker script and can have many threads assigned to it
2024
type worker struct {
2125
mercureContext
@@ -183,9 +187,10 @@ func DrainWorkers() {
183187

184188
func drainWorkerThreads() []*phpThread {
185189
var (
186-
ready sync.WaitGroup
187-
drainedThreads []*phpThread
188-
stoppedBackgroundWorkers []*worker
190+
ready sync.WaitGroup
191+
drainedThreads []*phpThread
192+
bgThreads []*phpThread
193+
bgWorkers []*worker
189194
)
190195

191196
for _, worker := range workers {
@@ -195,14 +200,20 @@ func drainWorkerThreads() []*phpThread {
195200

196201
for _, thread := range threads {
197202
if worker.isBackgroundWorker {
198-
thread.shutdown()
199-
stoppedBackgroundWorkers = append(stoppedBackgroundWorkers, worker)
203+
// Signal background workers to stop via the signaling stream
204+
if !thread.state.RequestSafeStateChange(state.ShuttingDown) {
205+
continue
206+
}
207+
if fd := worker.backgroundStopFdWrite.Load(); fd >= 0 {
208+
C.frankenphp_worker_write_stop_fd(C.int(fd))
209+
}
210+
close(thread.drainChan)
211+
bgThreads = append(bgThreads, thread)
212+
bgWorkers = append(bgWorkers, worker)
200213
continue
201214
}
202215

203216
if !thread.state.RequestSafeStateChange(state.Restarting) {
204-
// no state change allowed == thread is shutting down
205-
// we'll proceed to restart all other threads anyway
206217
continue
207218
}
208219

@@ -219,9 +230,39 @@ func drainWorkerThreads() []*phpThread {
219230

220231
ready.Wait()
221232

222-
if len(stoppedBackgroundWorkers) > 0 {
223-
stopped := make(map[*worker]struct{}, len(stoppedBackgroundWorkers))
224-
for _, w := range stoppedBackgroundWorkers {
233+
// Wait for background workers with a grace period.
234+
// Well-written workers check the signaling stream and stop promptly.
235+
// Stuck workers (e.g., blocking C calls) are abandoned after the timeout;
236+
// new threads are created on restart, and the old thread exits when the
237+
// blocking call eventually returns.
238+
if len(bgThreads) > 0 {
239+
bgDone := make(chan struct{})
240+
go func() {
241+
for _, thread := range bgThreads {
242+
thread.state.WaitFor(state.Done)
243+
}
244+
close(bgDone)
245+
}()
246+
247+
select {
248+
case <-bgDone:
249+
// all stopped gracefully
250+
case <-time.After(backgroundWorkerGracePeriod):
251+
// Best-effort force-kill: arm PHP's max_execution_time timer on
252+
// stuck threads. Linux ZTS: arms PHP's timer. Windows: interrupts
253+
// I/O and alertable waits. Other platforms: no-op.
254+
// Safe because after 5s, stuck threads are guaranteed to be in C code.
255+
for _, thread := range bgThreads {
256+
if !thread.state.Is(state.Done) {
257+
C.frankenphp_force_kill_thread(C.uintptr_t(thread.threadIndex))
258+
}
259+
}
260+
globalLogger.Warn("background workers did not stop within grace period, force-killing stuck threads")
261+
}
262+
263+
// Clean up registry entries for stopped workers
264+
stopped := make(map[*worker]struct{}, len(bgWorkers))
265+
for _, w := range bgWorkers {
225266
if w.backgroundRegistry != nil && w.backgroundWorker != nil {
226267
w.backgroundRegistry.remove(w.name, w.backgroundWorker)
227268
}
@@ -234,6 +275,14 @@ func drainWorkerThreads() []*phpThread {
234275
}
235276
}
236277
workers = filtered
278+
279+
// Reset drained background threads for restart
280+
for _, thread := range bgThreads {
281+
thread.drainChan = make(chan struct{})
282+
if mainThread.state.Is(state.Ready) {
283+
thread.state.Set(state.Reserved)
284+
}
285+
}
237286
}
238287

239288
return drainedThreads

0 commit comments

Comments
 (0)