diff --git a/scripts/test_mcp_rapid_init.py b/scripts/test_mcp_rapid_init.py new file mode 100755 index 0000000..fe4f65d --- /dev/null +++ b/scripts/test_mcp_rapid_init.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 +"""Integration test for the poll/getline FILE* buffering fix. + +Spawns the MCP server binary, sends initialize + notifications/initialized + +tools/list all at once (no delays), and asserts that the tools/list response +arrives within 5 seconds. + +Usage: + python3 scripts/test_mcp_rapid_init.py [/path/to/binary] + +Exit codes: + 0 - PASS + 1 - FAIL +""" + +import subprocess +import sys +import os + +TIMEOUT_S = 5 + +MESSAGES = ( + b'{"jsonrpc":"2.0","id":1,"method":"initialize",' + b'"params":{"protocolVersion":"2025-11-25","capabilities":{}}}\n' + b'{"jsonrpc":"2.0","method":"notifications/initialized"}\n' + b'{"jsonrpc":"2.0","id":2,"method":"tools/list","params":{}}\n' +) + + +def main(): + if len(sys.argv) >= 2: + binary = sys.argv[1] + else: + # Default: look for build artifact relative to this script's directory + script_dir = os.path.dirname(os.path.abspath(__file__)) + repo_root = os.path.dirname(script_dir) + binary = os.path.join(repo_root, "build", "c", "codebase-memory-mcp") + + if not os.path.isfile(binary): + print(f"FAIL: binary not found at {binary}") + sys.exit(1) + + if not os.access(binary, os.X_OK): + print(f"FAIL: binary not executable: {binary}") + sys.exit(1) + + proc = subprocess.Popen( + [binary], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + ) + + try: + # Write all 3 messages in one call and close stdin to signal EOF + stdout_data, _ = proc.communicate(input=MESSAGES, timeout=TIMEOUT_S) + except subprocess.TimeoutExpired: + proc.kill() + proc.wait() + print( + f"FAIL: server did not respond within {TIMEOUT_S}s " + f"(poll/getline buffering bug not fixed)" + ) + sys.exit(1) + + output = stdout_data.decode("utf-8", errors="replace") + + # Expect exactly 2 JSON responses: id:1 (initialize) and id:2 (tools/list). + # notifications/initialized has no id and produces no response. + lines = [ln.strip() for ln in output.splitlines() if ln.strip()] + import json as _json + json_lines = [] + for ln in lines: + try: + json_lines.append(_json.loads(ln)) + except _json.JSONDecodeError: + pass + + ids = {obj.get("id") for obj in json_lines if "id" in obj} + if 1 not in ids: + print("FAIL: missing initialize response (id:1) in server output") + print(f"Server output was:\n{output!r}") + sys.exit(1) + if 2 not in ids: + print("FAIL: missing tools/list response (id:2) in server output") + print(f"Server output was:\n{output!r}") + sys.exit(1) + if "tools" not in output: + print("FAIL: tools/list response body missing 'tools' key") + print(f"Server output was:\n{output!r}") + sys.exit(1) + + print("PASS") + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index bdfdae8..6ccccbb 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -28,6 +28,7 @@ #include #include #include +#include #endif #include #include // int64_t @@ -2375,8 +2376,23 @@ int cbm_mcp_server_run(cbm_mcp_server_t *srv, FILE *in, FILE *out) { for (;;) { /* Poll with idle timeout so we can evict unused stores between requests. - * MCP is request-response (one line at a time), so mixing poll() on the - * raw fd with getline() on the buffered FILE* is safe in practice. */ + * + * IMPORTANT: poll() operates on the raw fd, but getline() reads from a + * buffered FILE*. When a client sends multiple messages in rapid + * succession, the first getline() call may drain ALL kernel data into + * libc's internal FILE* buffer. Subsequent poll() calls then see an + * empty kernel fd and block for STORE_IDLE_TIMEOUT_S seconds even + * though the next messages are already in the FILE* buffer. + * + * Fix (Unix): use a three-phase approach — + * Phase 1: non-blocking poll (timeout=0) to check the kernel fd. + * Phase 2: if Phase 1 returns 0, peek the FILE* buffer via fgetc/ + * ungetc to detect data buffered by a prior getline() call. + * The fd is temporarily set O_NONBLOCK so fgetc() returns + * immediately (EAGAIN → EOF + ferror) instead of blocking + * when the FILE* buffer is empty, which would otherwise + * bypass the Phase 3 idle eviction timeout. + * Phase 3: only if both phases confirm no data, do blocking poll. */ #ifdef _WIN32 /* Windows: WaitForSingleObject on stdin handle */ HANDLE hStdin = (HANDLE)_get_osfhandle(fd); @@ -2389,16 +2405,62 @@ int cbm_mcp_server_run(cbm_mcp_server_t *srv, FILE *in, FILE *out) { continue; } #else + /* Phase 1: non-blocking poll — catches data already in the kernel fd + * AND handles the case where a prior getline() drained the kernel fd + * into libc's FILE* buffer (raw fd appears empty but data is buffered). + * We always try a zero-timeout poll first; if it misses buffered data, + * phase 2 below catches it via an explicit FILE* peek. */ struct pollfd pfd = {.fd = fd, .events = POLLIN}; - int pr = poll(&pfd, 1, STORE_IDLE_TIMEOUT_S * 1000); + int pr = poll(&pfd, 1, 0); /* non-blocking */ if (pr < 0) { break; /* error or signal */ } if (pr == 0) { - /* Timeout — evict idle store to free resources */ - cbm_mcp_server_evict_idle(srv, STORE_IDLE_TIMEOUT_S); - continue; + /* Raw fd appears empty. Check whether libc has already buffered + * data from a previous over-read by peeking one byte via fgetc. + * IMPORTANT: temporarily set O_NONBLOCK so fgetc() returns + * immediately when the FILE* buffer AND kernel fd are both empty + * (EAGAIN → EOF + ferror). Without this, fgetc() on a blocking fd + * would block indefinitely, preventing the Phase 3 idle eviction + * timeout from ever firing. */ + int saved_flags = fcntl(fd, F_GETFL); + if (saved_flags < 0) { + /* fcntl failed (should not happen on a valid fd) — skip the + * FILE* peek and fall straight through to the blocking poll so + * idle eviction still fires on timeout. */ + pr = poll(&pfd, 1, STORE_IDLE_TIMEOUT_S * 1000); + if (pr < 0) { + break; + } + if (pr == 0) { + cbm_mcp_server_evict_idle(srv, STORE_IDLE_TIMEOUT_S); + continue; + } + } else { + (void)fcntl(fd, F_SETFL, saved_flags | O_NONBLOCK); + int c = fgetc(in); + (void)fcntl(fd, F_SETFL, saved_flags); /* restore blocking */ + if (c == EOF) { + if (feof(in)) { + break; /* true EOF */ + } + /* No buffered data (EAGAIN from non-blocking read) — clear + * the ferror indicator set by EAGAIN, then blocking poll. */ + clearerr(in); + pr = poll(&pfd, 1, STORE_IDLE_TIMEOUT_S * 1000); + if (pr < 0) { + break; + } + if (pr == 0) { + cbm_mcp_server_evict_idle(srv, STORE_IDLE_TIMEOUT_S); + continue; + } + } else { + /* Buffered data found — push back and fall through to getline */ + (void)ungetc(c, in); + } + } } #endif diff --git a/tests/test_mcp.c b/tests/test_mcp.c index a634127..bd07cba 100644 --- a/tests/test_mcp.c +++ b/tests/test_mcp.c @@ -1215,6 +1215,83 @@ TEST(snippet_include_neighbors_enabled) { PASS(); } +/* ══════════════════════════════════════════════════════════════════ + * POLL/GETLINE FILE* BUFFERING FIX + * ══════════════════════════════════════════════════════════════════ */ + +#ifndef _WIN32 +#include +#include + +/* Signal handler used by alarm() to abort the test if it hangs */ +static void alarm_handler(int sig) { + (void)sig; + /* Writing to stderr is async-signal-safe */ + const char msg[] = "FAIL: mcp_server_run_rapid_messages timed out (>5s)\n"; + write(STDERR_FILENO, msg, sizeof(msg) - 1); + _exit(1); +} + +TEST(mcp_server_run_rapid_messages) { + /* Simulate a client sending initialize + notifications/initialized + + * tools/list all at once (no delays), which exercises the FILE* + * buffering fix: the first getline() over-reads kernel data into the + * libc buffer; without the fix, subsequent poll() calls block for 60s. + * + * We use alarm(5) to abort the test process if the server hangs. */ + int fds[2]; + ASSERT_EQ(pipe(fds), 0); + + /* Write all 3 messages to the write end in one shot */ + const char *msgs = + "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"initialize\"," + "\"params\":{\"protocolVersion\":\"2025-11-25\",\"capabilities\":{}}}\n" + "{\"jsonrpc\":\"2.0\",\"method\":\"notifications/initialized\"}\n" + "{\"jsonrpc\":\"2.0\",\"id\":2,\"method\":\"tools/list\",\"params\":{}}\n"; + ssize_t written = write(fds[1], msgs, strlen(msgs)); + ASSERT_TRUE(written > 0); + close(fds[1]); /* EOF signals end of input to the server */ + + FILE *in_fp = fdopen(fds[0], "r"); + ASSERT_NOT_NULL(in_fp); + + FILE *out_fp = tmpfile(); + ASSERT_NOT_NULL(out_fp); + + cbm_mcp_server_t *srv = cbm_mcp_server_new(NULL); + ASSERT_NOT_NULL(srv); + + /* Install alarm to fail the test if cbm_mcp_server_run blocks */ + signal(SIGALRM, alarm_handler); + alarm(5); + + int rc = cbm_mcp_server_run(srv, in_fp, out_fp); + + alarm(0); /* cancel alarm */ + signal(SIGALRM, SIG_DFL); + + ASSERT_EQ(rc, 0); + + /* Verify both responses are present: + * id:1 — initialize response + * id:2 — tools/list response (notifications/initialized produces none) + * and that the tools list payload is included. */ + rewind(out_fp); + char buf[4096] = {0}; + size_t nread = fread(buf, 1, sizeof(buf) - 1, out_fp); + ASSERT_TRUE(nread > 0); + ASSERT_NOT_NULL(strstr(buf, "\"id\":1")); + ASSERT_NOT_NULL(strstr(buf, "\"id\":2")); + ASSERT_NOT_NULL(strstr(buf, "tools")); + + cbm_mcp_server_free(srv); + fclose(out_fp); + /* in_fp already EOF; fclose cleans up */ + fclose(in_fp); + PASS(); +} +#endif /* !_WIN32 */ + /* ══════════════════════════════════════════════════════════════════ * SUITE * ══════════════════════════════════════════════════════════════════ */ @@ -1287,6 +1364,11 @@ SUITE(mcp) { RUN_TEST(parse_file_uri_windows); RUN_TEST(parse_file_uri_invalid); + /* Poll/getline FILE* buffering fix */ +#ifndef _WIN32 + RUN_TEST(mcp_server_run_rapid_messages); +#endif + /* Snippet resolution (port of snippet_test.go) */ RUN_TEST(snippet_exact_qn); RUN_TEST(snippet_qn_suffix);