From 72986a847eacfc0cec7834a7d44e9ba9f0a17fa1 Mon Sep 17 00:00:00 2001 From: Ravish Date: Wed, 8 Apr 2026 17:37:40 -0700 Subject: [PATCH 1/2] mcp: log out-of-band errors instead of dropping them Two errors that occur outside the request/response path were previously unobservable: - Keepalive ping failures (mcp/shared.go startKeepalive) silently closed the session with no record of why. - jsonrpc2 internal errors (mcp/transport.go connect) were printed via the global log.Printf, bypassing the configured slog.Logger. Both sites now report through the existing *slog.Logger that Server and Client already guarantee non-nil via ensureLogger. No new public API surface; the logger is threaded into the unexported helpers as a parameter. Includes a regression test (TestKeepAliveFailure_Logged) that asserts the keepalive failure produces a log line on the configured logger, and was verified to fail when the new log call is removed. Per @jba's suggestion on #865, this is the smaller no-new-API alternative to the ErrorHandler callback proposal. #865 remains open pending community input on whether a structured callback is also wanted. Refs #218 --- mcp/client.go | 4 +-- mcp/mcp_test.go | 64 ++++++++++++++++++++++++++++++++++++++++++++++++ mcp/server.go | 4 +-- mcp/shared.go | 9 +++++-- mcp/transport.go | 10 +++++--- 5 files changed, 82 insertions(+), 9 deletions(-) diff --git a/mcp/client.go b/mcp/client.go index 4860b62e..c82e189d 100644 --- a/mcp/client.go +++ b/mcp/client.go @@ -253,7 +253,7 @@ func (c *Client) capabilities(protocolVersion string) *ClientCapabilities { // server, calls or notifications will return an error wrapping // [ErrConnectionClosed]. func (c *Client) Connect(ctx context.Context, t Transport, opts *ClientSessionOptions) (cs *ClientSession, err error) { - cs, err = connect(ctx, t, c, (*clientSessionState)(nil), nil) + cs, err = connect(ctx, t, c, (*clientSessionState)(nil), nil, c.opts.Logger) if err != nil { return nil, err } @@ -405,7 +405,7 @@ func (cs *ClientSession) registerElicitationWaiter(elicitationID string) (await // startKeepalive starts the keepalive mechanism for this client session. func (cs *ClientSession) startKeepalive(interval time.Duration) { - startKeepalive(cs, interval, &cs.keepaliveCancel) + startKeepalive(cs, interval, &cs.keepaliveCancel, cs.client.opts.Logger) } // AddRoots adds the given roots to the client, diff --git a/mcp/mcp_test.go b/mcp/mcp_test.go index 00c4dc95..9af0aa1b 100644 --- a/mcp/mcp_test.go +++ b/mcp/mcp_test.go @@ -1817,6 +1817,70 @@ func TestKeepAliveFailure(t *testing.T) { }) } +// TestKeepAliveFailure_Logged verifies that a keepalive ping failure is +// reported via the configured slog.Logger instead of being silently dropped. +// Regression test for #218. +func TestKeepAliveFailure_Logged(t *testing.T) { + synctest.Test(t, func(t *testing.T) { + ctx := context.Background() + + ct, st := NewInMemoryTransports() + + // Server without keepalive. + s := NewServer(testImpl, nil) + AddTool(s, greetTool(), sayHi) + ss, err := s.Connect(ctx, st, nil) + if err != nil { + t.Fatal(err) + } + + // Client with a short keepalive and a logger that writes to a + // concurrency-safe buffer so we can assert on its output. + buf := &syncBuffer{} + clientOpts := &ClientOptions{ + KeepAlive: 50 * time.Millisecond, + Logger: slog.New(slog.NewTextHandler(buf, &slog.HandlerOptions{Level: slog.LevelError})), + } + c := NewClient(testImpl, clientOpts) + cs, err := c.Connect(ctx, ct, nil) + if err != nil { + t.Fatal(err) + } + defer cs.Close() + + synctest.Wait() + + // Trigger a ping failure by closing the server side. + ss.Close() + + time.Sleep(100 * time.Millisecond) + synctest.Wait() + + got := buf.String() + if !strings.Contains(got, "keepalive ping failed") { + t.Errorf("expected keepalive failure to be logged, got log output:\n%s", got) + } + }) +} + +// syncBuffer is a goroutine-safe bytes.Buffer for capturing slog output in tests. +type syncBuffer struct { + mu sync.Mutex + buf bytes.Buffer +} + +func (b *syncBuffer) Write(p []byte) (int, error) { + b.mu.Lock() + defer b.mu.Unlock() + return b.buf.Write(p) +} + +func (b *syncBuffer) String() string { + b.mu.Lock() + defer b.mu.Unlock() + return b.buf.String() +} + func TestAddTool_DuplicateNoPanicAndNoDuplicate(t *testing.T) { // Adding the same tool pointer twice should not panic and should not // produce duplicates in the server's tool list. diff --git a/mcp/server.go b/mcp/server.go index 2357959c..28504376 100644 --- a/mcp/server.go +++ b/mcp/server.go @@ -1026,7 +1026,7 @@ func (s *Server) Connect(ctx context.Context, t Transport, opts *ServerSessionOp } s.opts.Logger.Info("server connecting") - ss, err := connect(ctx, t, s, state, onClose) + ss, err := connect(ctx, t, s, state, onClose, s.opts.Logger) if err != nil { s.opts.Logger.Error("server connect error", "error", err) return nil, err @@ -1531,7 +1531,7 @@ func (ss *ServerSession) Wait() error { // startKeepalive starts the keepalive mechanism for this server session. func (ss *ServerSession) startKeepalive(interval time.Duration) { - startKeepalive(ss, interval, &ss.keepaliveCancel) + startKeepalive(ss, interval, &ss.keepaliveCancel, ss.server.opts.Logger) } // pageToken is the internal structure for the opaque pagination cursor. diff --git a/mcp/shared.go b/mcp/shared.go index 5662b121..05397a14 100644 --- a/mcp/shared.go +++ b/mcp/shared.go @@ -582,7 +582,10 @@ type keepaliveSession interface { // startKeepalive starts the keepalive mechanism for a session. // It assigns the cancel function to the provided cancelPtr and starts a goroutine // that sends ping messages at the specified interval. -func startKeepalive(session keepaliveSession, interval time.Duration, cancelPtr *context.CancelFunc) { +// +// logger must be non-nil; ping failures (which terminate the keepalive loop and +// close the session) are reported via logger so they are not silently dropped. +func startKeepalive(session keepaliveSession, interval time.Duration, cancelPtr *context.CancelFunc, logger *slog.Logger) { ctx, cancel := context.WithCancel(context.Background()) // Assign cancel function before starting goroutine to avoid race condition. // We cannot return it because the caller may need to cancel during the @@ -602,7 +605,9 @@ func startKeepalive(session keepaliveSession, interval time.Duration, cancelPtr err := session.Ping(pingCtx, nil) pingCancel() if err != nil { - // Ping failed, close the session + // Ping failed; log it before closing the session so the + // failure is observable to operators. See #218. + logger.Error("keepalive ping failed; closing session", "error", err) _ = session.Close() return } diff --git a/mcp/transport.go b/mcp/transport.go index 23dccf8e..9288b372 100644 --- a/mcp/transport.go +++ b/mcp/transport.go @@ -10,7 +10,7 @@ import ( "errors" "fmt" "io" - "log" + "log/slog" "net" "os" "sync" @@ -152,7 +152,9 @@ type handler interface { handle(ctx context.Context, req *jsonrpc.Request) (any, error) } -func connect[H handler, State any](ctx context.Context, t Transport, b binder[H, State], s State, onClose func()) (H, error) { +// connect wires a transport to a binder. logger must be non-nil; it receives +// jsonrpc2 internal errors that would otherwise be dropped (see #218). +func connect[H handler, State any](ctx context.Context, t Transport, b binder[H, State], s State, onClose func(), logger *slog.Logger) (H, error) { var zero H mcpConn, err := t.Connect(ctx) if err != nil { @@ -178,7 +180,9 @@ func connect[H handler, State any](ctx context.Context, t Transport, b binder[H, OnDone: func() { b.disconnect(h) }, - OnInternalError: func(err error) { log.Printf("jsonrpc2 error: %v", err) }, + OnInternalError: func(err error) { + logger.Error("jsonrpc2 internal error", "error", err) + }, }) assert(preempter.conn != nil, "unbound preempter") return h, nil From 0586871684df5515cbfeb9f0945bb6dfcff7db53 Mon Sep 17 00:00:00 2001 From: Ravish Date: Fri, 10 Apr 2026 12:03:38 -0700 Subject: [PATCH 2/2] mcp: drop syncBuffer wrapper in test; use plain bytes.Buffer slog already serializes Write calls internally, so the mutex wrapper is unnecessary. --- mcp/mcp_test.go | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/mcp/mcp_test.go b/mcp/mcp_test.go index 9af0aa1b..5df8b38a 100644 --- a/mcp/mcp_test.go +++ b/mcp/mcp_test.go @@ -1835,11 +1835,11 @@ func TestKeepAliveFailure_Logged(t *testing.T) { } // Client with a short keepalive and a logger that writes to a - // concurrency-safe buffer so we can assert on its output. - buf := &syncBuffer{} + // buffer so we can assert on its output. + var buf bytes.Buffer clientOpts := &ClientOptions{ KeepAlive: 50 * time.Millisecond, - Logger: slog.New(slog.NewTextHandler(buf, &slog.HandlerOptions{Level: slog.LevelError})), + Logger: slog.New(slog.NewTextHandler(&buf, &slog.HandlerOptions{Level: slog.LevelError})), } c := NewClient(testImpl, clientOpts) cs, err := c.Connect(ctx, ct, nil) @@ -1856,31 +1856,13 @@ func TestKeepAliveFailure_Logged(t *testing.T) { time.Sleep(100 * time.Millisecond) synctest.Wait() - got := buf.String() + got := buf.String() // slog serializes Write calls internally if !strings.Contains(got, "keepalive ping failed") { t.Errorf("expected keepalive failure to be logged, got log output:\n%s", got) } }) } -// syncBuffer is a goroutine-safe bytes.Buffer for capturing slog output in tests. -type syncBuffer struct { - mu sync.Mutex - buf bytes.Buffer -} - -func (b *syncBuffer) Write(p []byte) (int, error) { - b.mu.Lock() - defer b.mu.Unlock() - return b.buf.Write(p) -} - -func (b *syncBuffer) String() string { - b.mu.Lock() - defer b.mu.Unlock() - return b.buf.String() -} - func TestAddTool_DuplicateNoPanicAndNoDuplicate(t *testing.T) { // Adding the same tool pointer twice should not panic and should not // produce duplicates in the server's tool list.