Skip to content

Commit 4c00594

Browse files
ravygmaciej-kisiel
andauthored
mcp: log out-of-band errors instead of dropping them (#887)
## Summary Two error sites in the MCP runtime were previously unobservable: - **`mcp/shared.go` `startKeepalive`** — when a keepalive ping failed, the session was closed silently with no indication of why. - **`mcp/transport.go` `connect`** — `jsonrpc2.OnInternalError` printed via the package-level `log.Printf`, bypassing the `*slog.Logger` configured on `ServerOptions`/`ClientOptions`. Both sites now report through the existing `*slog.Logger` that `Server` and `Client` already guarantee non-nil via `ensureLogger`. The logger is threaded into the unexported `startKeepalive` and `connect` helpers as a parameter — **no new public API surface**. ## Context Per @jba's [suggestion on #865](#865 (comment)): > Maybe we can just use the logger in all the places we currently drop the error. Then it's still reported, but we don't have to add any API. This PR is that smaller alternative. It addresses the concrete observability gap in #218 without expanding `ServerOptions`. PR #865 (the `ErrorHandler` callback proposal) remains open pending community input on whether a structured callback API is also wanted on top of logging. `notifySessions` already logs at `Warn` level on delivery errors and is intentionally left untouched. ## Test plan - [x] New regression test `TestKeepAliveFailure_Logged` asserts the keepalive failure produces a log line on the configured logger. - [x] Verified the test fails when the new `logger.Error(...)` call is removed (sanity check that it pins the behavior). - [x] `go test ./mcp/... -count=1` passes. - [x] `go vet ./...` clean. - [x] `go build ./...` clean. Refs #218 --------- Co-authored-by: Maciej Kisiel <mkisiel@google.com>
1 parent 15e93a2 commit 4c00594

File tree

5 files changed

+64
-9
lines changed

5 files changed

+64
-9
lines changed

mcp/client.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ func (c *Client) capabilities(protocolVersion string) *ClientCapabilities {
253253
// server, calls or notifications will return an error wrapping
254254
// [ErrConnectionClosed].
255255
func (c *Client) Connect(ctx context.Context, t Transport, opts *ClientSessionOptions) (cs *ClientSession, err error) {
256-
cs, err = connect(ctx, t, c, (*clientSessionState)(nil), nil)
256+
cs, err = connect(ctx, t, c, (*clientSessionState)(nil), nil, c.opts.Logger)
257257
if err != nil {
258258
return nil, err
259259
}
@@ -405,7 +405,7 @@ func (cs *ClientSession) registerElicitationWaiter(elicitationID string) (await
405405

406406
// startKeepalive starts the keepalive mechanism for this client session.
407407
func (cs *ClientSession) startKeepalive(interval time.Duration) {
408-
startKeepalive(cs, interval, &cs.keepaliveCancel)
408+
startKeepalive(cs, interval, &cs.keepaliveCancel, cs.client.opts.Logger)
409409
}
410410

411411
// AddRoots adds the given roots to the client,

mcp/mcp_test.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1817,6 +1817,52 @@ func TestKeepAliveFailure(t *testing.T) {
18171817
})
18181818
}
18191819

1820+
// TestKeepAliveFailure_Logged verifies that a keepalive ping failure is
1821+
// reported via the configured slog.Logger instead of being silently dropped.
1822+
// Regression test for #218.
1823+
func TestKeepAliveFailure_Logged(t *testing.T) {
1824+
synctest.Test(t, func(t *testing.T) {
1825+
ctx := context.Background()
1826+
1827+
ct, st := NewInMemoryTransports()
1828+
1829+
// Server without keepalive.
1830+
s := NewServer(testImpl, nil)
1831+
AddTool(s, greetTool(), sayHi)
1832+
ss, err := s.Connect(ctx, st, nil)
1833+
if err != nil {
1834+
t.Fatal(err)
1835+
}
1836+
1837+
// Client with a short keepalive and a logger that writes to a
1838+
// buffer so we can assert on its output.
1839+
var buf bytes.Buffer
1840+
clientOpts := &ClientOptions{
1841+
KeepAlive: 50 * time.Millisecond,
1842+
Logger: slog.New(slog.NewTextHandler(&buf, &slog.HandlerOptions{Level: slog.LevelError})),
1843+
}
1844+
c := NewClient(testImpl, clientOpts)
1845+
cs, err := c.Connect(ctx, ct, nil)
1846+
if err != nil {
1847+
t.Fatal(err)
1848+
}
1849+
defer cs.Close()
1850+
1851+
synctest.Wait()
1852+
1853+
// Trigger a ping failure by closing the server side.
1854+
ss.Close()
1855+
1856+
time.Sleep(100 * time.Millisecond)
1857+
synctest.Wait()
1858+
1859+
got := buf.String() // slog serializes Write calls internally
1860+
if !strings.Contains(got, "keepalive ping failed") {
1861+
t.Errorf("expected keepalive failure to be logged, got log output:\n%s", got)
1862+
}
1863+
})
1864+
}
1865+
18201866
func TestAddTool_DuplicateNoPanicAndNoDuplicate(t *testing.T) {
18211867
// Adding the same tool pointer twice should not panic and should not
18221868
// produce duplicates in the server's tool list.

mcp/server.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1026,7 +1026,7 @@ func (s *Server) Connect(ctx context.Context, t Transport, opts *ServerSessionOp
10261026
}
10271027

10281028
s.opts.Logger.Info("server connecting")
1029-
ss, err := connect(ctx, t, s, state, onClose)
1029+
ss, err := connect(ctx, t, s, state, onClose, s.opts.Logger)
10301030
if err != nil {
10311031
s.opts.Logger.Error("server connect error", "error", err)
10321032
return nil, err
@@ -1531,7 +1531,7 @@ func (ss *ServerSession) Wait() error {
15311531

15321532
// startKeepalive starts the keepalive mechanism for this server session.
15331533
func (ss *ServerSession) startKeepalive(interval time.Duration) {
1534-
startKeepalive(ss, interval, &ss.keepaliveCancel)
1534+
startKeepalive(ss, interval, &ss.keepaliveCancel, ss.server.opts.Logger)
15351535
}
15361536

15371537
// pageToken is the internal structure for the opaque pagination cursor.

mcp/shared.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -582,7 +582,10 @@ type keepaliveSession interface {
582582
// startKeepalive starts the keepalive mechanism for a session.
583583
// It assigns the cancel function to the provided cancelPtr and starts a goroutine
584584
// that sends ping messages at the specified interval.
585-
func startKeepalive(session keepaliveSession, interval time.Duration, cancelPtr *context.CancelFunc) {
585+
//
586+
// logger must be non-nil; ping failures (which terminate the keepalive loop and
587+
// close the session) are reported via logger so they are not silently dropped.
588+
func startKeepalive(session keepaliveSession, interval time.Duration, cancelPtr *context.CancelFunc, logger *slog.Logger) {
586589
ctx, cancel := context.WithCancel(context.Background())
587590
// Assign cancel function before starting goroutine to avoid race condition.
588591
// We cannot return it because the caller may need to cancel during the
@@ -602,7 +605,9 @@ func startKeepalive(session keepaliveSession, interval time.Duration, cancelPtr
602605
err := session.Ping(pingCtx, nil)
603606
pingCancel()
604607
if err != nil {
605-
// Ping failed, close the session
608+
// Ping failed; log it before closing the session so the
609+
// failure is observable to operators. See #218.
610+
logger.Error("keepalive ping failed; closing session", "error", err)
606611
_ = session.Close()
607612
return
608613
}

mcp/transport.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import (
1010
"errors"
1111
"fmt"
1212
"io"
13-
"log"
13+
"log/slog"
1414
"net"
1515
"os"
1616
"sync"
@@ -152,7 +152,9 @@ type handler interface {
152152
handle(ctx context.Context, req *jsonrpc.Request) (any, error)
153153
}
154154

155-
func connect[H handler, State any](ctx context.Context, t Transport, b binder[H, State], s State, onClose func()) (H, error) {
155+
// connect wires a transport to a binder. logger must be non-nil; it receives
156+
// jsonrpc2 internal errors that would otherwise be dropped (see #218).
157+
func connect[H handler, State any](ctx context.Context, t Transport, b binder[H, State], s State, onClose func(), logger *slog.Logger) (H, error) {
156158
var zero H
157159
mcpConn, err := t.Connect(ctx)
158160
if err != nil {
@@ -178,7 +180,9 @@ func connect[H handler, State any](ctx context.Context, t Transport, b binder[H,
178180
OnDone: func() {
179181
b.disconnect(h)
180182
},
181-
OnInternalError: func(err error) { log.Printf("jsonrpc2 error: %v", err) },
183+
OnInternalError: func(err error) {
184+
logger.Error("jsonrpc2 internal error", "error", err)
185+
},
182186
})
183187
assert(preempter.conn != nil, "unbound preempter")
184188
return h, nil

0 commit comments

Comments
 (0)