Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 26 additions & 8 deletions pkg/sentry/syscalls/linux/sys_poll.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@ package linux
import (
"fmt"
"time"
"unsafe"

"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal/primitive"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/ktime"
Expand All @@ -34,6 +36,15 @@ import (
// unrecoverable.
const fileCap = 1024 * 1024

var (
	// reventsOffsetInPollFD is how many bytes into a linux.PollFD the
	// REvents field begins, computed with unsafe.Offsetof.
	reventsOffsetInPollFD = int(unsafe.Offsetof(linux.PollFD{}.REvents))

	// sizeofPollFD is the byte size of a single linux.PollFD, as reported
	// by its SizeBytes method.
	sizeofPollFD = (*linux.PollFD)(nil).SizeBytes()
)

// Masks for "readable", "writable", and "exceptional" events as defined by
// select(2).
const (
Expand Down Expand Up @@ -184,21 +195,28 @@ func doPoll(t *kernel.Task, addr hostarch.Addr, nfds uint, timeout time.Duration
return timeout, 0, err
}

// Compatibility warning: Linux adds POLLHUP and POLLERR just before
// polling, in fs/select.c:do_pollfd(). Since pfd is copied out after
// polling, changing event masks here is an application-visible difference.
// (Linux also doesn't copy out event masks at all, only revents.)
Comment thread
ayushr2 marked this conversation as resolved.
// Linux adds POLLHUP and POLLERR just before polling, in
// fs/select.c:do_pollfd(). We can modify pfd[i].Events because
// it is not copied out after polling (consistent with Linux).
for i := range pfd {
pfd[i].Events |= linux.POLLHUP | linux.POLLERR
}
remainingTimeout, n, err := pollBlock(t, pfd, timeout)
err = linuxerr.ConvertIntr(err, linuxerr.EINTR)

// The poll entries are copied out regardless of whether
// any are set or not. This aligns with the Linux behavior.
// Copy out only the revents field, matching Linux behavior.
// Linux's do_sys_poll() only writes back revents via
// unsafe_put_user(fds->revents, &ufds->revents), never the
// events field. Writing back the full struct would corrupt
// the caller's events mask (e.g. libevent's poll backend),
// causing busy-loops when event_del() fails to fully remove
// an fd from the pollfd array due to stale POLLHUP/POLLERR bits.
if nfds > 0 && err == nil {
if _, err := linux.CopyPollFDSliceOut(t, addr, pfd); err != nil {
return remainingTimeout, 0, err
for i := range pfd {
off := hostarch.Addr(i*sizeofPollFD + reventsOffsetInPollFD)
if _, copyErr := primitive.CopyInt16Out(t, addr+off, pfd[i].REvents); copyErr != nil {
return remainingTimeout, 0, copyErr
}
}
}

Expand Down
49 changes: 49 additions & 0 deletions test/syscalls/linux/poll.cc
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,55 @@ TEST_F(PollTest, UnpollableFile) {
EXPECT_EQ(poll_fd.revents, POLLIN | POLLOUT);
}

// poll(2) must leave the caller's events field untouched. Linux's
// do_sys_poll() copies back only revents (via unsafe_put_user()), so a
// POLLHUP reported in revents must not show up as an extra bit in events.
TEST_F(PollTest, EventsFieldNotModified) {
  int pipe_fds[2];
  ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds());

  FileDescriptor read_end(pipe_fds[0]);
  FileDescriptor write_end(pipe_fds[1]);

  // With the write end gone, the read end reports POLLHUP.
  write_end.reset();

  // Ask for POLLIN only.
  struct pollfd pfd = {};
  pfd.fd = read_end.get();
  pfd.events = POLLIN;
  pfd.revents = 0;
  EXPECT_THAT(RetryEINTR(poll)(&pfd, 1, 0), SyscallSucceedsWithValue(1));

  // The hang-up must be visible in revents...
  EXPECT_NE(pfd.revents & POLLHUP, 0);

  // ...while events still holds exactly what was requested (no
  // POLLHUP/POLLERR bits added).
  EXPECT_EQ(pfd.events, POLLIN);
}

// Same check as for the hang-up case, but for POLLERR: the events field must
// not be written back when poll(2) reports an error condition in revents.
TEST_F(PollTest, EventsFieldNotModifiedOnError) {
  int pipe_fds[2];
  ASSERT_THAT(pipe(pipe_fds), SyscallSucceeds());

  FileDescriptor read_end(pipe_fds[0]);
  FileDescriptor write_end(pipe_fds[1]);

  // With the read end gone, the write end reports POLLERR.
  read_end.reset();

  // Ask for POLLOUT only.
  struct pollfd pfd = {};
  pfd.fd = write_end.get();
  pfd.events = POLLOUT;
  pfd.revents = 0;
  EXPECT_THAT(RetryEINTR(poll)(&pfd, 1, 0), SyscallSucceedsWithValue(1));

  // The error condition must be visible in revents...
  EXPECT_NE(pfd.revents & POLLERR, 0);

  // ...while events still holds exactly what was requested (no
  // POLLERR/POLLHUP bits added).
  EXPECT_EQ(pfd.events, POLLOUT);
}

} // namespace
} // namespace testing
} // namespace gvisor