| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422 |
- /* Copyright libuv contributors. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
- #include "uv.h"
- #include "internal.h"
- #include <errno.h>
- #include <sys/epoll.h>
- int uv__epoll_init(uv_loop_t* loop) {
- int fd;
- fd = epoll_create1(O_CLOEXEC);
- /* epoll_create1() can fail either because it's not implemented (old kernel)
- * or because it doesn't understand the O_CLOEXEC flag.
- */
- if (fd == -1 && (errno == ENOSYS || errno == EINVAL)) {
- fd = epoll_create(256);
- if (fd != -1)
- uv__cloexec(fd, 1);
- }
- loop->backend_fd = fd;
- if (fd == -1)
- return UV__ERR(errno);
- return 0;
- }
- void uv__platform_invalidate_fd(uv_loop_t* loop, int fd) {
- struct epoll_event* events;
- struct epoll_event dummy;
- uintptr_t i;
- uintptr_t nfds;
- assert(loop->watchers != NULL);
- assert(fd >= 0);
- events = (struct epoll_event*) loop->watchers[loop->nwatchers];
- nfds = (uintptr_t) loop->watchers[loop->nwatchers + 1];
- if (events != NULL)
- /* Invalidate events with same file descriptor */
- for (i = 0; i < nfds; i++)
- if (events[i].data.fd == fd)
- events[i].data.fd = -1;
- /* Remove the file descriptor from the epoll.
- * This avoids a problem where the same file description remains open
- * in another process, causing repeated junk epoll events.
- *
- * We pass in a dummy epoll_event, to work around a bug in old kernels.
- */
- if (loop->backend_fd >= 0) {
- /* Work around a bug in kernels 3.10 to 3.19 where passing a struct that
- * has the EPOLLWAKEUP flag set generates spurious audit syslog warnings.
- */
- memset(&dummy, 0, sizeof(dummy));
- epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, &dummy);
- }
- }
- int uv__io_check_fd(uv_loop_t* loop, int fd) {
- struct epoll_event e;
- int rc;
- memset(&e, 0, sizeof(e));
- e.events = POLLIN;
- e.data.fd = -1;
- rc = 0;
- if (epoll_ctl(loop->backend_fd, EPOLL_CTL_ADD, fd, &e))
- if (errno != EEXIST)
- rc = UV__ERR(errno);
- if (rc == 0)
- if (epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, &e))
- abort();
- return rc;
- }
- void uv__io_poll(uv_loop_t* loop, int timeout) {
- /* A bug in kernels < 2.6.37 makes timeouts larger than ~30 minutes
- * effectively infinite on 32 bits architectures. To avoid blocking
- * indefinitely, we cap the timeout and poll again if necessary.
- *
- * Note that "30 minutes" is a simplification because it depends on
- * the value of CONFIG_HZ. The magic constant assumes CONFIG_HZ=1200,
- * that being the largest value I have seen in the wild (and only once.)
- */
- static const int max_safe_timeout = 1789569;
- static int no_epoll_pwait_cached;
- static int no_epoll_wait_cached;
- int no_epoll_pwait;
- int no_epoll_wait;
- struct epoll_event events[1024];
- struct epoll_event* pe;
- struct epoll_event e;
- int real_timeout;
- QUEUE* q;
- uv__io_t* w;
- sigset_t sigset;
- uint64_t sigmask;
- uint64_t base;
- int have_signals;
- int nevents;
- int count;
- int nfds;
- int fd;
- int op;
- int i;
- int user_timeout;
- int reset_timeout;
- if (loop->nfds == 0) {
- assert(QUEUE_EMPTY(&loop->watcher_queue));
- return;
- }
- memset(&e, 0, sizeof(e));
- while (!QUEUE_EMPTY(&loop->watcher_queue)) {
- q = QUEUE_HEAD(&loop->watcher_queue);
- QUEUE_REMOVE(q);
- QUEUE_INIT(q);
- w = QUEUE_DATA(q, uv__io_t, watcher_queue);
- assert(w->pevents != 0);
- assert(w->fd >= 0);
- assert(w->fd < (int) loop->nwatchers);
- e.events = w->pevents;
- e.data.fd = w->fd;
- if (w->events == 0)
- op = EPOLL_CTL_ADD;
- else
- op = EPOLL_CTL_MOD;
- /* XXX Future optimization: do EPOLL_CTL_MOD lazily if we stop watching
- * events, skip the syscall and squelch the events after epoll_wait().
- */
- if (epoll_ctl(loop->backend_fd, op, w->fd, &e)) {
- if (errno != EEXIST)
- abort();
- assert(op == EPOLL_CTL_ADD);
- /* We've reactivated a file descriptor that's been watched before. */
- if (epoll_ctl(loop->backend_fd, EPOLL_CTL_MOD, w->fd, &e))
- abort();
- }
- w->events = w->pevents;
- }
- sigmask = 0;
- if (loop->flags & UV_LOOP_BLOCK_SIGPROF) {
- sigemptyset(&sigset);
- sigaddset(&sigset, SIGPROF);
- sigmask |= 1 << (SIGPROF - 1);
- }
- assert(timeout >= -1);
- base = loop->time;
- count = 48; /* Benchmarks suggest this gives the best throughput. */
- real_timeout = timeout;
- if (uv__get_internal_fields(loop)->flags & UV_METRICS_IDLE_TIME) {
- reset_timeout = 1;
- user_timeout = timeout;
- timeout = 0;
- } else {
- reset_timeout = 0;
- user_timeout = 0;
- }
- /* You could argue there is a dependency between these two but
- * ultimately we don't care about their ordering with respect
- * to one another. Worst case, we make a few system calls that
- * could have been avoided because another thread already knows
- * they fail with ENOSYS. Hardly the end of the world.
- */
- no_epoll_pwait = uv__load_relaxed(&no_epoll_pwait_cached);
- no_epoll_wait = uv__load_relaxed(&no_epoll_wait_cached);
- for (;;) {
- /* Only need to set the provider_entry_time if timeout != 0. The function
- * will return early if the loop isn't configured with UV_METRICS_IDLE_TIME.
- */
- if (timeout != 0)
- uv__metrics_set_provider_entry_time(loop);
- /* See the comment for max_safe_timeout for an explanation of why
- * this is necessary. Executive summary: kernel bug workaround.
- */
- if (sizeof(int32_t) == sizeof(long) && timeout >= max_safe_timeout)
- timeout = max_safe_timeout;
- if (sigmask != 0 && no_epoll_pwait != 0)
- if (pthread_sigmask(SIG_BLOCK, &sigset, NULL))
- abort();
- if (no_epoll_wait != 0 || (sigmask != 0 && no_epoll_pwait == 0)) {
- nfds = epoll_pwait(loop->backend_fd,
- events,
- ARRAY_SIZE(events),
- timeout,
- &sigset);
- if (nfds == -1 && errno == ENOSYS) {
- uv__store_relaxed(&no_epoll_pwait_cached, 1);
- no_epoll_pwait = 1;
- }
- } else {
- nfds = epoll_wait(loop->backend_fd,
- events,
- ARRAY_SIZE(events),
- timeout);
- if (nfds == -1 && errno == ENOSYS) {
- uv__store_relaxed(&no_epoll_wait_cached, 1);
- no_epoll_wait = 1;
- }
- }
- if (sigmask != 0 && no_epoll_pwait != 0)
- if (pthread_sigmask(SIG_UNBLOCK, &sigset, NULL))
- abort();
- /* Update loop->time unconditionally. It's tempting to skip the update when
- * timeout == 0 (i.e. non-blocking poll) but there is no guarantee that the
- * operating system didn't reschedule our process while in the syscall.
- */
- SAVE_ERRNO(uv__update_time(loop));
- if (nfds == 0) {
- assert(timeout != -1);
- if (reset_timeout != 0) {
- timeout = user_timeout;
- reset_timeout = 0;
- }
- if (timeout == -1)
- continue;
- if (timeout == 0)
- return;
- /* We may have been inside the system call for longer than |timeout|
- * milliseconds so we need to update the timestamp to avoid drift.
- */
- goto update_timeout;
- }
- if (nfds == -1) {
- if (errno == ENOSYS) {
- /* epoll_wait() or epoll_pwait() failed, try the other system call. */
- assert(no_epoll_wait == 0 || no_epoll_pwait == 0);
- continue;
- }
- if (errno != EINTR)
- abort();
- if (reset_timeout != 0) {
- timeout = user_timeout;
- reset_timeout = 0;
- }
- if (timeout == -1)
- continue;
- if (timeout == 0)
- return;
- /* Interrupted by a signal. Update timeout and poll again. */
- goto update_timeout;
- }
- have_signals = 0;
- nevents = 0;
- {
- /* Squelch a -Waddress-of-packed-member warning with gcc >= 9. */
- union {
- struct epoll_event* events;
- uv__io_t* watchers;
- } x;
- x.events = events;
- assert(loop->watchers != NULL);
- loop->watchers[loop->nwatchers] = x.watchers;
- loop->watchers[loop->nwatchers + 1] = (void*) (uintptr_t) nfds;
- }
- for (i = 0; i < nfds; i++) {
- pe = events + i;
- fd = pe->data.fd;
- /* Skip invalidated events, see uv__platform_invalidate_fd */
- if (fd == -1)
- continue;
- assert(fd >= 0);
- assert((unsigned) fd < loop->nwatchers);
- w = loop->watchers[fd];
- if (w == NULL) {
- /* File descriptor that we've stopped watching, disarm it.
- *
- * Ignore all errors because we may be racing with another thread
- * when the file descriptor is closed.
- */
- epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, pe);
- continue;
- }
- /* Give users only events they're interested in. Prevents spurious
- * callbacks when previous callback invocation in this loop has stopped
- * the current watcher. Also, filters out events that users has not
- * requested us to watch.
- */
- pe->events &= w->pevents | POLLERR | POLLHUP;
- /* Work around an epoll quirk where it sometimes reports just the
- * EPOLLERR or EPOLLHUP event. In order to force the event loop to
- * move forward, we merge in the read/write events that the watcher
- * is interested in; uv__read() and uv__write() will then deal with
- * the error or hangup in the usual fashion.
- *
- * Note to self: happens when epoll reports EPOLLIN|EPOLLHUP, the user
- * reads the available data, calls uv_read_stop(), then sometime later
- * calls uv_read_start() again. By then, libuv has forgotten about the
- * hangup and the kernel won't report EPOLLIN again because there's
- * nothing left to read. If anything, libuv is to blame here. The
- * current hack is just a quick bandaid; to properly fix it, libuv
- * needs to remember the error/hangup event. We should get that for
- * free when we switch over to edge-triggered I/O.
- */
- if (pe->events == POLLERR || pe->events == POLLHUP)
- pe->events |=
- w->pevents & (POLLIN | POLLOUT | UV__POLLRDHUP | UV__POLLPRI);
- if (pe->events != 0) {
- /* Run signal watchers last. This also affects child process watchers
- * because those are implemented in terms of signal watchers.
- */
- if (w == &loop->signal_io_watcher) {
- have_signals = 1;
- } else {
- uv__metrics_update_idle_time(loop);
- w->cb(loop, w, pe->events);
- }
- nevents++;
- }
- }
- if (reset_timeout != 0) {
- timeout = user_timeout;
- reset_timeout = 0;
- }
- if (have_signals != 0) {
- uv__metrics_update_idle_time(loop);
- loop->signal_io_watcher.cb(loop, &loop->signal_io_watcher, POLLIN);
- }
- loop->watchers[loop->nwatchers] = NULL;
- loop->watchers[loop->nwatchers + 1] = NULL;
- if (have_signals != 0)
- return; /* Event loop should cycle now so don't poll again. */
- if (nevents != 0) {
- if (nfds == ARRAY_SIZE(events) && --count != 0) {
- /* Poll for more events but don't block this time. */
- timeout = 0;
- continue;
- }
- return;
- }
- if (timeout == 0)
- return;
- if (timeout == -1)
- continue;
- update_timeout:
- assert(timeout > 0);
- real_timeout -= (loop->time - base);
- if (real_timeout <= 0)
- return;
- timeout = real_timeout;
- }
- }
|