/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu-common.h"
#include "qemu/timer.h"
#include "qemu/sockets.h"	// struct in_addr needed for libslirp.h
#include "slirp/libslirp.h"
#include "qemu/main-loop.h"
#include "block/aio.h"

#ifndef _WIN32

#include "qemu/compatfd.h"
/* If we have signalfd, we mask out the signals we want to handle and then
 * use signalfd to listen for them.  We rely on whatever the current signal
 * handler is to dispatch the signals when we receive them.
 */
static void sigfd_handler(void *opaque)
{
    int fd = (intptr_t)opaque;
    struct qemu_signalfd_siginfo info;
    struct sigaction action;
    ssize_t len;

    while (1) {
        do {
            len = read(fd, &info, sizeof(info));
        } while (len == -1 && errno == EINTR);

        if (len == -1 && errno == EAGAIN) {
            break;
        }

        if (len != sizeof(info)) {
            printf("read from sigfd returned %zd: %m\n", len);
            return;
        }

        sigaction(info.ssi_signo, NULL, &action);
        if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) {
            action.sa_sigaction(info.ssi_signo,
                                (siginfo_t *)&info, NULL);
        } else if (action.sa_handler) {
            action.sa_handler(info.ssi_signo);
        }
    }
}

/* Block the signals QEMU wants to consume synchronously and route them
 * through a signalfd that is serviced by the main loop via
 * sigfd_handler().
 *
 * Returns 0 on success, -errno if the signalfd cannot be created.
 *
 * (Fix: removed interleaved VCS-viewer artifact lines that had been
 * pasted into the function body.)
 */
static int qemu_signal_init(void)
{
    int sigfd;
    sigset_t set;

    /*
     * SIG_IPI must be blocked in the main thread and must not be caught
     * by sigwait() in the signal thread. Otherwise, the cpu thread will
     * not catch it reliably.
     */
    sigemptyset(&set);
    sigaddset(&set, SIG_IPI);
    sigaddset(&set, SIGIO);
    sigaddset(&set, SIGALRM);
    sigaddset(&set, SIGBUS);
    pthread_sigmask(SIG_BLOCK, &set, NULL);

    /* SIG_IPI stays blocked; do not let the signalfd consume it. */
    sigdelset(&set, SIG_IPI);
    sigfd = qemu_signalfd(&set);
    if (sigfd == -1) {
        fprintf(stderr, "failed to create signalfd\n");
        return -errno;
    }

    fcntl_setfl(sigfd, O_NONBLOCK);

    qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
                         (void *)(intptr_t)sigfd);

    return 0;
}

#else /* _WIN32 */

/* Win32 has no POSIX-style signal routing; nothing to set up. */
static int qemu_signal_init(void)
{
    return 0;
}
#endif

/* The process-wide AioContext; created by qemu_init_main_loop() and
 * NULL until then.
 */
static AioContext *qemu_aio_context;

/* Return the main loop's AioContext (NULL before qemu_init_main_loop()). */
AioContext *qemu_get_aio_context(void)
{
    return qemu_aio_context;
}

118 119
void qemu_notify_event(void)
{
120
    if (!qemu_aio_context) {
121 122
        return;
    }
123
    aio_notify(qemu_aio_context);
124 125
}

126 127
static GArray *gpollfds;

128
int qemu_init_main_loop(void)
129 130
{
    int ret;
131
    GSource *src;
132

133 134
    init_clocks();

135 136 137 138 139
    ret = qemu_signal_init();
    if (ret) {
        return ret;
    }

140
    gpollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
141
    qemu_aio_context = aio_context_new();
142 143 144
    src = aio_get_g_source(qemu_aio_context);
    g_source_attach(src, NULL);
    g_source_unref(src);
145 146 147 148 149
    return 0;
}

/* Highest-priority glib source ready to run, as reported by
 * g_main_context_prepare().
 */
static int max_priority;

#ifndef _WIN32
/* Slice of gpollfds owned by glib: starting index and fd count. */
static int glib_pollfds_idx;
static int glib_n_poll_fds;

static void glib_pollfds_fill(int64_t *cur_timeout)
155 156
{
    GMainContext *context = g_main_context_default();
157
    int timeout = 0;
158
    int64_t timeout_ns;
159
    int n;
160 161 162

    g_main_context_prepare(context, &max_priority);

163 164 165 166 167 168 169 170 171 172
    glib_pollfds_idx = gpollfds->len;
    n = glib_n_poll_fds;
    do {
        GPollFD *pfds;
        glib_n_poll_fds = n;
        g_array_set_size(gpollfds, glib_pollfds_idx + glib_n_poll_fds);
        pfds = &g_array_index(gpollfds, GPollFD, glib_pollfds_idx);
        n = g_main_context_query(context, max_priority, &timeout, pfds,
                                 glib_n_poll_fds);
    } while (n != glib_n_poll_fds);
173

174 175 176 177
    if (timeout < 0) {
        timeout_ns = -1;
    } else {
        timeout_ns = (int64_t)timeout * (int64_t)SCALE_MS;
178
    }
179 180

    *cur_timeout = qemu_soonest_timeout(timeout_ns, *cur_timeout);
181 182
}

183
static void glib_pollfds_poll(void)
184 185
{
    GMainContext *context = g_main_context_default();
186
    GPollFD *pfds = &g_array_index(gpollfds, GPollFD, glib_pollfds_idx);
187

188
    if (g_main_context_check(context, max_priority, pfds, glib_n_poll_fds)) {
189 190 191 192
        g_main_context_dispatch(context);
    }
}

/* Zero-timeout iterations tolerated before we suspect busy-waiting and
 * force a real sleep (see os_host_main_loop_wait).
 */
#define MAX_MAIN_LOOP_SPIN (1000)

static int os_host_main_loop_wait(int64_t timeout)
196 197
{
    int ret;
198
    static int spin_counter;
199

200
    glib_pollfds_fill(&timeout);
201

202 203 204 205 206 207
    /* If the I/O thread is very busy or we are incorrectly busy waiting in
     * the I/O thread, this can lead to starvation of the BQL such that the
     * VCPU threads never run.  To make sure we can detect the later case,
     * print a message to the screen.  If we run into this condition, create
     * a fake timeout in order to give the VCPU threads a chance to run.
     */
208
    if (!timeout && (spin_counter > MAX_MAIN_LOOP_SPIN)) {
209 210 211 212 213 214 215 216 217
        static bool notified;

        if (!notified) {
            fprintf(stderr,
                    "main-loop: WARNING: I/O thread spun for %d iterations\n",
                    MAX_MAIN_LOOP_SPIN);
            notified = true;
        }

218
        timeout = SCALE_MS;
219 220
    }

221
    if (timeout) {
222
        spin_counter = 0;
223
        qemu_mutex_unlock_iothread();
224 225
    } else {
        spin_counter++;
226 227
    }

228
    ret = qemu_poll_ns((GPollFD *)gpollfds->data, gpollfds->len, timeout);
229

230
    if (timeout) {
231 232 233
        qemu_mutex_lock_iothread();
    }

234
    glib_pollfds_poll();
235 236 237
    return ret;
}
#else
/***********************************************************/
/* Polling handling */

/* A registered busy-poll callback and its user data. */
typedef struct PollingEntry {
    PollingFunc *func;
    void *opaque;
    struct PollingEntry *next;
} PollingEntry;

/* Singly-linked list of callbacks run on every Win32 main loop iteration. */
static PollingEntry *first_polling_entry;

/* Register @func to be invoked with @opaque on every iteration of the
 * Win32 main loop.  Entries are appended, preserving call order.
 * Always returns 0.
 */
int qemu_add_polling_cb(PollingFunc *func, void *opaque)
{
    PollingEntry *entry = g_malloc0(sizeof(PollingEntry));
    PollingEntry **tail;

    entry->func = func;
    entry->opaque = opaque;

    /* Walk to the end of the list and link the new entry there. */
    for (tail = &first_polling_entry; *tail != NULL; tail = &(*tail)->next) {
        /* nothing */
    }
    *tail = entry;
    return 0;
}

/* Unregister the first entry matching @func/@opaque, if any. */
void qemu_del_polling_cb(PollingFunc *func, void *opaque)
{
    PollingEntry **link = &first_polling_entry;

    while (*link != NULL) {
        PollingEntry *entry = *link;

        if (entry->func == func && entry->opaque == opaque) {
            *link = entry->next;
            g_free(entry);
            return;
        }
        link = &entry->next;
    }
}

/***********************************************************/
/* Wait objects support */
typedef struct WaitObjects {
    int num;
277
    int revents[MAXIMUM_WAIT_OBJECTS + 1];
278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293
    HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
    WaitObjectFunc *func[MAXIMUM_WAIT_OBJECTS + 1];
    void *opaque[MAXIMUM_WAIT_OBJECTS + 1];
} WaitObjects;

static WaitObjects wait_objects = {0};

/* Start waiting on @handle; @func(@opaque) runs once it is signalled.
 * Returns 0 on success, -1 if the table is full.
 */
int qemu_add_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
{
    WaitObjects *w = &wait_objects;

    if (w->num >= MAXIMUM_WAIT_OBJECTS) {
        return -1;
    }
    w->events[w->num] = handle;
    w->func[w->num] = func;
    w->opaque[w->num] = opaque;
    w->revents[w->num] = 0;
    w->num++;
    return 0;
}

/* Stop waiting on @handle: remove its slot and shift the rest down.
 * @func and @opaque are accepted for API symmetry but not compared;
 * the handle alone identifies the entry.
 */
void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
{
    int i, found;
    WaitObjects *w = &wait_objects;

    found = 0;
    for (i = 0; i < w->num; i++) {
        if (w->events[i] == handle) {
            found = 1;
        }
        if (found) {
            /* Reading slot i + 1 is in bounds: the arrays have one
             * spare slot beyond MAXIMUM_WAIT_OBJECTS.
             */
            w->events[i] = w->events[i + 1];
            w->func[i] = w->func[i + 1];
            w->opaque[i] = w->opaque[i + 1];
            w->revents[i] = w->revents[i + 1];
        }
    }
    if (found) {
        w->num--;
    }
}

321 322
void qemu_fd_register(int fd)
{
323 324
    WSAEventSelect(fd, event_notifier_get_handle(&qemu_aio_context->notifier),
                   FD_READ | FD_ACCEPT | FD_CLOSE |
325 326 327
                   FD_CONNECT | FD_WRITE | FD_OOB);
}

328 329 330 331 332 333 334 335 336 337
static int pollfds_fill(GArray *pollfds, fd_set *rfds, fd_set *wfds,
                        fd_set *xfds)
{
    int nfds = -1;
    int i;

    for (i = 0; i < pollfds->len; i++) {
        GPollFD *pfd = &g_array_index(pollfds, GPollFD, i);
        int fd = pfd->fd;
        int events = pfd->events;
338
        if (events & G_IO_IN) {
339 340 341
            FD_SET(fd, rfds);
            nfds = MAX(nfds, fd);
        }
342
        if (events & G_IO_OUT) {
343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364
            FD_SET(fd, wfds);
            nfds = MAX(nfds, fd);
        }
        if (events & G_IO_PRI) {
            FD_SET(fd, xfds);
            nfds = MAX(nfds, fd);
        }
    }
    return nfds;
}

/* Copy select() results back into each GPollFD's revents, masked by the
 * events the caller asked for.
 */
static void pollfds_poll(GArray *pollfds, int nfds, fd_set *rfds,
                         fd_set *wfds, fd_set *xfds)
{
    int i;

    for (i = 0; i < pollfds->len; i++) {
        GPollFD *pfd = &g_array_index(pollfds, GPollFD, i);
        int fd = pfd->fd;
        int revents = 0;

        if (FD_ISSET(fd, rfds)) {
            revents |= G_IO_IN;
        }
        if (FD_ISSET(fd, wfds)) {
            revents |= G_IO_OUT;
        }
        if (FD_ISSET(fd, xfds)) {
            revents |= G_IO_PRI;
        }
        pfd->revents = revents & pfd->events;
    }
}

377
static int os_host_main_loop_wait(int64_t timeout)
378
{
379
    GMainContext *context = g_main_context_default();
380
    GPollFD poll_fds[1024 * 2]; /* this is probably overkill */
381
    int select_ret = 0;
382
    int g_poll_ret, ret, i, n_poll_fds;
383
    PollingEntry *pe;
384
    WaitObjects *w = &wait_objects;
385
    gint poll_timeout;
386
    int64_t poll_timeout_ns;
387
    static struct timeval tv0;
388 389
    fd_set rfds, wfds, xfds;
    int nfds;
390 391 392 393 394 395

    /* XXX: need to suppress polling by better using win32 events */
    ret = 0;
    for (pe = first_polling_entry; pe != NULL; pe = pe->next) {
        ret |= pe->func(pe->opaque);
    }
396 397 398
    if (ret != 0) {
        return ret;
    }
399

400 401 402 403 404 405 406 407 408 409 410 411 412 413
    FD_ZERO(&rfds);
    FD_ZERO(&wfds);
    FD_ZERO(&xfds);
    nfds = pollfds_fill(gpollfds, &rfds, &wfds, &xfds);
    if (nfds >= 0) {
        select_ret = select(nfds + 1, &rfds, &wfds, &xfds, &tv0);
        if (select_ret != 0) {
            timeout = 0;
        }
        if (select_ret > 0) {
            pollfds_poll(gpollfds, nfds, &rfds, &wfds, &xfds);
        }
    }

414
    g_main_context_prepare(context, &max_priority);
415
    n_poll_fds = g_main_context_query(context, max_priority, &poll_timeout,
416 417 418
                                      poll_fds, ARRAY_SIZE(poll_fds));
    g_assert(n_poll_fds <= ARRAY_SIZE(poll_fds));

419
    for (i = 0; i < w->num; i++) {
420
        poll_fds[n_poll_fds + i].fd = (DWORD_PTR)w->events[i];
421
        poll_fds[n_poll_fds + i].events = G_IO_IN;
422 423
    }

424 425 426 427
    if (poll_timeout < 0) {
        poll_timeout_ns = -1;
    } else {
        poll_timeout_ns = (int64_t)poll_timeout * (int64_t)SCALE_MS;
428 429
    }

430 431
    poll_timeout_ns = qemu_soonest_timeout(poll_timeout_ns, timeout);

432
    qemu_mutex_unlock_iothread();
433 434
    g_poll_ret = qemu_poll_ns(poll_fds, n_poll_fds + w->num, poll_timeout_ns);

435
    qemu_mutex_lock_iothread();
436
    if (g_poll_ret > 0) {
437
        for (i = 0; i < w->num; i++) {
438
            w->revents[i] = poll_fds[n_poll_fds + i].revents;
439
        }
440 441 442
        for (i = 0; i < w->num; i++) {
            if (w->revents[i] && w->func[i]) {
                w->func[i](w->opaque[i]);
443 444 445 446
            }
        }
    }

447 448 449 450
    if (g_main_context_check(context, max_priority, poll_fds, n_poll_fds)) {
        g_main_context_dispatch(context);
    }

451
    return select_ret || g_poll_ret;
452 453 454 455 456
}
#endif

int main_loop_wait(int nonblocking)
{
457 458
    int ret;
    uint32_t timeout = UINT32_MAX;
459
    int64_t timeout_ns;
460 461 462 463 464 465

    if (nonblocking) {
        timeout = 0;
    }

    /* poll any events */
466
    g_array_set_size(gpollfds, 0); /* reset for new iteration */
467 468
    /* XXX: separate device handlers from system ones */
#ifdef CONFIG_SLIRP
469
    slirp_pollfds_fill(gpollfds, &timeout);
470
#endif
471
    qemu_iohandler_fill(gpollfds);
472 473 474 475 476 477 478 479 480 481 482 483

    if (timeout == UINT32_MAX) {
        timeout_ns = -1;
    } else {
        timeout_ns = (uint64_t)timeout * (int64_t)(SCALE_MS);
    }

    timeout_ns = qemu_soonest_timeout(timeout_ns,
                                      timerlistgroup_deadline_ns(
                                          &main_loop_tlg));

    ret = os_host_main_loop_wait(timeout_ns);
484
    qemu_iohandler_poll(gpollfds, ret);
485
#ifdef CONFIG_SLIRP
Stefan Hajnoczi's avatar
Stefan Hajnoczi committed
486
    slirp_pollfds_poll(gpollfds, (ret < 0));
487 488
#endif

489
    qemu_clock_run_all_timers();
490 491 492

    return ret;
}

/* Functions to operate on the main QEMU AioContext.  */

/* Create a bottom half attached to the main AioContext. */
QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
{
    return aio_bh_new(qemu_aio_context, cb, opaque);
}

501 502
bool qemu_aio_wait(void)
{
503
    return aio_poll(qemu_aio_context, true);
504 505
}

#ifdef CONFIG_POSIX
/* Watch @fd in the main AioContext with the given read/write handlers
 * and opaque user data.
 */
void qemu_aio_set_fd_handler(int fd,
                             IOHandler *io_read,
                             IOHandler *io_write,
                             void *opaque)
{
    aio_set_fd_handler(qemu_aio_context, fd, io_read, io_write, opaque);
}
#endif
515 516

void qemu_aio_set_event_notifier(EventNotifier *notifier,
517
                                 EventNotifierHandler *io_read)
518
{
519
    aio_set_event_notifier(qemu_aio_context, notifier, io_read);
520
}