[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v3 2/2] sockets: Handle race condition between binds to the same port
From: |
Knut Omang |
Subject: |
Re: [Qemu-devel] [PATCH v3 2/2] sockets: Handle race condition between binds to the same port |
Date: |
Tue, 20 Jun 2017 22:59:48 +0200 |
On Fri, 2017-06-16 at 15:45 +0100, Daniel P. Berrange wrote:
> On Wed, Jun 14, 2017 at 06:53:52PM +0200, Knut Omang wrote:
> > If an offset of ports is specified to the inet_listen_saddr() function,
> > and two or more processes try to bind from these ports at the same time,
> > occasionally more than one process may be able to bind to the same
> > port. The condition is detected by listen() but too late to avoid a failure.
> >
> > This function is called by socket_listen() and used
> > by all socket listening code in QEMU, so all cases where any form of dynamic
> > port selection is used should be subject to this issue.
> >
> > Add code to close and re-establish the socket when this
> > condition is observed, hiding the race condition from the user.
> >
> > This has been developed and tested by means of the
> > test-listen unit test in the previous commit.
> > Enable the test for make check now that it passes.
> >
> > Signed-off-by: Knut Omang <address@hidden>
> > Reviewed-by: Bhavesh Davda <address@hidden>
> > Reviewed-by: Yuval Shaia <address@hidden>
> > Reviewed-by: Girish Moodalbail <address@hidden>
> > ---
> > tests/Makefile.include | 2 +-
> > util/qemu-sockets.c | 159 ++++++++++++++++++++++++++++--------------
> > 2 files changed, 108 insertions(+), 53 deletions(-)
> >
> > diff --git a/tests/Makefile.include b/tests/Makefile.include
> > index 22bb97e..c38f94e 100644
> > --- a/tests/Makefile.include
> > +++ b/tests/Makefile.include
> > @@ -127,7 +127,7 @@ check-unit-y += tests/test-bufferiszero$(EXESUF)
> > gcov-files-check-bufferiszero-y = util/bufferiszero.c
> > check-unit-y += tests/test-uuid$(EXESUF)
> > check-unit-y += tests/ptimer-test$(EXESUF)
> > -#check-unit-y += tests/test-listen$(EXESUF)
> > +check-unit-y += tests/test-listen$(EXESUF)
> > gcov-files-ptimer-test-y = hw/core/ptimer.c
> > check-unit-y += tests/test-qapi-util$(EXESUF)
> > gcov-files-test-qapi-util-y = qapi/qapi-util.c
> > diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
> > index 852773d..7b118b4 100644
> > --- a/util/qemu-sockets.c
> > +++ b/util/qemu-sockets.c
> > @@ -149,6 +149,94 @@ int inet_ai_family_from_address(InetSocketAddress
> > *addr,
> > return PF_UNSPEC;
> > }
> >
> > +static int create_fast_reuse_socket(struct addrinfo *e, Error **errp)
> > +{
> > + int slisten = qemu_socket(e->ai_family, e->ai_socktype,
> > e->ai_protocol);
> > + if (slisten < 0) {
> > + if (!e->ai_next) {
> > + error_setg_errno(errp, errno, "Failed to create socket");
> > + }
> > + return -1;
> > + }
> > +
> > + socket_set_fast_reuse(slisten);
> > + return slisten;
> > +}
> > +
> > +static int try_bind(int socket, InetSocketAddress *saddr, struct addrinfo
> > *e)
> > +{
> > +#ifndef IPV6_V6ONLY
> > + return bind(socket, e->ai_addr, e->ai_addrlen);
> > +#else
> > + /*
> > + * Deals with first & last cases in matrix in comment
> > + * for inet_ai_family_from_address().
> > + */
> > + int v6only =
> > + ((!saddr->has_ipv4 && !saddr->has_ipv6) ||
> > + (saddr->has_ipv4 && saddr->ipv4 &&
> > + saddr->has_ipv6 && saddr->ipv6)) ? 0 : 1;
> > + int stat;
> > +
> > + rebind:
> > + if (e->ai_family == PF_INET6) {
> > + qemu_setsockopt(socket, IPPROTO_IPV6, IPV6_V6ONLY, &v6only,
> > + sizeof(v6only));
> > + }
> > +
> > + stat = bind(socket, e->ai_addr, e->ai_addrlen);
> > + if (!stat) {
> > + return 0;
> > + }
> > +
> > + /* If we got EADDRINUSE from an IPv6 bind & v6only is unset,
> > + * it could be that the IPv4 port is already claimed, so retry
> > + * with v6only set
> > + */
> > + if (e->ai_family == PF_INET6 && errno == EADDRINUSE && !v6only) {
> > + v6only = 1;
> > + goto rebind;
> > + }
> > + return stat;
> > +#endif
> > +}
> > +
> > +static int try_bind_listen(int *socket, InetSocketAddress *saddr,
> > + struct addrinfo *e, int port, Error **errp)
> > +{
> > + int s = *socket;
> > + int ret;
> > +
> > + inet_setport(e, port);
> > + ret = try_bind(s, saddr, e);
> > + if (ret) {
> > + if (errno != EADDRINUSE) {
> > + error_setg_errno(errp, errno, "Failed to bind socket");
> > + }
> > + return errno;
> > + }
> > + if (listen(s, 1) == 0) {
> > + return 0;
> > + }
> > + if (errno == EADDRINUSE) {
> > + /* We got to bind the socket to a port but someone else managed
> > + * to bind to the same port and beat us to listen on it!
> > + * Recreate the socket and return EADDRINUSE to preserve the
> > + * expected state by the caller:
> > + */
> > + closesocket(s);
> > + s = create_fast_reuse_socket(e, errp);
> > + if (s < 0) {
> > + return errno;
> > + }
> > + *socket = s;
> > + errno = EADDRINUSE;
> > + return errno;
> > + }
> > + error_setg_errno(errp, errno, "Failed to listen on socket");
> > + return errno;
> > +}
> > +
> > static int inet_listen_saddr(InetSocketAddress *saddr,
> > int port_offset,
> > bool update_addr,
> > @@ -158,7 +246,9 @@ static int inet_listen_saddr(InetSocketAddress *saddr,
> > char port[33];
> > char uaddr[INET6_ADDRSTRLEN+1];
> > char uport[33];
> > - int slisten, rc, port_min, port_max, p;
> > + int rc, port_min, port_max, p;
> > + int slisten = 0;
> > + int saved_errno = 0;
> > Error *err = NULL;
> >
> > memset(&ai,0, sizeof(ai));
> > @@ -210,75 +300,40 @@ static int inet_listen_saddr(InetSocketAddress *saddr,
> > return -1;
> > }
> >
> > - /* create socket + bind */
> > + /* create socket + bind/listen */
> > for (e = res; e != NULL; e = e->ai_next) {
> > getnameinfo((struct sockaddr*)e->ai_addr,e->ai_addrlen,
> > uaddr,INET6_ADDRSTRLEN,uport,32,
> > NI_NUMERICHOST | NI_NUMERICSERV);
> > - slisten = qemu_socket(e->ai_family, e->ai_socktype,
> > e->ai_protocol);
> > +
> > + slisten = create_fast_reuse_socket(e, &err);
> > if (slisten < 0) {
> > - if (!e->ai_next) {
> > - error_setg_errno(errp, errno, "Failed to create socket");
> > - }
> > continue;
> > }
> >
> > - socket_set_fast_reuse(slisten);
> > -
> > port_min = inet_getport(e);
> > port_max = saddr->has_to ? saddr->to + port_offset : port_min;
> > for (p = port_min; p <= port_max; p++) {
> > -#ifdef IPV6_V6ONLY
> > - /*
> > - * Deals with first & last cases in matrix in comment
> > - * for inet_ai_family_from_address().
> > - */
> > - int v6only =
> > - ((!saddr->has_ipv4 && !saddr->has_ipv6) ||
> > - (saddr->has_ipv4 && saddr->ipv4 &&
> > - saddr->has_ipv6 && saddr->ipv6)) ? 0 : 1;
> > -#endif
> > - inet_setport(e, p);
> > -#ifdef IPV6_V6ONLY
> > - rebind:
> > - if (e->ai_family == PF_INET6) {
> > - qemu_setsockopt(slisten, IPPROTO_IPV6, IPV6_V6ONLY,
> > &v6only,
> > - sizeof(v6only));
> > - }
> > -#endif
> > - if (bind(slisten, e->ai_addr, e->ai_addrlen) == 0) {
> > - goto listen;
> > - }
> > -
> > -#ifdef IPV6_V6ONLY
> > - /* If we got EADDRINUSE from an IPv6 bind & V6ONLY is unset,
> > - * it could be that the IPv4 port is already claimed, so retry
> > - * with V6ONLY set
> > - */
> > - if (e->ai_family == PF_INET6 && errno == EADDRINUSE &&
> > !v6only) {
> > - v6only = 1;
> > - goto rebind;
> > - }
> > -#endif
> > -
> > - if (p == port_max) {
> > - if (!e->ai_next) {
> > - error_setg_errno(errp, errno, "Failed to bind socket");
> > - }
> > + int eno = try_bind_listen(&slisten, saddr, e, p, &err);
> > + if (!eno) {
> > + goto listen_ok;
> > + } else if (eno != EADDRINUSE) {
> > + goto listen_failed;
> > }
> > }
> > + }
> > + error_setg_errno(errp, errno, "Failed to find available port");
> > +
> > +listen_failed:
> > + saved_errno = errno;
> > + if (slisten >= 0) {
> > closesocket(slisten);
> > }
> > freeaddrinfo(res);
> > + errno = saved_errno;
> > return -1;
> >
> > -listen:
> > - if (listen(slisten,1) != 0) {
> > - error_setg_errno(errp, errno, "Failed to listen on socket");
> > - closesocket(slisten);
> > - freeaddrinfo(res);
> > - return -1;
> > - }
> > +listen_ok:
> > if (update_addr) {
> > g_free(saddr->host);
> > saddr->host = g_strdup(uaddr);
>
> I find this patch rather hard to review for correctness, because it has
> mixed up a huge amount of code movement / refactoring, with the bug
> fix. Can you split this up into 2 (or possibly more) patches, so we have
> 1 (or more) no-functional-change refactoring steps, and then the bug fix
> on its own.
Ok, I'll see what I can do - I realize the patch became a great
deal more complicated by the refactoring from v2 to v3 to
accommodate the IPv6 changes.
Thanks,
Knut
>
> Regards,
> Daniel