From d2b9f5b017df990087cd00fe5f195b829c243b73 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 15 Mar 2012 21:15:38 -0700 Subject: [PATCH] netlink-socket: Increase Netlink socket receive buffer size. Open vSwitch userspace can set up flows at a high rate, but it is somewhat "bursty" in opportunities to set up flows, by which I mean that OVS sets up a batch of flows, then goes off and does some other work for a while, then sets up another batch of flows, and so on. The result is that, if a large number of packets that need flow setups come in all at once, then some of them can overflow the relatively small kernel-to-user buffers. This commit increases the kernel-to-user buffers from the default of approximately 120 kB each to 1 MB each. In one somewhat synthetic test case that I ran based on an "hping3" that generated a load of about 20,000 new flows per second (including both requests and replies), this reduced the packets dropped at the kernel-to-user interface from about 30% to none. I expect that it will similarly improve packet loss in workloads where flow arrival is not easily predictable. (This has little effect on workloads generated by "ovs-benchmark rate" because that benchmark is effectively "self-clocking", that is, a new flow is triggered only by a reply to a request made earlier, which means that the number of buffered packets at any given has a known, constant upper limit.) Bug #10210. Signed-off-by: Ben Pfaff --- include/sparse/sys/socket.h | 5 +++-- lib/netlink-socket.c | 10 +++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/include/sparse/sys/socket.h b/include/sparse/sys/socket.h index 89e3c2dc2..1ed195b25 100644 --- a/include/sparse/sys/socket.h +++ b/include/sparse/sys/socket.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 Nicira Networks. + * Copyright (c) 2011, 2012 Nicira Networks. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -74,7 +74,8 @@ enum { SO_SNDBUF, SO_SNDLOWAT, SO_SNDTIMEO, - SO_TYPE + SO_TYPE, + SO_RCVBUFFORCE }; enum { diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c index bc4623534..df6f1d8b5 100644 --- a/lib/netlink-socket.c +++ b/lib/netlink-socket.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks. + * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira Networks. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -89,6 +89,7 @@ nl_sock_create(int protocol, struct nl_sock **sockp) struct nl_sock *sock; struct sockaddr_nl local, remote; socklen_t local_size; + int rcvbuf; int retval = 0; if (!max_iovs) { @@ -122,6 +123,13 @@ nl_sock_create(int protocol, struct nl_sock **sockp) sock->protocol = protocol; sock->dump = NULL; + rcvbuf = 1024 * 1024; + if (setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUFFORCE, + &rcvbuf, sizeof rcvbuf)) { + VLOG_WARN_RL(&rl, "setting %d-byte socket receive buffer failed (%s)", + rcvbuf, strerror(errno)); + } + retval = get_socket_rcvbuf(sock->fd); if (retval < 0) { retval = -retval; -- 2.43.0