Commit 796223f5 (netdev: Add new "struct netdev_rx" for capturing packets
from a netdev) refactored send and receive into separate netdevs. As a
result, send and receive now use different socket descriptors (except for tap
interfaces, which are treated specially). An unintended side effect is that
all sent packets are looped back and received. Previously this was avoided
because the kernel specifically prevents such loopback on a single socket
descriptor.
To resolve the situation, a socket filter is added to the receive socket
so that it only accepts inbound packets.
Simon Horman co-discovered and initially reported this issue.
Signed-off-by: Murphy McCauley <murphy.mccauley@gmail.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Simon Horman <horms@verge.net.au>
Reviewed-by: Simon Horman <horms@verge.net.au>
Luca Giraudo lgiraudo@nicira.com
Martin Casado casado@nicira.com
Mehak Mahajan mmahajan@nicira.com
Luca Giraudo lgiraudo@nicira.com
Martin Casado casado@nicira.com
Mehak Mahajan mmahajan@nicira.com
+Murphy McCauley murphy.mccauley@gmail.com
Natasha Gude natasha@nicira.com
Neil McKee neil.mckee@inmon.com
Paraneetharan Chandrasekaran paraneetharanc@gmail.com
Natasha Gude natasha@nicira.com
Neil McKee neil.mckee@inmon.com
Paraneetharan Chandrasekaran paraneetharanc@gmail.com
Mike Bursell mike.bursell@citrix.com
Mike Kruze mkruze@nicira.com
Min Chen ustcer.tonychan@gmail.com
Mike Bursell mike.bursell@citrix.com
Mike Kruze mkruze@nicira.com
Min Chen ustcer.tonychan@gmail.com
-Murphy McCauley murphy.mccauley@gmail.com
Mikael Doverhag mdoverhag@nicira.com
Niklas Andersson nandersson@nicira.com
Padmanabhan Krishnan kprad1@yahoo.com
Mikael Doverhag mdoverhag@nicira.com
Niklas Andersson nandersson@nicira.com
Padmanabhan Krishnan kprad1@yahoo.com
- * Copyright (c) 2011, 2012 Nicira, Inc.
+ * Copyright (c) 2011, 2012, 2013 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
SO_SNDLOWAT,
SO_SNDTIMEO,
SO_TYPE,
SO_SNDLOWAT,
SO_SNDTIMEO,
SO_TYPE,
+ SO_RCVBUFFORCE,
+ SO_ATTACH_FILTER
#include <fcntl.h>
#include <arpa/inet.h>
#include <inttypes.h>
#include <fcntl.h>
#include <arpa/inet.h>
#include <inttypes.h>
+#include <linux/filter.h>
#include <linux/gen_stats.h>
#include <linux/if_ether.h>
#include <linux/if_tun.h>
#include <linux/gen_stats.h>
#include <linux/if_ether.h>
#include <linux/if_tun.h>
} else {
struct sockaddr_ll sll;
int ifindex;
} else {
struct sockaddr_ll sll;
int ifindex;
+ /* Result of tcpdump -dd inbound */
+ static struct sock_filter filt[] = {
+ { 0x28, 0, 0, 0xfffff004 }, /* ldh [0] */
+ { 0x15, 0, 1, 0x00000004 }, /* jeq #4 jt 2 jf 3 */
+ { 0x6, 0, 0, 0x00000000 }, /* ret #0 */
+ { 0x6, 0, 0, 0x0000ffff } /* ret #65535 */
+ };
+ static struct sock_fprog fprog = { ARRAY_SIZE(filt), filt };
/* Create file descriptor. */
fd = socket(PF_PACKET, SOCK_RAW, 0);
/* Create file descriptor. */
fd = socket(PF_PACKET, SOCK_RAW, 0);
netdev_get_name(netdev_), strerror(error));
goto error;
}
netdev_get_name(netdev_), strerror(error));
goto error;
}
+
+ /* Filter for only inbound packets. */
+ error = setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &fprog,
+ sizeof fprog);
+ if (error) {
+ error = errno;
+ VLOG_ERR("%s: failed attach filter (%s)",
+ netdev_get_name(netdev_), strerror(error));
+ goto error;
+ }
}
rx = xmalloc(sizeof *rx);
}
rx = xmalloc(sizeof *rx);
/* Use the tap fd to send to this device. This is essential for
* tap devices, because packets sent to a tap device with an
* AF_PACKET socket will loop back to be *received* again on the
/* Use the tap fd to send to this device. This is essential for
* tap devices, because packets sent to a tap device with an
* AF_PACKET socket will loop back to be *received* again on the
+ * tap device. This doesn't occur on other interface types
+ * because we attach a socket filter to the rx socket. */
struct netdev_linux *netdev = netdev_linux_cast(netdev_);
retval = write(netdev->state.tap.fd, data, size);
struct netdev_linux *netdev = netdev_linux_cast(netdev_);
retval = write(netdev->state.tap.fd, data, size);