From 38f7147c3e7310ab3ea87420e120e2b1f2268975 Mon Sep 17 00:00:00 2001 From: Ethan Jackson Date: Thu, 7 Jun 2012 15:27:22 -0700 Subject: [PATCH 1/1] packets: Use RARPs for learning packets. Traditionally Open vSwitch had used 802.2 SNAP packets to update upstream switch learning tables when necessary. This approach had advantages in that debugging information could be embedded in the packet, helping hapless admins figure out what's going on. However, since both QEMU and VMware use RARP for this purpose, it seems appropriate to fall in line with the de facto standard. Requested-by: Ben Basler Signed-off-by: Ethan Jackson --- NEWS | 3 ++ lib/bond.c | 3 +- lib/packets.c | 87 ++++++++++++++------------------------------- lib/packets.h | 23 ++++++++++-- ofproto/fail-open.c | 2 +- vswitchd/INTERNALS | 17 ++++----- 6 files changed, 62 insertions(+), 73 deletions(-) diff --git a/NEWS b/NEWS index 66bf72795..8cd38dd76 100644 --- a/NEWS +++ b/NEWS @@ -5,6 +5,9 @@ post-v1.7.0 - Added support for arbitrary ethernet masks - Additional protocols are not mirrored and dropped when forward-bpdu is false. For a full list, see the ovs-vswitchd.conf.db man page. + - Open vSwitch now sends RARP packets in situations where it previously + sent a custom protocol, making it consistent with behavior of QEMU and + VMware. 
v1.7.0 - xx xxx xxxx diff --git a/lib/bond.c b/lib/bond.c index cffdae249..8b32bdf98 100644 --- a/lib/bond.c +++ b/lib/bond.c @@ -531,8 +531,7 @@ bond_compose_learning_packet(struct bond *bond, slave = choose_output_slave(bond, &flow, vlan); packet = ofpbuf_new(0); - compose_benign_packet(packet, "Open vSwitch Bond Failover", 0xf177, - eth_src); + compose_benign_packet(packet, eth_src); if (vlan) { eth_push_vlan(packet, htons(vlan)); } diff --git a/lib/packets.c b/lib/packets.c index a8208f598..9b61d0477 100644 --- a/lib/packets.c +++ b/lib/packets.c @@ -120,27 +120,37 @@ eth_addr_from_string(const char *s, uint8_t ea[ETH_ADDR_LEN]) } } -/* Fills 'b' with an 802.2 SNAP packet with Ethernet source address 'eth_src', - * the Nicira OUI as SNAP organization and 'snap_type' as SNAP type. The text - * string in 'tag' is enclosed as the packet payload. - * +/* Fills 'b' with a Reverse ARP packet with Ethernet source address 'eth_src'. * This function is used by Open vSwitch to compose packets in cases where - * context is important but content doesn't (or shouldn't) matter. For this - * purpose, 'snap_type' should be a random number and 'tag' should be an - * English phrase that explains the purpose of the packet. (The English phrase - * gives hapless admins running Wireshark the opportunity to figure out what's - * going on.) */ + * context is important but content doesn't (or shouldn't) matter. + * + * The returned packet has enough headroom to insert an 802.1Q VLAN header if + * desired. 
*/ void -compose_benign_packet(struct ofpbuf *b, const char *tag, uint16_t snap_type, - const uint8_t eth_src[ETH_ADDR_LEN]) +compose_benign_packet(struct ofpbuf *b, const uint8_t eth_src[ETH_ADDR_LEN]) { - size_t tag_size = strlen(tag) + 1; - char *payload; + struct eth_header *eth; + struct rarp_header *rarp; - payload = snap_compose(b, eth_addr_broadcast, eth_src, 0x002320, snap_type, - tag_size + ETH_ADDR_LEN); - memcpy(payload, tag, tag_size); - memcpy(payload + tag_size, eth_src, ETH_ADDR_LEN); + ofpbuf_clear(b); + ofpbuf_prealloc_tailroom(b, ETH_HEADER_LEN + VLAN_HEADER_LEN + + RARP_HEADER_LEN); + ofpbuf_reserve(b, VLAN_HEADER_LEN); /* Headroom for an 802.1Q tag. */ + eth = ofpbuf_put_uninit(b, sizeof *eth); + memcpy(eth->eth_dst, eth_addr_broadcast, ETH_ADDR_LEN); /* Broadcast. */ + memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN); + eth->eth_type = htons(ETH_TYPE_RARP); + + rarp = ofpbuf_put_uninit(b, sizeof *rarp); + rarp->hw_addr_space = htons(ARP_HTYPE_ETH); + rarp->proto_addr_space = htons(ETH_TYPE_IP); + rarp->hw_addr_length = ETH_ADDR_LEN; + rarp->proto_addr_length = sizeof rarp->src_proto_addr; + rarp->opcode = htons(RARP_REQUEST_REVERSE); + memcpy(rarp->src_hw_addr, eth_src, ETH_ADDR_LEN); + rarp->src_proto_addr = htonl(0); /* Protocol addresses stay zero. */ + memcpy(rarp->target_hw_addr, eth_src, ETH_ADDR_LEN); /* Same as source. */ + rarp->target_proto_addr = htonl(0); } /* Insert VLAN header according to given TCI. Packet passed must be Ethernet @@ -424,49 +434,6 @@ eth_compose(struct ofpbuf *b, const uint8_t eth_dst[ETH_ADDR_LEN], return data; } -/* Populates 'b' with an Ethernet LLC+SNAP packet headed with the given - * 'eth_dst', 'eth_src', 'snap_org', and 'snap_type'. A payload of 'size' - * bytes is allocated in 'b' and returned. This payload may be populated with - * appropriate information by the caller. - * - * The returned packet has enough headroom to insert an 802.1Q VLAN header if - * desired. 
*/ -void * -snap_compose(struct ofpbuf *b, const uint8_t eth_dst[ETH_ADDR_LEN], - const uint8_t eth_src[ETH_ADDR_LEN], - unsigned int oui, uint16_t snap_type, size_t size) -{ - struct eth_header *eth; - struct llc_snap_header *llc_snap; - void *payload; - - /* Compose basic packet structure. (We need the payload size to stick into - * the 802.2 header.) */ - ofpbuf_clear(b); - ofpbuf_prealloc_tailroom(b, ETH_HEADER_LEN + VLAN_HEADER_LEN - + LLC_SNAP_HEADER_LEN + size); - ofpbuf_reserve(b, VLAN_HEADER_LEN); - eth = ofpbuf_put_zeros(b, ETH_HEADER_LEN); - llc_snap = ofpbuf_put_zeros(b, LLC_SNAP_HEADER_LEN); - payload = ofpbuf_put_uninit(b, size); - - /* Compose 802.2 header. */ - memcpy(eth->eth_dst, eth_dst, ETH_ADDR_LEN); - memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN); - eth->eth_type = htons(b->size - ETH_HEADER_LEN); - - /* Compose LLC, SNAP headers. */ - llc_snap->llc.llc_dsap = LLC_DSAP_SNAP; - llc_snap->llc.llc_ssap = LLC_SSAP_SNAP; - llc_snap->llc.llc_cntl = LLC_CNTL_SNAP; - llc_snap->snap.snap_org[0] = oui >> 16; - llc_snap->snap.snap_org[1] = oui >> 8; - llc_snap->snap.snap_org[2] = oui; - llc_snap->snap.snap_type = htons(snap_type); - - return payload; -} - static void packet_set_ipv4_addr(struct ofpbuf *packet, ovs_be32 *addr, ovs_be32 new_addr) { diff --git a/lib/packets.h b/lib/packets.h index 4a0fcae14..c1d404d45 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -135,8 +135,7 @@ static inline void eth_addr_nicira_random(uint8_t ea[ETH_ADDR_LEN]) bool eth_addr_is_reserved(const uint8_t ea[ETH_ADDR_LEN]); bool eth_addr_from_string(const char *, uint8_t ea[ETH_ADDR_LEN]); -void compose_benign_packet(struct ofpbuf *, const char *tag, - uint16_t snap_type, +void compose_benign_packet(struct ofpbuf *, const uint8_t eth_src[ETH_ADDR_LEN]); void eth_push_vlan(struct ofpbuf *, ovs_be16 tci); @@ -182,6 +181,7 @@ void eth_addr_bitand(const uint8_t src[ETH_ADDR_LEN], #define ETH_TYPE_VLAN 0x8100 #define ETH_TYPE_IPV6 0x86dd #define ETH_TYPE_LACP 0x8809 +#define 
ETH_TYPE_RARP 0x8035 /* Minimum value for an Ethernet type. Values below this are IEEE 802.2 frame * lengths. */ @@ -228,6 +228,25 @@ struct llc_snap_header { } __attribute__((packed)); BUILD_ASSERT_DECL(LLC_SNAP_HEADER_LEN == sizeof(struct llc_snap_header)); +#define ARP_HTYPE_ETH 0x0001 /* Ethernet hardware address space. */ +#define RARP_REQUEST_REVERSE 0x0003 /* Request-reverse opcode. */ + +#define RARP_HEADER_LEN 28 /* Size of struct rarp_header. */ +/* RARP header only for Ethernet-IP. */ +struct rarp_header { + ovs_be16 hw_addr_space; /* ARP_HTYPE_ETH. */ + ovs_be16 proto_addr_space; /* ETH_TYPE_IP. */ + uint8_t hw_addr_length; /* ETH_ADDR_LEN. */ + uint8_t proto_addr_length; /* IPV4_ADDR_LEN. */ + ovs_be16 opcode; /* RARP_REQUEST_REVERSE. */ + uint8_t src_hw_addr[ETH_ADDR_LEN]; /* Source hardware address. */ + ovs_be32 src_proto_addr; /* Source protocol address. */ + uint8_t target_hw_addr[ETH_ADDR_LEN]; /* Target hardware address. */ + ovs_be32 target_proto_addr; /* Target protocol address. */ +} __attribute__((packed)); +BUILD_ASSERT_DECL(RARP_HEADER_LEN == sizeof(struct rarp_header)); + + #define VLAN_VID_MASK 0x0fff #define VLAN_VID_SHIFT 0 diff --git a/ofproto/fail-open.c b/ofproto/fail-open.c index 99bbccd31..f3a99c131 100644 --- a/ofproto/fail-open.c +++ b/ofproto/fail-open.c @@ -121,7 +121,7 @@ send_bogus_packet_ins(struct fail_open *fo) ofpbuf_init(&b, 128); eth_addr_nicira_random(mac); - compose_benign_packet(&b, "Open vSwitch Controller Probe", 0xa033, mac); + compose_benign_packet(&b, mac); memset(&pin, 0, sizeof pin); pin.packet = b.data; diff --git a/vswitchd/INTERNALS b/vswitchd/INTERNALS index 645752d0c..7aa3c2967 100644 --- a/vswitchd/INTERNALS +++ b/vswitchd/INTERNALS @@ -57,14 +57,15 @@ enabled all output packets are blackholed anyway. When a slave becomes disabled, the vswitch immediately chooses a new output port for traffic that was destined for that slave (see -bond_enable_slave()). 
It also sends a "gratuitous learning packet" on -the bond port (on the newly chosen slave) for each MAC address that -the vswitch has learned on a port other than the bond (see -bond_send_learning_packets()), to teach the physical switch that the -new slave should be used in place of the one that is now disabled. -(This behavior probably makes sense only for a vswitch that has only -one port (the bond) connected to a physical switch; vswitchd should -probably provide a way to disable or configure it in other scenarios.) +bond_enable_slave()). It also sends a "gratuitous learning packet", +specifically a RARP, on the bond port (on the newly chosen slave) for +each MAC address that the vswitch has learned on a port other than the +bond (see bond_send_learning_packets()), to teach the physical switch +that the new slave should be used in place of the one that is now +disabled. (This behavior probably makes sense only for a vswitch that +has only one port (the bond) connected to a physical switch; vswitchd +should probably provide a way to disable or configure it in other +scenarios.) Bond Packet Input ----------------- -- 2.43.0