X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Fpackets.c;h=6244c3f5fac85049a7788a32fb83639802ebb8ae;hb=003ce655b7116d18c86a74c50391e54990346931;hp=84ca590bda90d23af12b1f8a9d82f2813728faa1;hpb=e0edde6fee279cdbbf3c179f5f50adaf0c7c7f1e;p=sliver-openvswitch.git diff --git a/lib/packets.c b/lib/packets.c index 84ca590bd..6244c3f5f 100644 --- a/lib/packets.c +++ b/lib/packets.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. + * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,16 +16,21 @@ #include #include "packets.h" -#include #include #include #include +#include #include #include "byte-order.h" #include "csum.h" +#include "crc32c.h" #include "flow.h" +#include "hmap.h" #include "dynamic-string.h" #include "ofpbuf.h" +#include "ovs-thread.h" +#include "odp-util.h" +#include "unaligned.h" const struct in6_addr in6addr_exact = IN6ADDR_EXACT_INIT; @@ -43,11 +48,88 @@ dpid_from_string(const char *s, uint64_t *dpidp) return *dpidp != 0; } +/* Returns true if 'ea' is a reserved address, that a bridge must never + * forward, false otherwise. + * + * If you change this function's behavior, please update corresponding + * documentation in vswitch.xml at the same time. */ +bool +eth_addr_is_reserved(const uint8_t ea[ETH_ADDR_LEN]) +{ + struct eth_addr_node { + struct hmap_node hmap_node; + const uint64_t ea64; + }; + + static struct eth_addr_node nodes[] = { + /* STP, IEEE pause frames, and other reserved protocols. */ + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000000ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000001ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000002ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000003ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000004ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000005ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000006ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000007ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000008ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000009ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000aULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000bULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000cULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000dULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000eULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000fULL }, + + /* Extreme protocols. */ + { HMAP_NODE_NULL_INITIALIZER, 0x00e02b000000ULL }, /* EDP. */ + { HMAP_NODE_NULL_INITIALIZER, 0x00e02b000004ULL }, /* EAPS. */ + { HMAP_NODE_NULL_INITIALIZER, 0x00e02b000006ULL }, /* EAPS. */ + + /* Cisco protocols. */ + { HMAP_NODE_NULL_INITIALIZER, 0x01000c000000ULL }, /* ISL. */ + { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccccULL }, /* PAgP, UDLD, CDP, + * DTP, VTP. */ + { HMAP_NODE_NULL_INITIALIZER, 0x01000ccccccdULL }, /* PVST+. */ + { HMAP_NODE_NULL_INITIALIZER, 0x01000ccdcdcdULL }, /* STP Uplink Fast, + * FlexLink. */ + + /* Cisco CFM. */ + { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc0ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc1ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc2ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc3ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc4ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc5ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc6ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc7ULL }, + }; + + static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; + struct eth_addr_node *node; + static struct hmap addrs; + uint64_t ea64; + + if (ovsthread_once_start(&once)) { + hmap_init(&addrs); + for (node = nodes; node < &nodes[ARRAY_SIZE(nodes)]; node++) { + hmap_insert(&addrs, &node->hmap_node, hash_uint64(node->ea64)); + } + ovsthread_once_done(&once); + } + + ea64 = eth_addr_to_uint64(ea); + HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_uint64(ea64), &addrs) { + if (node->ea64 == ea64) { + return true; + } + } + return false; +} + bool eth_addr_from_string(const char *s, uint8_t ea[ETH_ADDR_LEN]) { - if (sscanf(s, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(ea)) - == ETH_ADDR_SCAN_COUNT) { + if (ovs_scan(s, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(ea))) { return true; } else { memset(ea, 0, ETH_ADDR_LEN); @@ -55,83 +137,221 @@ eth_addr_from_string(const char *s, uint8_t ea[ETH_ADDR_LEN]) } } -/* Fills 'b' with an 802.2 SNAP packet with Ethernet source address 'eth_src', - * the Nicira OUI as SNAP organization and 'snap_type' as SNAP type. The text - * string in 'tag' is enclosed as the packet payload. - * +/* Fills 'b' with a Reverse ARP packet with Ethernet source address 'eth_src'. * This function is used by Open vSwitch to compose packets in cases where - * context is important but content doesn't (or shouldn't) matter. For this - * purpose, 'snap_type' should be a random number and 'tag' should be an - * English phrase that explains the purpose of the packet. (The English phrase - * gives hapless admins running Wireshark the opportunity to figure out what's - * going on.) */ + * context is important but content doesn't (or shouldn't) matter. + * + * The returned packet has enough headroom to insert an 802.1Q VLAN header if + * desired. */ void -compose_benign_packet(struct ofpbuf *b, const char *tag, uint16_t snap_type, - const uint8_t eth_src[ETH_ADDR_LEN]) +compose_rarp(struct ofpbuf *b, const uint8_t eth_src[ETH_ADDR_LEN]) { - size_t tag_size = strlen(tag) + 1; - char *payload; + struct eth_header *eth; + struct arp_eth_header *arp; - payload = snap_compose(b, eth_addr_broadcast, eth_src, 0x002320, snap_type, - tag_size + ETH_ADDR_LEN); - memcpy(payload, tag, tag_size); - memcpy(payload + tag_size, eth_src, ETH_ADDR_LEN); + ofpbuf_clear(b); + ofpbuf_prealloc_tailroom(b, 2 + ETH_HEADER_LEN + VLAN_HEADER_LEN + + ARP_ETH_HEADER_LEN); + ofpbuf_reserve(b, 2 + VLAN_HEADER_LEN); + eth = ofpbuf_put_uninit(b, sizeof *eth); + memcpy(eth->eth_dst, eth_addr_broadcast, ETH_ADDR_LEN); + memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN); + eth->eth_type = htons(ETH_TYPE_RARP); + + arp = ofpbuf_put_uninit(b, sizeof *arp); + arp->ar_hrd = htons(ARP_HRD_ETHERNET); + arp->ar_pro = htons(ARP_PRO_IP); + arp->ar_hln = sizeof arp->ar_sha; + arp->ar_pln = sizeof arp->ar_spa; + arp->ar_op = htons(ARP_OP_RARP); + memcpy(arp->ar_sha, eth_src, ETH_ADDR_LEN); + put_16aligned_be32(&arp->ar_spa, htonl(0)); + memcpy(arp->ar_tha, eth_src, ETH_ADDR_LEN); + put_16aligned_be32(&arp->ar_tpa, htonl(0)); + + ofpbuf_set_frame(b, eth); + ofpbuf_set_l3(b, arp); } /* Insert VLAN header according to given TCI. Packet passed must be Ethernet * packet. Ignores the CFI bit of 'tci' using 0 instead. * - * Also sets 'packet->l2' to point to the new Ethernet header. */ + * Also adjusts the layer offsets accordingly. */ void -eth_push_vlan(struct ofpbuf *packet, ovs_be16 tci) +eth_push_vlan(struct ofpbuf *packet, ovs_be16 tpid, ovs_be16 tci) { - struct eth_header *eh = packet->data; struct vlan_eth_header *veh; /* Insert new 802.1Q header. */ - struct vlan_eth_header tmp; - memcpy(tmp.veth_dst, eh->eth_dst, ETH_ADDR_LEN); - memcpy(tmp.veth_src, eh->eth_src, ETH_ADDR_LEN); - tmp.veth_type = htons(ETH_TYPE_VLAN); - tmp.veth_tci = tci & htons(~VLAN_CFI); - tmp.veth_next_type = eh->eth_type; - - veh = ofpbuf_push_uninit(packet, VLAN_HEADER_LEN); - memcpy(veh, &tmp, sizeof tmp); - - packet->l2 = packet->data; + veh = ofpbuf_resize_l2(packet, VLAN_HEADER_LEN); + memmove(veh, (char *)veh + VLAN_HEADER_LEN, 2 * ETH_ADDR_LEN); + veh->veth_type = tpid; + veh->veth_tci = tci & htons(~VLAN_CFI); } /* Removes outermost VLAN header (if any is present) from 'packet'. * - * 'packet->l2' must initially point to 'packet''s Ethernet header. */ + * 'packet->l2_5' should initially point to 'packet''s outer-most MPLS header + * or may be NULL if there are no MPLS headers. */ void eth_pop_vlan(struct ofpbuf *packet) { - struct vlan_eth_header *veh = packet->l2; - if (packet->size >= sizeof *veh + struct vlan_eth_header *veh = ofpbuf_l2(packet); + + if (veh && ofpbuf_size(packet) >= sizeof *veh && veh->veth_type == htons(ETH_TYPE_VLAN)) { - struct eth_header tmp; - memcpy(tmp.eth_dst, veh->veth_dst, ETH_ADDR_LEN); - memcpy(tmp.eth_src, veh->veth_src, ETH_ADDR_LEN); - tmp.eth_type = veh->veth_next_type; + memmove((char *)veh + VLAN_HEADER_LEN, veh, 2 * ETH_ADDR_LEN); + ofpbuf_resize_l2(packet, -VLAN_HEADER_LEN); + } +} + +/* Set ethertype of the packet. */ +static void +set_ethertype(struct ofpbuf *packet, ovs_be16 eth_type) +{ + struct eth_header *eh = ofpbuf_l2(packet); - ofpbuf_pull(packet, VLAN_HEADER_LEN); - packet->l2 = (char*)packet->l2 + VLAN_HEADER_LEN; - memcpy(packet->data, &tmp, sizeof tmp); + if (!eh) { + return; + } + + if (eh->eth_type == htons(ETH_TYPE_VLAN)) { + ovs_be16 *p; + char *l2_5 = ofpbuf_l2_5(packet); + + p = ALIGNED_CAST(ovs_be16 *, + (l2_5 ? l2_5 : (char *)ofpbuf_l3(packet)) - 2); + *p = eth_type; + } else { + eh->eth_type = eth_type; + } +} + +static bool is_mpls(struct ofpbuf *packet) +{ + return packet->l2_5_ofs != UINT16_MAX; +} + +/* Set time to live (TTL) of an MPLS label stack entry (LSE). */ +void +set_mpls_lse_ttl(ovs_be32 *lse, uint8_t ttl) +{ + *lse &= ~htonl(MPLS_TTL_MASK); + *lse |= htonl((ttl << MPLS_TTL_SHIFT) & MPLS_TTL_MASK); +} + +/* Set traffic class (TC) of an MPLS label stack entry (LSE). */ +void +set_mpls_lse_tc(ovs_be32 *lse, uint8_t tc) +{ + *lse &= ~htonl(MPLS_TC_MASK); + *lse |= htonl((tc << MPLS_TC_SHIFT) & MPLS_TC_MASK); +} + +/* Set label of an MPLS label stack entry (LSE). */ +void +set_mpls_lse_label(ovs_be32 *lse, ovs_be32 label) +{ + *lse &= ~htonl(MPLS_LABEL_MASK); + *lse |= htonl((ntohl(label) << MPLS_LABEL_SHIFT) & MPLS_LABEL_MASK); +} + +/* Set bottom of stack (BoS) bit of an MPLS label stack entry (LSE). */ +void +set_mpls_lse_bos(ovs_be32 *lse, uint8_t bos) +{ + *lse &= ~htonl(MPLS_BOS_MASK); + *lse |= htonl((bos << MPLS_BOS_SHIFT) & MPLS_BOS_MASK); +} + +/* Compose an MPLS label stack entry (LSE) from its components: + * label, traffic class (TC), time to live (TTL) and + * bottom of stack (BoS) bit. */ +ovs_be32 +set_mpls_lse_values(uint8_t ttl, uint8_t tc, uint8_t bos, ovs_be32 label) +{ + ovs_be32 lse = htonl(0); + set_mpls_lse_ttl(&lse, ttl); + set_mpls_lse_tc(&lse, tc); + set_mpls_lse_bos(&lse, bos); + set_mpls_lse_label(&lse, label); + return lse; +} + +/* Set MPLS label stack entry to outermost MPLS header.*/ +void +set_mpls_lse(struct ofpbuf *packet, ovs_be32 mpls_lse) +{ + /* Packet type should be MPLS to set label stack entry. */ + if (is_mpls(packet)) { + struct mpls_hdr *mh = ofpbuf_l2_5(packet); + + /* Update mpls label stack entry. */ + put_16aligned_be32(&mh->mpls_lse, mpls_lse); + } +} + +/* Push MPLS label stack entry 'lse' onto 'packet' as the the outermost MPLS + * header. If 'packet' does not already have any MPLS labels, then its + * Ethertype is changed to 'ethtype' (which must be an MPLS Ethertype). */ +void +push_mpls(struct ofpbuf *packet, ovs_be16 ethtype, ovs_be32 lse) +{ + char * header; + size_t len; + + if (!eth_type_mpls(ethtype)) { + return; + } + + if (!is_mpls(packet)) { + /* Set MPLS label stack offset. */ + packet->l2_5_ofs = packet->l3_ofs; + } + + set_ethertype(packet, ethtype); + + /* Push new MPLS shim header onto packet. */ + len = packet->l2_5_ofs; + header = ofpbuf_resize_l2_5(packet, MPLS_HLEN); + memmove(header, header + MPLS_HLEN, len); + memcpy(header + len, &lse, sizeof lse); +} + +/* If 'packet' is an MPLS packet, removes its outermost MPLS label stack entry. + * If the label that was removed was the only MPLS label, changes 'packet''s + * Ethertype to 'ethtype' (which ordinarily should not be an MPLS + * Ethertype). */ +void +pop_mpls(struct ofpbuf *packet, ovs_be16 ethtype) +{ + if (is_mpls(packet)) { + struct mpls_hdr *mh = ofpbuf_l2_5(packet); + size_t len = packet->l2_5_ofs; + + set_ethertype(packet, ethtype); + if (get_16aligned_be32(&mh->mpls_lse) & htonl(MPLS_BOS_MASK)) { + ofpbuf_set_l2_5(packet, NULL); + } + /* Shift the l2 header forward. */ + memmove((char*)ofpbuf_data(packet) + MPLS_HLEN, ofpbuf_data(packet), len); + ofpbuf_resize_l2_5(packet, -MPLS_HLEN); } } /* Converts hex digits in 'hex' to an Ethernet packet in '*packetp'. The * caller must free '*packetp'. On success, returns NULL. On failure, returns - * an error message and stores NULL in '*packetp'. */ + * an error message and stores NULL in '*packetp'. + * + * Aligns the L3 header of '*packetp' on a 32-bit boundary. */ const char * eth_from_hex(const char *hex, struct ofpbuf **packetp) { struct ofpbuf *packet; - packet = *packetp = ofpbuf_new(strlen(hex) / 2); + /* Use 2 bytes of headroom to 32-bit align the L3 header. */ + packet = *packetp = ofpbuf_new_with_headroom(strlen(hex) / 2, 2); if (ofpbuf_put_hex(packet, hex, NULL)[0] != '\0') { ofpbuf_delete(packet); @@ -139,7 +359,7 @@ eth_from_hex(const char *hex, struct ofpbuf **packetp) return "Trailing garbage in packet data"; } - if (packet->size < ETH_HEADER_LEN) { + if (ofpbuf_size(packet) < ETH_HEADER_LEN) { ofpbuf_delete(packet); *packetp = NULL; return "Packet data too short for Ethernet"; @@ -148,25 +368,48 @@ eth_from_hex(const char *hex, struct ofpbuf **packetp) return NULL; } +void +eth_format_masked(const uint8_t eth[ETH_ADDR_LEN], + const uint8_t mask[ETH_ADDR_LEN], struct ds *s) +{ + ds_put_format(s, ETH_ADDR_FMT, ETH_ADDR_ARGS(eth)); + if (mask && !eth_mask_is_exact(mask)) { + ds_put_format(s, "/"ETH_ADDR_FMT, ETH_ADDR_ARGS(mask)); + } +} + +void +eth_addr_bitand(const uint8_t src[ETH_ADDR_LEN], + const uint8_t mask[ETH_ADDR_LEN], + uint8_t dst[ETH_ADDR_LEN]) +{ + int i; + + for (i = 0; i < ETH_ADDR_LEN; i++) { + dst[i] = src[i] & mask[i]; + } +} + /* Given the IP netmask 'netmask', returns the number of bits of the IP address - * that it specifies, that is, the number of 1-bits in 'netmask'. 'netmask' - * must be a CIDR netmask (see ip_is_cidr()). */ + * that it specifies, that is, the number of 1-bits in 'netmask'. + * + * If 'netmask' is not a CIDR netmask (see ip_is_cidr()), the return value will + * still be in the valid range but isn't otherwise meaningful. */ int ip_count_cidr_bits(ovs_be32 netmask) { - assert(ip_is_cidr(netmask)); - return 32 - ctz(ntohl(netmask)); + return 32 - ctz32(ntohl(netmask)); } void ip_format_masked(ovs_be32 ip, ovs_be32 mask, struct ds *s) { - ds_put_format(s, IP_FMT, IP_ARGS(&ip)); - if (mask != htonl(UINT32_MAX)) { + ds_put_format(s, IP_FMT, IP_ARGS(ip)); + if (mask != OVS_BE32_MAX) { if (ip_is_cidr(mask)) { ds_put_format(s, "/%d", ip_count_cidr_bits(mask)); } else { - ds_put_format(s, "/"IP_FMT, IP_ARGS(&mask)); + ds_put_format(s, "/"IP_FMT, IP_ARGS(mask)); } } } @@ -252,7 +495,10 @@ ipv6_create_mask(int mask) /* Given the IPv6 netmask 'netmask', returns the number of bits of the IPv6 * address that it specifies, that is, the number of 1-bits in 'netmask'. - * 'netmask' must be a CIDR netmask (see ipv6_is_cidr()). */ + * 'netmask' must be a CIDR netmask (see ipv6_is_cidr()). + * + * If 'netmask' is not a CIDR netmask (see ipv6_is_cidr()), the return value + * will still be in the valid range but isn't otherwise meaningful. */ int ipv6_count_cidr_bits(const struct in6_addr *netmask) { @@ -260,8 +506,6 @@ ipv6_count_cidr_bits(const struct in6_addr *netmask) int count = 0; const uint8_t *netmaskp = &netmask->s6_addr[0]; - assert(ipv6_is_cidr(netmask)); - for (i=0; i<16; i++) { if (netmaskp[i] == 0xff) { count += 8; @@ -307,8 +551,9 @@ ipv6_is_cidr(const struct in6_addr *netmask) /* Populates 'b' with an Ethernet II packet headed with the given 'eth_dst', * 'eth_src' and 'eth_type' parameters. A payload of 'size' bytes is allocated * in 'b' and returned. This payload may be populated with appropriate - * information by the caller. Sets 'b''s 'l2' and 'l3' pointers to the - * Ethernet header and payload respectively. + * information by the caller. Sets 'b''s 'frame' pointer and 'l3' offset to + * the Ethernet header and payload respectively. Aligns b->l3 on a 32-bit + * boundary. * * The returned packet has enough headroom to insert an 802.1Q VLAN header if * desired. */ @@ -322,8 +567,10 @@ eth_compose(struct ofpbuf *b, const uint8_t eth_dst[ETH_ADDR_LEN], ofpbuf_clear(b); - ofpbuf_prealloc_tailroom(b, ETH_HEADER_LEN + VLAN_HEADER_LEN + size); - ofpbuf_reserve(b, VLAN_HEADER_LEN); + /* The magic 2 here ensures that the L3 header (when it is added later) + * will be 32-bit aligned. */ + ofpbuf_prealloc_tailroom(b, 2 + ETH_HEADER_LEN + VLAN_HEADER_LEN + size); + ofpbuf_reserve(b, 2 + VLAN_HEADER_LEN); eth = ofpbuf_put_uninit(b, ETH_HEADER_LEN); data = ofpbuf_put_uninit(b, size); @@ -331,76 +578,170 @@ eth_compose(struct ofpbuf *b, const uint8_t eth_dst[ETH_ADDR_LEN], memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN); eth->eth_type = htons(eth_type); - b->l2 = eth; - b->l3 = data; + ofpbuf_set_frame(b, eth); + ofpbuf_set_l3(b, data); return data; } -/* Populates 'b' with an Ethernet LLC+SNAP packet headed with the given - * 'eth_dst', 'eth_src', 'snap_org', and 'snap_type'. A payload of 'size' - * bytes is allocated in 'b' and returned. This payload may be populated with - * appropriate information by the caller. +static void +packet_set_ipv4_addr(struct ofpbuf *packet, + ovs_16aligned_be32 *addr, ovs_be32 new_addr) +{ + struct ip_header *nh = ofpbuf_l3(packet); + ovs_be32 old_addr = get_16aligned_be32(addr); + size_t l4_size = ofpbuf_l4_size(packet); + + if (nh->ip_proto == IPPROTO_TCP && l4_size >= TCP_HEADER_LEN) { + struct tcp_header *th = ofpbuf_l4(packet); + + th->tcp_csum = recalc_csum32(th->tcp_csum, old_addr, new_addr); + } else if (nh->ip_proto == IPPROTO_UDP && l4_size >= UDP_HEADER_LEN ) { + struct udp_header *uh = ofpbuf_l4(packet); + + if (uh->udp_csum) { + uh->udp_csum = recalc_csum32(uh->udp_csum, old_addr, new_addr); + if (!uh->udp_csum) { + uh->udp_csum = htons(0xffff); + } + } + } + nh->ip_csum = recalc_csum32(nh->ip_csum, old_addr, new_addr); + put_16aligned_be32(addr, new_addr); +} + +/* Returns true, if packet contains at least one routing header where + * segements_left > 0. * - * The returned packet has enough headroom to insert an 802.1Q VLAN header if - * desired. */ -void * -snap_compose(struct ofpbuf *b, const uint8_t eth_dst[ETH_ADDR_LEN], - const uint8_t eth_src[ETH_ADDR_LEN], - unsigned int oui, uint16_t snap_type, size_t size) + * This function assumes that L3 and L4 offsets are set in the packet. */ +static bool +packet_rh_present(struct ofpbuf *packet) { - struct eth_header *eth; - struct llc_snap_header *llc_snap; - void *payload; + const struct ovs_16aligned_ip6_hdr *nh; + int nexthdr; + size_t len; + size_t remaining; + uint8_t *data = ofpbuf_l3(packet); - /* Compose basic packet structure. (We need the payload size to stick into - * the 802.2 header.) */ - ofpbuf_clear(b); - ofpbuf_prealloc_tailroom(b, ETH_HEADER_LEN + VLAN_HEADER_LEN - + LLC_SNAP_HEADER_LEN + size); - ofpbuf_reserve(b, VLAN_HEADER_LEN); - eth = ofpbuf_put_zeros(b, ETH_HEADER_LEN); - llc_snap = ofpbuf_put_zeros(b, LLC_SNAP_HEADER_LEN); - payload = ofpbuf_put_uninit(b, size); - - /* Compose 802.2 header. */ - memcpy(eth->eth_dst, eth_dst, ETH_ADDR_LEN); - memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN); - eth->eth_type = htons(b->size - ETH_HEADER_LEN); + remaining = packet->l4_ofs - packet->l3_ofs; - /* Compose LLC, SNAP headers. */ - llc_snap->llc.llc_dsap = LLC_DSAP_SNAP; - llc_snap->llc.llc_ssap = LLC_SSAP_SNAP; - llc_snap->llc.llc_cntl = LLC_CNTL_SNAP; - llc_snap->snap.snap_org[0] = oui >> 16; - llc_snap->snap.snap_org[1] = oui >> 8; - llc_snap->snap.snap_org[2] = oui; - llc_snap->snap.snap_type = htons(snap_type); + if (remaining < sizeof *nh) { + return false; + } + nh = ALIGNED_CAST(struct ovs_16aligned_ip6_hdr *, data); + data += sizeof *nh; + remaining -= sizeof *nh; + nexthdr = nh->ip6_nxt; + + while (1) { + if ((nexthdr != IPPROTO_HOPOPTS) + && (nexthdr != IPPROTO_ROUTING) + && (nexthdr != IPPROTO_DSTOPTS) + && (nexthdr != IPPROTO_AH) + && (nexthdr != IPPROTO_FRAGMENT)) { + /* It's either a terminal header (e.g., TCP, UDP) or one we + * don't understand. In either case, we're done with the + * packet, so use it to fill in 'nw_proto'. */ + break; + } + + /* We only verify that at least 8 bytes of the next header are + * available, but many of these headers are longer. Ensure that + * accesses within the extension header are within those first 8 + * bytes. All extension headers are required to be at least 8 + * bytes. */ + if (remaining < 8) { + return false; + } + + if (nexthdr == IPPROTO_AH) { + /* A standard AH definition isn't available, but the fields + * we care about are in the same location as the generic + * option header--only the header length is calculated + * differently. */ + const struct ip6_ext *ext_hdr = (struct ip6_ext *)data; + + nexthdr = ext_hdr->ip6e_nxt; + len = (ext_hdr->ip6e_len + 2) * 4; + } else if (nexthdr == IPPROTO_FRAGMENT) { + const struct ovs_16aligned_ip6_frag *frag_hdr + = ALIGNED_CAST(struct ovs_16aligned_ip6_frag *, data); + + nexthdr = frag_hdr->ip6f_nxt; + len = sizeof *frag_hdr; + } else if (nexthdr == IPPROTO_ROUTING) { + const struct ip6_rthdr *rh = (struct ip6_rthdr *)data; + + if (rh->ip6r_segleft > 0) { + return true; + } - return payload; + nexthdr = rh->ip6r_nxt; + len = (rh->ip6r_len + 1) * 8; + } else { + const struct ip6_ext *ext_hdr = (struct ip6_ext *)data; + + nexthdr = ext_hdr->ip6e_nxt; + len = (ext_hdr->ip6e_len + 1) * 8; + } + + if (remaining < len) { + return false; + } + remaining -= len; + data += len; + } + + return false; } static void -packet_set_ipv4_addr(struct ofpbuf *packet, ovs_be32 *addr, ovs_be32 new_addr) +packet_update_csum128(struct ofpbuf *packet, uint8_t proto, + ovs_16aligned_be32 addr[4], const ovs_be32 new_addr[4]) { - struct ip_header *nh = packet->l3; + size_t l4_size = ofpbuf_l4_size(packet); - if (nh->ip_proto == IPPROTO_TCP && packet->l7) { - struct tcp_header *th = packet->l4; + if (proto == IPPROTO_TCP && l4_size >= TCP_HEADER_LEN) { + struct tcp_header *th = ofpbuf_l4(packet); - th->tcp_csum = recalc_csum32(th->tcp_csum, *addr, new_addr); - } else if (nh->ip_proto == IPPROTO_UDP && packet->l7) { - struct udp_header *uh = packet->l4; + th->tcp_csum = recalc_csum128(th->tcp_csum, addr, new_addr); + } else if (proto == IPPROTO_UDP && l4_size >= UDP_HEADER_LEN) { + struct udp_header *uh = ofpbuf_l4(packet); if (uh->udp_csum) { - uh->udp_csum = recalc_csum32(uh->udp_csum, *addr, new_addr); + uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr); if (!uh->udp_csum) { uh->udp_csum = htons(0xffff); } } } - nh->ip_csum = recalc_csum32(nh->ip_csum, *addr, new_addr); - *addr = new_addr; +} + +static void +packet_set_ipv6_addr(struct ofpbuf *packet, uint8_t proto, + ovs_16aligned_be32 addr[4], const ovs_be32 new_addr[4], + bool recalculate_csum) +{ + if (recalculate_csum) { + packet_update_csum128(packet, proto, addr, new_addr); + } + memcpy(addr, new_addr, sizeof(ovs_be32[4])); +} + +static void +packet_set_ipv6_flow_label(ovs_16aligned_be32 *flow_label, ovs_be32 flow_key) +{ + ovs_be32 old_label = get_16aligned_be32(flow_label); + ovs_be32 new_label = (old_label & htonl(~IPV6_LABEL_MASK)) | flow_key; + put_16aligned_be32(flow_label, new_label); +} + +static void +packet_set_ipv6_tc(ovs_16aligned_be32 *flow_label, uint8_t tc) +{ + ovs_be32 old_label = get_16aligned_be32(flow_label); + ovs_be32 new_label = (old_label & htonl(0xF00FFFFF)) | htonl(tc << 20); + put_16aligned_be32(flow_label, new_label); } /* Modifies the IPv4 header fields of 'packet' to be consistent with 'src', @@ -411,13 +752,13 @@ void packet_set_ipv4(struct ofpbuf *packet, ovs_be32 src, ovs_be32 dst, uint8_t tos, uint8_t ttl) { - struct ip_header *nh = packet->l3; + struct ip_header *nh = ofpbuf_l3(packet); - if (nh->ip_src != src) { + if (get_16aligned_be32(&nh->ip_src) != src) { packet_set_ipv4_addr(packet, &nh->ip_src, src); } - if (nh->ip_dst != dst) { + if (get_16aligned_be32(&nh->ip_dst) != dst) { packet_set_ipv4_addr(packet, &nh->ip_dst, dst); } @@ -438,6 +779,33 @@ packet_set_ipv4(struct ofpbuf *packet, ovs_be32 src, ovs_be32 dst, } } +/* Modifies the IPv6 header fields of 'packet' to be consistent with 'src', + * 'dst', 'traffic class', and 'next hop'. Updates 'packet''s L4 checksums as + * appropriate. 'packet' must contain a valid IPv6 packet with correctly + * populated l[34] offsets. */ +void +packet_set_ipv6(struct ofpbuf *packet, uint8_t proto, const ovs_be32 src[4], + const ovs_be32 dst[4], uint8_t key_tc, ovs_be32 key_fl, + uint8_t key_hl) +{ + struct ovs_16aligned_ip6_hdr *nh = ofpbuf_l3(packet); + + if (memcmp(&nh->ip6_src, src, sizeof(ovs_be32[4]))) { + packet_set_ipv6_addr(packet, proto, nh->ip6_src.be32, src, true); + } + + if (memcmp(&nh->ip6_dst, dst, sizeof(ovs_be32[4]))) { + packet_set_ipv6_addr(packet, proto, nh->ip6_dst.be32, dst, + !packet_rh_present(packet)); + } + + packet_set_ipv6_tc(&nh->ip6_flow, key_tc); + + packet_set_ipv6_flow_label(&nh->ip6_flow, key_fl); + + nh->ip6_hlim = key_hl; +} + static void packet_set_port(ovs_be16 *port, ovs_be16 new_port, ovs_be16 *csum) { @@ -449,11 +817,11 @@ packet_set_port(ovs_be16 *port, ovs_be16 new_port, ovs_be16 *csum) /* Sets the TCP source and destination port ('src' and 'dst' respectively) of * the TCP header contained in 'packet'. 'packet' must be a valid TCP packet - * with its l4 marker properly populated. */ + * with its l4 offset properly populated. */ void packet_set_tcp_port(struct ofpbuf *packet, ovs_be16 src, ovs_be16 dst) { - struct tcp_header *th = packet->l4; + struct tcp_header *th = ofpbuf_l4(packet); packet_set_port(&th->tcp_src, src, &th->tcp_csum); packet_set_port(&th->tcp_dst, dst, &th->tcp_csum); @@ -461,11 +829,11 @@ packet_set_tcp_port(struct ofpbuf *packet, ovs_be16 src, ovs_be16 dst) /* Sets the UDP source and destination port ('src' and 'dst' respectively) of * the UDP header contained in 'packet'. 'packet' must be a valid UDP packet - * with its l4 marker properly populated. */ + * with its l4 offset properly populated. */ void packet_set_udp_port(struct ofpbuf *packet, ovs_be16 src, ovs_be16 dst) { - struct udp_header *uh = packet->l4; + struct udp_header *uh = ofpbuf_l4(packet); if (uh->udp_csum) { packet_set_port(&uh->udp_src, src, &uh->udp_csum); @@ -480,28 +848,65 @@ packet_set_udp_port(struct ofpbuf *packet, ovs_be16 src, ovs_be16 dst) } } -/* If 'packet' is a TCP packet, returns the TCP flags. Otherwise, returns 0. - * - * 'flow' must be the flow corresponding to 'packet' and 'packet''s header - * pointers must be properly initialized (e.g. with flow_extract()). */ -uint8_t -packet_get_tcp_flags(const struct ofpbuf *packet, const struct flow *flow) -{ - if ((flow->dl_type == htons(ETH_TYPE_IP) || - flow->dl_type == htons(ETH_TYPE_IPV6)) && - flow->nw_proto == IPPROTO_TCP && packet->l7) { - const struct tcp_header *tcp = packet->l4; - return TCP_FLAGS(tcp->tcp_ctl); - } else { - return 0; +/* Sets the SCTP source and destination port ('src' and 'dst' respectively) of + * the SCTP header contained in 'packet'. 'packet' must be a valid SCTP packet + * with its l4 offset properly populated. */ +void +packet_set_sctp_port(struct ofpbuf *packet, ovs_be16 src, ovs_be16 dst) +{ + struct sctp_header *sh = ofpbuf_l4(packet); + ovs_be32 old_csum, old_correct_csum, new_csum; + uint16_t tp_len = ofpbuf_l4_size(packet); + + old_csum = get_16aligned_be32(&sh->sctp_csum); + put_16aligned_be32(&sh->sctp_csum, 0); + old_correct_csum = crc32c((void *)sh, tp_len); + + sh->sctp_src = src; + sh->sctp_dst = dst; + + new_csum = crc32c((void *)sh, tp_len); + put_16aligned_be32(&sh->sctp_csum, old_csum ^ old_correct_csum ^ new_csum); +} + +const char * +packet_tcp_flag_to_string(uint32_t flag) +{ + switch (flag) { + case TCP_FIN: + return "fin"; + case TCP_SYN: + return "syn"; + case TCP_RST: + return "rst"; + case TCP_PSH: + return "psh"; + case TCP_ACK: + return "ack"; + case TCP_URG: + return "urg"; + case TCP_ECE: + return "ece"; + case TCP_CWR: + return "cwr"; + case TCP_NS: + return "ns"; + case 0x200: + return "[200]"; + case 0x400: + return "[400]"; + case 0x800: + return "[800]"; + default: + return NULL; } } /* Appends a string representation of the TCP flags value 'tcp_flags' - * (e.g. obtained via packet_get_tcp_flags() or TCP_FLAGS) to 's', in the + * (e.g. from struct flow.tcp_flags or obtained via TCP_FLAGS) to 's', in the * format used by tcpdump. */ void -packet_format_tcp_flags(struct ds *s, uint8_t tcp_flags) +packet_format_tcp_flags(struct ds *s, uint16_t tcp_flags) { if (!tcp_flags) { ds_put_cstr(s, "none"); @@ -526,10 +931,22 @@ packet_format_tcp_flags(struct ds *s, uint8_t tcp_flags) if (tcp_flags & TCP_ACK) { ds_put_char(s, '.'); } - if (tcp_flags & 0x40) { - ds_put_cstr(s, "[40]"); + if (tcp_flags & TCP_ECE) { + ds_put_cstr(s, "E"); + } + if (tcp_flags & TCP_CWR) { + ds_put_cstr(s, "C"); + } + if (tcp_flags & TCP_NS) { + ds_put_cstr(s, "N"); + } + if (tcp_flags & 0x200) { + ds_put_cstr(s, "[200]"); + } + if (tcp_flags & 0x400) { + ds_put_cstr(s, "[400]"); } - if (tcp_flags & 0x80) { - ds_put_cstr(s, "[80]"); + if (tcp_flags & 0x800) { + ds_put_cstr(s, "[800]"); } }