X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Fpackets.c;h=b95e1e010519f91677fe30c4bf622ba594f633b3;hb=9fbe253e0864ea7cf694335ff1f0fa10b5c04f35;hp=16f4fe63afe254f93e2678c4771013c9b7ac3566;hpb=7cb57d10a9b1cd09866b2755e2c5db757e1b1dfc;p=sliver-openvswitch.git diff --git a/lib/packets.c b/lib/packets.c index 16f4fe63a..b95e1e010 100644 --- a/lib/packets.c +++ b/lib/packets.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. + * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,10 +16,10 @@ #include #include "packets.h" -#include #include #include #include +#include #include #include "byte-order.h" #include "csum.h" @@ -27,6 +27,7 @@ #include "hmap.h" #include "dynamic-string.h" #include "ofpbuf.h" +#include "ovs-thread.h" const struct in6_addr in6addr_exact = IN6ADDR_EXACT_INIT; @@ -54,27 +55,27 @@ eth_addr_is_reserved(const uint8_t ea[ETH_ADDR_LEN]) { struct eth_addr_node { struct hmap_node hmap_node; - uint64_t ea64; + const uint64_t ea64; }; static struct eth_addr_node nodes[] = { /* STP, IEEE pause frames, and other reserved protocols. 
*/ - { HMAP_NODE_NULL_INITIALIZER, 0x0108c2000000ULL }, - { HMAP_NODE_NULL_INITIALIZER, 0x0108c2000001ULL }, - { HMAP_NODE_NULL_INITIALIZER, 0x0108c2000002ULL }, - { HMAP_NODE_NULL_INITIALIZER, 0x0108c2000003ULL }, - { HMAP_NODE_NULL_INITIALIZER, 0x0108c2000004ULL }, - { HMAP_NODE_NULL_INITIALIZER, 0x0108c2000005ULL }, - { HMAP_NODE_NULL_INITIALIZER, 0x0108c2000006ULL }, - { HMAP_NODE_NULL_INITIALIZER, 0x0108c2000007ULL }, - { HMAP_NODE_NULL_INITIALIZER, 0x0108c2000008ULL }, - { HMAP_NODE_NULL_INITIALIZER, 0x0108c2000009ULL }, - { HMAP_NODE_NULL_INITIALIZER, 0x0108c200000aULL }, - { HMAP_NODE_NULL_INITIALIZER, 0x0108c200000bULL }, - { HMAP_NODE_NULL_INITIALIZER, 0x0108c200000cULL }, - { HMAP_NODE_NULL_INITIALIZER, 0x0108c200000dULL }, - { HMAP_NODE_NULL_INITIALIZER, 0x0108c200000eULL }, - { HMAP_NODE_NULL_INITIALIZER, 0x0108c200000fULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000000ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000001ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000002ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000003ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000004ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000005ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000006ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000007ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000008ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000009ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000aULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000bULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000cULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000dULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000eULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000fULL }, /* Extreme protocols. */ { HMAP_NODE_NULL_INITIALIZER, 0x00e02b000000ULL }, /* EDP. 
*/ @@ -100,15 +101,18 @@ eth_addr_is_reserved(const uint8_t ea[ETH_ADDR_LEN]) { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc7ULL }, }; - static struct hmap addrs = HMAP_INITIALIZER(&addrs); + static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; struct eth_addr_node *node; + static struct hmap addrs; uint64_t ea64; - if (hmap_is_empty(&addrs)) { + if (ovsthread_once_start(&once)) { + hmap_init(&addrs); for (node = nodes; node < &nodes[ARRAY_SIZE(nodes)]; node++) { hmap_insert(&addrs, &node->hmap_node, hash_2words(node->ea64, node->ea64 >> 32)); } + ovsthread_once_done(&once); } ea64 = eth_addr_to_uint64(ea); @@ -192,7 +196,8 @@ eth_push_vlan(struct ofpbuf *packet, ovs_be16 tci) /* Removes outermost VLAN header (if any is present) from 'packet'. * - * 'packet->l2' must initially point to 'packet''s Ethernet header. */ + * 'packet->l2_5' should initially point to 'packet''s outer-most MPLS header + * or may be NULL if there are no MPLS headers. */ void eth_pop_vlan(struct ofpbuf *packet) { @@ -211,6 +216,182 @@ eth_pop_vlan(struct ofpbuf *packet) } } +/* Return depth of mpls stack. + * + * 'packet->l2_5' should initially point to 'packet''s outer-most MPLS header + * or may be NULL if there are no MPLS headers. */ +uint16_t +eth_mpls_depth(const struct ofpbuf *packet) +{ + struct mpls_hdr *mh = packet->l2_5; + uint16_t depth; + + if (!mh) { + return 0; + } + + depth = 0; + while (packet->size >= ((char *)mh - (char *)packet->data) + sizeof *mh) { + depth++; + if (mh->mpls_lse & htonl(MPLS_BOS_MASK)) { + break; + } + mh++; + } + + return depth; +} + +/* Set ethertype of the packet. */ +void +set_ethertype(struct ofpbuf *packet, ovs_be16 eth_type) +{ + struct eth_header *eh = packet->data; + + if (eh->eth_type == htons(ETH_TYPE_VLAN)) { + ovs_be16 *p; + p = ALIGNED_CAST(ovs_be16 *, + (char *)(packet->l2_5 ? 
packet->l2_5 : packet->l3) - 2); + *p = eth_type; + } else { + eh->eth_type = eth_type; + } +} + +static bool is_mpls(struct ofpbuf *packet) +{ + return packet->l2_5 != NULL; +} + +/* Set time to live (TTL) of an MPLS label stack entry (LSE). */ +void +set_mpls_lse_ttl(ovs_be32 *lse, uint8_t ttl) +{ + *lse &= ~htonl(MPLS_TTL_MASK); + *lse |= htonl((ttl << MPLS_TTL_SHIFT) & MPLS_TTL_MASK); +} + +/* Set traffic class (TC) of an MPLS label stack entry (LSE). */ +void +set_mpls_lse_tc(ovs_be32 *lse, uint8_t tc) +{ + *lse &= ~htonl(MPLS_TC_MASK); + *lse |= htonl((tc << MPLS_TC_SHIFT) & MPLS_TC_MASK); +} + +/* Set label of an MPLS label stack entry (LSE). */ +void +set_mpls_lse_label(ovs_be32 *lse, ovs_be32 label) +{ + *lse &= ~htonl(MPLS_LABEL_MASK); + *lse |= htonl((ntohl(label) << MPLS_LABEL_SHIFT) & MPLS_LABEL_MASK); +} + +/* Set bottom of stack (BoS) bit of an MPLS label stack entry (LSE). */ +void +set_mpls_lse_bos(ovs_be32 *lse, uint8_t bos) +{ + *lse &= ~htonl(MPLS_BOS_MASK); + *lse |= htonl((bos << MPLS_BOS_SHIFT) & MPLS_BOS_MASK); +} + +/* Compose an MPLS label stack entry (LSE) from its components: + * label, traffic class (TC), time to live (TTL) and + * bottom of stack (BoS) bit. */ +ovs_be32 +set_mpls_lse_values(uint8_t ttl, uint8_t tc, uint8_t bos, ovs_be32 label) +{ + ovs_be32 lse = htonl(0); + set_mpls_lse_ttl(&lse, ttl); + set_mpls_lse_tc(&lse, tc); + set_mpls_lse_bos(&lse, bos); + set_mpls_lse_label(&lse, label); + return lse; +} + +/* Push a new MPLS stack entry onto the MPLS stack and adjust 'packet->l2' and + * 'packet->l2_5' accordingly. The new entry will be the outermost entry on + * the stack. + * + * Previous to calling this function, 'packet->l2_5' must be set; if the MPLS + * label to be pushed will be the first label in 'packet', then it should be + * the same as 'packet->l3'. 
*/ +static void +push_mpls_lse(struct ofpbuf *packet, struct mpls_hdr *mh) +{ + char * header; + size_t len; + header = ofpbuf_push_uninit(packet, MPLS_HLEN); + len = (char *)packet->l2_5 - (char *)packet->l2; + memmove(header, packet->l2, len); + memcpy(header + len, mh, sizeof *mh); + packet->l2 = (char*)packet->l2 - MPLS_HLEN; + packet->l2_5 = (char*)packet->l2_5 - MPLS_HLEN; +} + +/* Set MPLS label stack entry to outermost MPLS header.*/ +void +set_mpls_lse(struct ofpbuf *packet, ovs_be32 mpls_lse) +{ + struct mpls_hdr *mh = packet->l2_5; + + /* Packet type should be MPLS to set label stack entry. */ + if (is_mpls(packet)) { + /* Update mpls label stack entry. */ + mh->mpls_lse = mpls_lse; + } +} + +/* Push MPLS label stack entry 'lse' onto 'packet' as the outermost MPLS + * header. If 'packet' does not already have any MPLS labels, then its + * Ethertype is changed to 'ethtype' (which must be an MPLS Ethertype). */ +void +push_mpls(struct ofpbuf *packet, ovs_be16 ethtype, ovs_be32 lse) +{ + struct mpls_hdr mh; + + if (!eth_type_mpls(ethtype)) { + return; + } + + if (!is_mpls(packet)) { + /* Set ethtype and MPLS label stack entry. */ + set_ethertype(packet, ethtype); + packet->l2_5 = packet->l3; + } + + /* Push new MPLS shim header onto packet. */ + mh.mpls_lse = lse; + push_mpls_lse(packet, &mh); +} + +/* If 'packet' is an MPLS packet, removes its outermost MPLS label stack entry. + * If the label that was removed was the only MPLS label, changes 'packet''s + * Ethertype to 'ethtype' (which ordinarily should not be an MPLS + * Ethertype). */ +void +pop_mpls(struct ofpbuf *packet, ovs_be16 ethtype) +{ + struct mpls_hdr *mh = NULL; + + if (is_mpls(packet)) { + size_t len; + mh = packet->l2_5; + len = (char*)packet->l2_5 - (char*)packet->l2; + set_ethertype(packet, ethtype); + if (mh->mpls_lse & htonl(MPLS_BOS_MASK)) { + packet->l2_5 = NULL; + } else { + packet->l2_5 = (char*)packet->l2_5 + MPLS_HLEN; + } + /* Shift the l2 header forward. 
*/ + memmove((char*)packet->data + MPLS_HLEN, packet->data, len); + packet->size -= MPLS_HLEN; + packet->data = (char*)packet->data + MPLS_HLEN; + packet->l2 = (char*)packet->l2 + MPLS_HLEN; + } +} + /* Converts hex digits in 'hex' to an Ethernet packet in '*packetp'. The * caller must free '*packetp'. On success, returns NULL. On failure, returns * an error message and stores NULL in '*packetp'. */ @@ -272,12 +453,12 @@ ip_count_cidr_bits(ovs_be32 netmask) void ip_format_masked(ovs_be32 ip, ovs_be32 mask, struct ds *s) { - ds_put_format(s, IP_FMT, IP_ARGS(&ip)); + ds_put_format(s, IP_FMT, IP_ARGS(ip)); if (mask != htonl(UINT32_MAX)) { if (ip_is_cidr(mask)) { ds_put_format(s, "/%d", ip_count_cidr_bits(mask)); } else { - ds_put_format(s, "/"IP_FMT, IP_ARGS(&mask)); + ds_put_format(s, "/"IP_FMT, IP_ARGS(mask)); } } } @@ -472,6 +653,134 @@ packet_set_ipv4_addr(struct ofpbuf *packet, ovs_be32 *addr, ovs_be32 new_addr) *addr = new_addr; } +/* Returns true, if packet contains at least one routing header where + * segments_left > 0. + * + * This function assumes that L3 and L4 markers are set in the packet. */ +static bool +packet_rh_present(struct ofpbuf *packet) +{ + const struct ip6_hdr *nh; + int nexthdr; + size_t len; + size_t remaining; + uint8_t *data = packet->l3; + + remaining = (uint8_t *)packet->l4 - (uint8_t *)packet->l3; + + if (remaining < sizeof *nh) { + return false; + } + nh = ALIGNED_CAST(struct ip6_hdr *, data); + data += sizeof *nh; + remaining -= sizeof *nh; + nexthdr = nh->ip6_nxt; + + while (1) { + if ((nexthdr != IPPROTO_HOPOPTS) + && (nexthdr != IPPROTO_ROUTING) + && (nexthdr != IPPROTO_DSTOPTS) + && (nexthdr != IPPROTO_AH) + && (nexthdr != IPPROTO_FRAGMENT)) { + /* It's either a terminal header (e.g., TCP, UDP) or one we + * don't understand. In either case, we're done with the + * packet, so stop scanning for routing headers. 
*/ + break; + } + + /* We only verify that at least 8 bytes of the next header are + * available, but many of these headers are longer. Ensure that + * accesses within the extension header are within those first 8 + * bytes. All extension headers are required to be at least 8 + * bytes. */ + if (remaining < 8) { + return false; + } + + if (nexthdr == IPPROTO_AH) { + /* A standard AH definition isn't available, but the fields + * we care about are in the same location as the generic + * option header--only the header length is calculated + * differently. */ + const struct ip6_ext *ext_hdr = (struct ip6_ext *)data; + + nexthdr = ext_hdr->ip6e_nxt; + len = (ext_hdr->ip6e_len + 2) * 4; + } else if (nexthdr == IPPROTO_FRAGMENT) { + const struct ip6_frag *frag_hdr = ALIGNED_CAST(struct ip6_frag *, + data); + + nexthdr = frag_hdr->ip6f_nxt; + len = sizeof *frag_hdr; + } else if (nexthdr == IPPROTO_ROUTING) { + const struct ip6_rthdr *rh = (struct ip6_rthdr *)data; + + if (rh->ip6r_segleft > 0) { + return true; + } + + nexthdr = rh->ip6r_nxt; + len = (rh->ip6r_len + 1) * 8; + } else { + const struct ip6_ext *ext_hdr = (struct ip6_ext *)data; + + nexthdr = ext_hdr->ip6e_nxt; + len = (ext_hdr->ip6e_len + 1) * 8; + } + + if (remaining < len) { + return false; + } + remaining -= len; + data += len; + } + + return false; +} + +static void +packet_update_csum128(struct ofpbuf *packet, uint8_t proto, + ovs_be32 addr[4], const ovs_be32 new_addr[4]) +{ + if (proto == IPPROTO_TCP && packet->l7) { + struct tcp_header *th = packet->l4; + + th->tcp_csum = recalc_csum128(th->tcp_csum, addr, new_addr); + } else if (proto == IPPROTO_UDP && packet->l7) { + struct udp_header *uh = packet->l4; + + if (uh->udp_csum) { + uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr); + if (!uh->udp_csum) { + uh->udp_csum = htons(0xffff); + } + } + } +} + +static void +packet_set_ipv6_addr(struct ofpbuf *packet, uint8_t proto, + struct in6_addr *addr, const ovs_be32 new_addr[4], + bool 
recalculate_csum) +{ + if (recalculate_csum) { + packet_update_csum128(packet, proto, (ovs_be32 *)addr, new_addr); + } + memcpy(addr, new_addr, sizeof(*addr)); +} + +static void +packet_set_ipv6_flow_label(ovs_be32 *flow_label, ovs_be32 flow_key) +{ + *flow_label = (*flow_label & htonl(~IPV6_LABEL_MASK)) | flow_key; +} + +static void +packet_set_ipv6_tc(ovs_be32 *flow_label, uint8_t tc) +{ + *flow_label = (*flow_label & htonl(0xF00FFFFF)) | htonl(tc << 20); +} + /* Modifies the IPv4 header fields of 'packet' to be consistent with 'src', * 'dst', 'tos', and 'ttl'. Updates 'packet''s L4 checksums as appropriate. * 'packet' must contain a valid IPv4 packet with correctly populated l[347] @@ -507,6 +816,33 @@ packet_set_ipv4(struct ofpbuf *packet, ovs_be32 src, ovs_be32 dst, } } +/* Modifies the IPv6 header fields of 'packet' to be consistent with 'src', + * 'dst', 'traffic class', and 'next hop'. Updates 'packet''s L4 checksums as + * appropriate. 'packet' must contain a valid IPv6 packet with correctly + * populated l[347] markers. 
*/ +void +packet_set_ipv6(struct ofpbuf *packet, uint8_t proto, const ovs_be32 src[4], + const ovs_be32 dst[4], uint8_t key_tc, ovs_be32 key_fl, + uint8_t key_hl) +{ + struct ip6_hdr *nh = packet->l3; + + if (memcmp(&nh->ip6_src, src, sizeof(ovs_be32[4]))) { + packet_set_ipv6_addr(packet, proto, &nh->ip6_src, src, true); + } + + if (memcmp(&nh->ip6_dst, dst, sizeof(ovs_be32[4]))) { + packet_set_ipv6_addr(packet, proto, &nh->ip6_dst, dst, + !packet_rh_present(packet)); + } + + packet_set_ipv6_tc(&nh->ip6_flow, key_tc); + + packet_set_ipv6_flow_label(&nh->ip6_flow, key_fl); + + nh->ip6_hlim = key_hl; +} + static void packet_set_port(ovs_be16 *port, ovs_be16 new_port, ovs_be16 *csum) { @@ -556,8 +892,7 @@ packet_set_udp_port(struct ofpbuf *packet, ovs_be16 src, ovs_be16 dst) uint8_t packet_get_tcp_flags(const struct ofpbuf *packet, const struct flow *flow) { - if ((flow->dl_type == htons(ETH_TYPE_IP) || - flow->dl_type == htons(ETH_TYPE_IPV6)) && + if (dl_type_is_ip_any(flow->dl_type) && flow->nw_proto == IPPROTO_TCP && packet->l7) { const struct tcp_header *tcp = packet->l4; return TCP_FLAGS(tcp->tcp_ctl);