X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=datapath%2Fflow.c;h=152d9be749725f967eae0503d77a739a2252488d;hb=eba9291a25a9d38f3382510de52e619a65b2f711;hp=95297a52ce5953493a21408adab503763ebd5089;hpb=515c382dafb3cc0f0499f477eddc0566602210cf;p=sliver-openvswitch.git diff --git a/datapath/flow.c b/datapath/flow.c index 95297a52c..152d9be74 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -8,7 +8,7 @@ #include "flow.h" #include "datapath.h" -#include +#include #include #include #include @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -49,13 +48,13 @@ static int check_header(struct sk_buff *skb, int len) return 0; } -static inline bool arphdr_ok(struct sk_buff *skb) +static bool arphdr_ok(struct sk_buff *skb) { return pskb_may_pull(skb, skb_network_offset(skb) + sizeof(struct arp_eth_header)); } -static inline int check_iphdr(struct sk_buff *skb) +static int check_iphdr(struct sk_buff *skb) { unsigned int nh_ofs = skb_network_offset(skb); unsigned int ip_len; @@ -74,7 +73,7 @@ static inline int check_iphdr(struct sk_buff *skb) return 0; } -static inline bool tcphdr_ok(struct sk_buff *skb) +static bool tcphdr_ok(struct sk_buff *skb) { int th_ofs = skb_transport_offset(skb); int tcp_len; @@ -90,13 +89,13 @@ static inline bool tcphdr_ok(struct sk_buff *skb) return true; } -static inline bool udphdr_ok(struct sk_buff *skb) +static bool udphdr_ok(struct sk_buff *skb) { return pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr)); } -static inline bool icmphdr_ok(struct sk_buff *skb) +static bool icmphdr_ok(struct sk_buff *skb) { return pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct icmphdr)); @@ -116,8 +115,8 @@ u64 flow_used_time(unsigned long flow_jiffies) } #define SW_FLOW_KEY_OFFSET(field) \ - offsetof(struct sw_flow_key, field) + \ - FIELD_SIZEOF(struct sw_flow_key, field) + (offsetof(struct sw_flow_key, field) + \ + FIELD_SIZEOF(struct sw_flow_key, field)) /** * skip_exthdr - skip any IPv6 extension headers @@ -126,10 +125,10 @@ u64 flow_used_time(unsigned long flow_jiffies) * @nexthdrp: Initially, points to the type of the extension header at @start. * This function updates it to point to the extension header at the final * offset. - * @tos_frag: Points to the @tos_frag member in a &struct sw_flow_key. This + * @frag: Points to the @frag member in a &struct sw_flow_key. This * function sets an appropriate %OVS_FRAG_TYPE_* value. * - * This is based on ipv6_skip_exthdr() but adds the updates to *@tos_frag. + * This is based on ipv6_skip_exthdr() but adds the updates to *@frag. * * When there is more than one fragment header, this version reports whether * the final fragment header that it examines is a first fragment. @@ -137,7 +136,7 @@ u64 flow_used_time(unsigned long flow_jiffies) * Returns the final payload offset, or -1 on error. */ static int skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, - u8 *tos_frag) + u8 *frag) { u8 nexthdr = *nexthdrp; @@ -160,12 +159,11 @@ static int skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, if (fp == NULL) return -1; - *tos_frag &= ~OVS_FRAG_TYPE_MASK; if (ntohs(*fp) & ~0x7) { - *tos_frag |= OVS_FRAG_TYPE_LATER; + *frag = OVS_FRAG_TYPE_LATER; break; } - *tos_frag |= OVS_FRAG_TYPE_FIRST; + *frag = OVS_FRAG_TYPE_FIRST; hdrlen = 8; } else if (nexthdr == NEXTHDR_AUTH) hdrlen = (hp->hdrlen+2)<<2; @@ -190,7 +188,7 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key, uint8_t nexthdr; int err; - *key_lenp = SW_FLOW_KEY_OFFSET(ipv6.addr); + *key_lenp = SW_FLOW_KEY_OFFSET(ipv6.label); err = check_header(skb, nh_ofs + sizeof(*nh)); if (unlikely(err)) @@ -201,11 +199,13 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key, payload_ofs = (u8 *)(nh + 1) - skb->data; key->ip.proto = NEXTHDR_NONE; - key->ip.tos_frag = ipv6_get_dsfield(nh) & ~INET_ECN_MASK; + key->ip.tos = ipv6_get_dsfield(nh); + key->ip.ttl = nh->hop_limit; + key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL); ipv6_addr_copy(&key->ipv6.addr.src, &nh->saddr); ipv6_addr_copy(&key->ipv6.addr.dst, &nh->daddr); - payload_ofs = skip_exthdr(skb, payload_ofs, &nexthdr, &key->ip.tos_frag); + payload_ofs = skip_exthdr(skb, payload_ofs, &nexthdr, &key->ip.frag); if (unlikely(payload_ofs < 0)) return -EINVAL; @@ -286,7 +286,7 @@ static struct hlist_head __rcu *find_bucket(struct flow_table * table, u32 hash) static struct flex_array __rcu *alloc_buckets(unsigned int n_buckets) { - struct flex_array __rcu * buckets; + struct flex_array __rcu *buckets; int i, err; buckets = flex_array_alloc(sizeof(struct hlist_head *), @@ -307,7 +307,7 @@ static struct flex_array __rcu *alloc_buckets(unsigned int n_buckets) return buckets; } -static void free_buckets(struct flex_array * buckets) +static void free_buckets(struct flex_array *buckets) { flex_array_free(buckets); } @@ -368,10 +368,10 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) void flow_tbl_deferred_destroy(struct flow_table *table) { - if (!table) - return; + if (!table) + return; - call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb); + call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb); } struct sw_flow *flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *last) @@ -565,7 +565,8 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, icmp_len -= sizeof(*nd); offset = 0; while (icmp_len >= 8) { - struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd->opt + offset); + struct nd_opt_hdr *nd_opt = + (struct nd_opt_hdr *)(nd->opt + offset); int opt_len = nd_opt->nd_opt_len * 8; if (unlikely(!opt_len || opt_len > icmp_len)) @@ -688,16 +689,17 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, key->ipv4.addr.dst = nh->daddr; key->ip.proto = nh->protocol; - key->ip.tos_frag = nh->tos & ~INET_ECN_MASK; + key->ip.tos = nh->tos; + key->ip.ttl = nh->ttl; offset = nh->frag_off & htons(IP_OFFSET); if (offset) { - key->ip.tos_frag |= OVS_FRAG_TYPE_LATER; + key->ip.frag = OVS_FRAG_TYPE_LATER; goto out; } if (nh->frag_off & htons(IP_MF) || skb_shinfo(skb)->gso_type & SKB_GSO_UDP) - key->ip.tos_frag |= OVS_FRAG_TYPE_FIRST; + key->ip.frag = OVS_FRAG_TYPE_FIRST; /* Transport layer. */ if (key->ip.proto == IPPROTO_TCP) { @@ -719,8 +721,8 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, if (icmphdr_ok(skb)) { struct icmphdr *icmp = icmp_hdr(skb); /* The ICMP type and code fields use the 16-bit - * transport port fields, so we need to store them - * in 16-bit network byte order. */ + * transport port fields, so we need to store + * them in 16-bit network byte order. */ key->ipv4.tp.src = htons(icmp->type); key->ipv4.tp.dst = htons(icmp->code); } @@ -761,10 +763,10 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, goto out; } - if ((key->ip.tos_frag & OVS_FRAG_TYPE_MASK) == OVS_FRAG_TYPE_LATER) + if (key->ip.frag == OVS_FRAG_TYPE_LATER) goto out; if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) - key->ip.tos_frag |= OVS_FRAG_TYPE_FIRST; + key->ip.frag = OVS_FRAG_TYPE_FIRST; /* Transport layer. */ if (key->ip.proto == NEXTHDR_TCP) { @@ -798,10 +800,10 @@ out: u32 flow_hash(const struct sw_flow_key *key, int key_len) { - return jhash2((u32*)key, DIV_ROUND_UP(key_len, sizeof(u32)), hash_seed); + return jhash2((u32 *)key, DIV_ROUND_UP(key_len, sizeof(u32)), hash_seed); } -struct sw_flow * flow_tbl_lookup(struct flow_table *table, +struct sw_flow *flow_tbl_lookup(struct flow_table *table, struct sw_flow_key *key, int key_len) { struct sw_flow *flow; @@ -840,15 +842,6 @@ void flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) } } -static int parse_tos_frag(struct sw_flow_key *swkey, u8 tos, u8 frag) -{ - if (tos & INET_ECN_MASK || frag > OVS_FRAG_TYPE_MAX) - return -EINVAL; - - swkey->ip.tos_frag = tos | frag; - return 0; -} - /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ const u32 ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_PRIORITY] = 4, @@ -887,7 +880,6 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, const struct nlattr *attr) { int error = 0; - enum ovs_frag_type frag_type; const struct nlattr *nla; u16 prev_type; int rem; @@ -911,9 +903,10 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, const struct ovs_key_arp *arp_key; const struct ovs_key_nd *nd_key; - int type = nla_type(nla); + int type = nla_type(nla); - if (type > OVS_KEY_ATTR_MAX || nla_len(nla) != ovs_key_lens[type]) + if (type > OVS_KEY_ATTR_MAX || + nla_len(nla) != ovs_key_lens[type]) goto invalid; #define TRANSITION(PREV_TYPE, TYPE) (((PREV_TYPE) << 16) | (TYPE)) @@ -966,23 +959,28 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, if (swkey->eth.type != htons(ETH_P_IP)) goto invalid; ipv4_key = nla_data(nla); - swkey->ip.proto = ipv4_key->ipv4_proto; - if (parse_tos_frag(swkey, ipv4_key->ipv4_tos, - ipv4_key->ipv4_frag)) + if (ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) goto invalid; + swkey->ip.proto = ipv4_key->ipv4_proto; + swkey->ip.tos = ipv4_key->ipv4_tos; + swkey->ip.ttl = ipv4_key->ipv4_ttl; + swkey->ip.frag = ipv4_key->ipv4_frag; swkey->ipv4.addr.src = ipv4_key->ipv4_src; swkey->ipv4.addr.dst = ipv4_key->ipv4_dst; break; case TRANSITION(OVS_KEY_ATTR_ETHERTYPE, OVS_KEY_ATTR_IPV6): - key_len = SW_FLOW_KEY_OFFSET(ipv6.addr); + key_len = SW_FLOW_KEY_OFFSET(ipv6.label); if (swkey->eth.type != htons(ETH_P_IPV6)) goto invalid; ipv6_key = nla_data(nla); - swkey->ip.proto = ipv6_key->ipv6_proto; - if (parse_tos_frag(swkey, ipv6_key->ipv6_tos, - ipv6_key->ipv6_frag)) + if (ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) goto invalid; + swkey->ipv6.label = ipv6_key->ipv6_label; + swkey->ip.proto = ipv6_key->ipv6_proto; + swkey->ip.tos = ipv6_key->ipv6_tclass; + swkey->ip.ttl = ipv6_key->ipv6_hlimit; + swkey->ip.frag = ipv6_key->ipv6_frag; memcpy(&swkey->ipv6.addr.src, ipv6_key->ipv6_src, sizeof(swkey->ipv6.addr.src)); memcpy(&swkey->ipv6.addr.dst, ipv6_key->ipv6_dst, @@ -1078,7 +1076,6 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, if (rem) goto invalid; - frag_type = swkey->ip.tos_frag & OVS_FRAG_TYPE_MASK; switch (prev_type) { case OVS_KEY_ATTR_UNSPEC: goto invalid; @@ -1100,7 +1097,7 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, goto ok; case OVS_KEY_ATTR_IPV4: - if (frag_type == OVS_FRAG_TYPE_LATER) + if (swkey->ip.frag == OVS_FRAG_TYPE_LATER) goto ok; if (swkey->ip.proto == IPPROTO_TCP || swkey->ip.proto == IPPROTO_UDP || @@ -1109,7 +1106,7 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, goto ok; case OVS_KEY_ATTR_IPV6: - if (frag_type == OVS_FRAG_TYPE_LATER) + if (swkey->ip.frag == OVS_FRAG_TYPE_LATER) goto ok; if (swkey->ip.proto == IPPROTO_TCP || swkey->ip.proto == IPPROTO_UDP || @@ -1120,7 +1117,7 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, case OVS_KEY_ATTR_ICMPV6: if (swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) || swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT) || - frag_type == OVS_FRAG_TYPE_LATER) + swkey->ip.frag == OVS_FRAG_TYPE_LATER) goto invalid; goto ok; @@ -1128,7 +1125,7 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, case OVS_KEY_ATTR_UDP: case OVS_KEY_ATTR_ICMP: case OVS_KEY_ATTR_ND: - if (frag_type == OVS_FRAG_TYPE_LATER) + if (swkey->ip.frag == OVS_FRAG_TYPE_LATER) goto invalid; goto ok; @@ -1143,7 +1140,6 @@ invalid: error = -EINVAL; ok: - WARN_ON_ONCE(!key_len && !error); *key_lenp = key_len; return error; } @@ -1164,43 +1160,35 @@ int flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 *tun_id, const struct nlattr *attr) { const struct nlattr *nla; - u16 prev_type; int rem; *in_port = USHRT_MAX; *tun_id = 0; *priority = 0; - prev_type = OVS_KEY_ATTR_UNSPEC; nla_for_each_nested(nla, attr, rem) { - int type = nla_type(nla); + int type = nla_type(nla); - if (type > OVS_KEY_ATTR_MAX || nla_len(nla) != ovs_key_lens[type]) - return -EINVAL; - - switch (TRANSITION(prev_type, type)) { - case TRANSITION(OVS_KEY_ATTR_UNSPEC, OVS_KEY_ATTR_PRIORITY): - *priority = nla_get_u32(nla); - break; + if (type <= OVS_KEY_ATTR_MAX && ovs_key_lens[type] != 0) { + if (nla_len(nla) != ovs_key_lens[type]) + return -EINVAL; - case TRANSITION(OVS_KEY_ATTR_UNSPEC, OVS_KEY_ATTR_TUN_ID): - case TRANSITION(OVS_KEY_ATTR_PRIORITY, OVS_KEY_ATTR_TUN_ID): - *tun_id = nla_get_be64(nla); - break; + switch (type) { + case OVS_KEY_ATTR_PRIORITY: + *priority = nla_get_u32(nla); + break; - case TRANSITION(OVS_KEY_ATTR_UNSPEC, OVS_KEY_ATTR_IN_PORT): - case TRANSITION(OVS_KEY_ATTR_PRIORITY, OVS_KEY_ATTR_IN_PORT): - case TRANSITION(OVS_KEY_ATTR_TUN_ID, OVS_KEY_ATTR_IN_PORT): - if (nla_get_u32(nla) >= DP_MAX_PORTS) - return -EINVAL; - *in_port = nla_get_u32(nla); - break; + case OVS_KEY_ATTR_TUN_ID: + *tun_id = nla_get_be64(nla); + break; - default: - return 0; + case OVS_KEY_ATTR_IN_PORT: + if (nla_get_u32(nla) >= DP_MAX_PORTS) + return -EINVAL; + *in_port = nla_get_u32(nla); + break; + } } - - prev_type = type; } if (rem) return -EINVAL; @@ -1212,11 +1200,6 @@ int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) struct ovs_key_ethernet *eth_key; struct nlattr *nla; - /* This is an imperfect sanity-check that FLOW_BUFSIZE doesn't need - * to be updated, but will at least raise awareness when new - * datapath key types are added. */ - BUILD_BUG_ON(__OVS_KEY_ATTR_MAX != 15); - if (swkey->phy.priority) NLA_PUT_U32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority); @@ -1253,12 +1236,12 @@ int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) if (!nla) goto nla_put_failure; ipv4_key = nla_data(nla); - memset(ipv4_key, 0, sizeof(struct ovs_key_ipv4)); ipv4_key->ipv4_src = swkey->ipv4.addr.src; ipv4_key->ipv4_dst = swkey->ipv4.addr.dst; ipv4_key->ipv4_proto = swkey->ip.proto; - ipv4_key->ipv4_tos = swkey->ip.tos_frag & ~INET_ECN_MASK; - ipv4_key->ipv4_frag = swkey->ip.tos_frag & OVS_FRAG_TYPE_MASK; + ipv4_key->ipv4_tos = swkey->ip.tos; + ipv4_key->ipv4_ttl = swkey->ip.ttl; + ipv4_key->ipv4_frag = swkey->ip.frag; } else if (swkey->eth.type == htons(ETH_P_IPV6)) { struct ovs_key_ipv6 *ipv6_key; @@ -1266,14 +1249,15 @@ int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) if (!nla) goto nla_put_failure; ipv6_key = nla_data(nla); - memset(ipv6_key, 0, sizeof(struct ovs_key_ipv6)); memcpy(ipv6_key->ipv6_src, &swkey->ipv6.addr.src, sizeof(ipv6_key->ipv6_src)); memcpy(ipv6_key->ipv6_dst, &swkey->ipv6.addr.dst, sizeof(ipv6_key->ipv6_dst)); + ipv6_key->ipv6_label = swkey->ipv6.label; ipv6_key->ipv6_proto = swkey->ip.proto; - ipv6_key->ipv6_tos = swkey->ip.tos_frag & ~INET_ECN_MASK; - ipv6_key->ipv6_frag = swkey->ip.tos_frag & OVS_FRAG_TYPE_MASK; + ipv6_key->ipv6_tclass = swkey->ip.tos; + ipv6_key->ipv6_hlimit = swkey->ip.ttl; + ipv6_key->ipv6_frag = swkey->ip.frag; } else if (swkey->eth.type == htons(ETH_P_ARP)) { struct ovs_key_arp *arp_key; @@ -1291,7 +1275,7 @@ int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) if ((swkey->eth.type == htons(ETH_P_IP) || swkey->eth.type == htons(ETH_P_IPV6)) && - (swkey->ip.tos_frag & OVS_FRAG_TYPE_MASK) != OVS_FRAG_TYPE_LATER) { + swkey->ip.frag != OVS_FRAG_TYPE_LATER) { if (swkey->ip.proto == IPPROTO_TCP) { struct ovs_key_tcp *tcp_key;