/*
- * Copyright (c) 2007-2011 Nicira Networks.
+ * Copyright (c) 2007-2011 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
#include <linux/in.h>
#include <linux/rcupdate.h>
#include <linux/if_arp.h>
-#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include "vlan.h"
static struct kmem_cache *flow_cache;
-static unsigned int hash_seed __read_mostly;
static int check_header(struct sk_buff *skb, int len)
{
{
u8 tcp_flags = 0;
- if (flow->key.eth.type == htons(ETH_P_IP) &&
- flow->key.ip.proto == IPPROTO_TCP) {
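+ /* Record TCP flags for both IPv4 and IPv6 flows, but only when the
+  * packet is long enough to hold a TCP header at the transport offset. */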
+ if ((flow->key.eth.type == htons(ETH_P_IP) ||
+ flow->key.eth.type == htons(ETH_P_IPV6)) &&
+ flow->key.ip.proto == IPPROTO_TCP &&
+ likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) {
u8 *tcp = (u8 *)tcp_hdr(skb);
tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK;
}
int actions_len = nla_len(actions);
struct sw_flow_actions *sfa;
- /* At least DP_MAX_PORTS actions are required to be able to flood a
- * packet to every port. Factor of 2 allows for setting VLAN tags,
- * etc. */
- if (actions_len > 2 * DP_MAX_PORTS * nla_total_size(4))
+ if (actions_len > MAX_ACTIONS_BUFSIZE)
return ERR_PTR(-EINVAL);
sfa = kmalloc(sizeof(*sfa) + actions_len, GFP_KERNEL);
static struct hlist_head *find_bucket(struct flow_table *table, u32 hash)
{
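+ /* Fold the per-table random seed into the flow hash so every table,
+  * including a rehashed replacement, distributes flows differently. */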
+ hash = jhash_1word(hash, table->hash_seed);
return flex_array_get(table->buckets,
(hash & (table->n_buckets - 1)));
}
}
table->n_buckets = new_size;
table->count = 0;
+ table->node_ver = 0;
+ table->keep_flows = false;
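+ /* Per-table seed consumed by find_bucket(). */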
+ get_random_bytes(&table->hash_seed, sizeof(u32));
return table;
}
if (!table)
return;
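+ /* If the flows were handed over to a replacement table by a rehash or
+  * expand, free only the buckets and the table itself. */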
+ if (table->keep_flows)
+ goto skip_flows;
+
for (i = 0; i < table->n_buckets; i++) {
struct sw_flow *flow;
struct hlist_head *head = flex_array_get(table->buckets, i);
struct hlist_node *node, *n;
+ int ver = table->node_ver;
- hlist_for_each_entry_safe(flow, node, n, head, hash_node) {
- hlist_del_init_rcu(&flow->hash_node);
+ hlist_for_each_entry_safe(flow, node, n, head, hash_node[ver]) {
+ hlist_del_rcu(&flow->hash_node[ver]);
flow_free(flow);
}
}
+skip_flows:
free_buckets(table->buckets);
kfree(table);
}
struct sw_flow *flow;
struct hlist_head *head;
struct hlist_node *n;
+ int ver;
int i;
+ ver = table->node_ver;
while (*bucket < table->n_buckets) {
i = 0;
head = flex_array_get(table->buckets, *bucket);
- hlist_for_each_entry_rcu(flow, n, head, hash_node) {
+ hlist_for_each_entry_rcu(flow, n, head, hash_node[ver]) {
if (i < *last) {
i++;
continue;
return NULL;
}
-struct flow_table *ovs_flow_tbl_expand(struct flow_table *table)
+static void __flow_tbl_insert(struct flow_table *table, struct sw_flow *flow)
{
- struct flow_table *new_table;
- int n_buckets = table->n_buckets * 2;
+ struct hlist_head *head;
+ head = find_bucket(table, flow->hash);
+ hlist_add_head_rcu(&flow->hash_node[table->node_ver], head);
+ table->count++;
+}
+
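+/* Link every flow in 'old' into 'new' through the hash_node slot that 'old'
+ * is not using, so RCU readers still walking the old table are undisturbed,
+ * and mark the old table to keep its flows when it is destroyed. */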
+static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new)
+{
+ int old_ver;
int i;
- new_table = ovs_flow_tbl_alloc(n_buckets);
- if (!new_table)
- return ERR_PTR(-ENOMEM);
+ old_ver = old->node_ver;
+ new->node_ver = !old_ver;
- for (i = 0; i < table->n_buckets; i++) {
+ /* Insert into the new table. */
+ for (i = 0; i < old->n_buckets; i++) {
struct sw_flow *flow;
struct hlist_head *head;
- struct hlist_node *n, *pos;
+ struct hlist_node *n;
- head = flex_array_get(table->buckets, i);
+ head = flex_array_get(old->buckets, i);
- hlist_for_each_entry_safe(flow, n, pos, head, hash_node) {
- hlist_del_init_rcu(&flow->hash_node);
- ovs_flow_tbl_insert(new_table, flow);
- }
+ hlist_for_each_entry(flow, n, head, hash_node[old_ver])
+ __flow_tbl_insert(new, flow);
}
+ old->keep_flows = true;
+}
+
+static struct flow_table *__flow_tbl_rehash(struct flow_table *table, int n_buckets)
+{
+ struct flow_table *new_table;
+
+ new_table = ovs_flow_tbl_alloc(n_buckets);
+ if (!new_table)
+ return ERR_PTR(-ENOMEM);
+
+ flow_table_copy_flows(table, new_table);
return new_table;
}
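+/* Rebuild the table at its current size; the replacement gets a fresh
+ * bucket seed from ovs_flow_tbl_alloc(). */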
+struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table)
+{
+ return __flow_tbl_rehash(table, table->n_buckets);
+}
+
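+/* Rebuild the table with twice as many buckets. */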
+struct flow_table *ovs_flow_tbl_expand(struct flow_table *table)
+{
+ return __flow_tbl_rehash(table, table->n_buckets * 2);
+}
+
/* RCU callback used by ovs_flow_deferred_free. */
static void rcu_free_flow_callback(struct rcu_head *rcu)
{
memset(key, 0, sizeof(*key));
key->phy.priority = skb->priority;
- key->phy.tun_id = OVS_CB(skb)->tun_id;
+ if (OVS_CB(skb)->tun_key)
+ memcpy(&key->phy.tun.tun_key, OVS_CB(skb)->tun_key, sizeof(key->phy.tun.tun_key));
key->phy.in_port = in_port;
skb_reset_mac_header(skb);
/* We only match on the lower 8 bits of the opcode. */
if (ntohs(arp->ar_op) <= 0xff)
key->ip.proto = ntohs(arp->ar_op);
-
- if (key->ip.proto == ARPOP_REQUEST
- || key->ip.proto == ARPOP_REPLY) {
- memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
- memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
- memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN);
- memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN);
- key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
- }
+ memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
+ memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
+ memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN);
+ memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN);
+ key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
}
} else if (key->eth.type == htons(ETH_P_IPV6)) {
int nh_len; /* IPv6 Header + Extensions */
return error;
}
-u32 ovs_flow_hash(const struct sw_flow_key *key, int key_len)
+static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start, int key_len)
{
- return jhash2((u32 *)key, DIV_ROUND_UP(key_len, sizeof(u32)), hash_seed);
+ return jhash2((u32 *)((u8 *)key + key_start),
+ DIV_ROUND_UP(key_len - key_start, sizeof(u32)), 0);
+}
+
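+/* Offset at which flow key hashing and comparison start: hash the whole key,
+ * including the leading tunnel metadata, only when a tunnel destination is
+ * set; otherwise start at phy.priority so the unset tunnel fields are
+ * ignored. */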
+static int flow_key_start(struct sw_flow_key *key)
+{
+ if (key->phy.tun.tun_key.ipv4_dst)
+ return 0;
+ else
+ return offsetof(struct sw_flow_key, phy.priority);
}
struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table,
struct sw_flow *flow;
struct hlist_node *n;
struct hlist_head *head;
+ u8 *_key;
+ int key_start;
u32 hash;
- hash = ovs_flow_hash(key, key_len);
+ key_start = flow_key_start(key);
+ hash = ovs_flow_hash(key, key_start, key_len);
+ _key = (u8 *) key + key_start;
head = find_bucket(table, hash);
- hlist_for_each_entry_rcu(flow, n, head, hash_node) {
+ hlist_for_each_entry_rcu(flow, n, head, hash_node[table->node_ver]) {
if (flow->hash == hash &&
- !memcmp(&flow->key, key, key_len)) {
+ !memcmp((u8 *)&flow->key + key_start, _key, key_len - key_start)) {
return flow;
}
}
return NULL;
}
-void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow)
+void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
+ struct sw_flow_key *key, int key_len)
{
- struct hlist_head *head;
-
- head = find_bucket(table, flow->hash);
- hlist_add_head_rcu(&flow->hash_node, head);
- table->count++;
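+ /* Hash only the relevant part of the key (see flow_key_start()), copy the
+  * key into the flow, then link it into the current bucket lists. */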
+ flow->hash = ovs_flow_hash(key, flow_key_start(key), key_len);
+ memcpy(&flow->key, key, sizeof(flow->key));
+ __flow_tbl_insert(table, flow);
}
void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
{
- if (!hlist_unhashed(&flow->hash_node)) {
- hlist_del_init_rcu(&flow->hash_node);
- table->count--;
- BUG_ON(table->count < 0);
- }
+ hlist_del_rcu(&flow->hash_node[table->node_ver]);
+ table->count--;
+ BUG_ON(table->count < 0);
}
/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
/* Not upstream. */
[OVS_KEY_ATTR_TUN_ID] = sizeof(__be64),
+ [OVS_KEY_ATTR_IPV4_TUNNEL] = sizeof(struct ovs_key_ipv4_tunnel),
};
static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
swkey->phy.in_port = in_port;
attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
} else {
- swkey->phy.in_port = USHRT_MAX;
+ swkey->phy.in_port = DP_MAX_PORTS;
}
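+ /* The tunnel may be given as the flat OVS_KEY_ATTR_TUN_ID, as a full
+  * OVS_KEY_ATTR_IPV4_TUNNEL, or as both; when both are present they must
+  * agree on the tunnel ID. */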
- if (attrs & (1ULL << OVS_KEY_ATTR_TUN_ID)) {
- swkey->phy.tun_id = nla_get_be64(a[OVS_KEY_ATTR_TUN_ID]);
+ if (attrs & (1ULL << OVS_KEY_ATTR_TUN_ID) &&
+ attrs & (1ULL << OVS_KEY_ATTR_IPV4_TUNNEL)) {
+ struct ovs_key_ipv4_tunnel *tun_key;
+ __be64 tun_id;
+
+ tun_key = nla_data(a[OVS_KEY_ATTR_IPV4_TUNNEL]);
+
+ if (!tun_key->ipv4_dst)
+ return -EINVAL;
+ if (!(tun_key->tun_flags & OVS_FLOW_TNL_F_KEY))
+ return -EINVAL;
+
+ tun_id = nla_get_be64(a[OVS_KEY_ATTR_TUN_ID]);
+ if (tun_id != tun_key->tun_id)
+ return -EINVAL;
+
+ memcpy(&swkey->phy.tun.tun_key, tun_key, sizeof(swkey->phy.tun.tun_key));
+ attrs &= ~(1ULL << OVS_KEY_ATTR_TUN_ID);
+ attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4_TUNNEL);
+ } else if (attrs & (1ULL << OVS_KEY_ATTR_TUN_ID)) {
+ swkey->phy.tun.tun_key.tun_id = nla_get_be64(a[OVS_KEY_ATTR_TUN_ID]);
+ swkey->phy.tun.tun_key.tun_flags |= OVS_FLOW_TNL_F_KEY;
+
attrs &= ~(1ULL << OVS_KEY_ATTR_TUN_ID);
+ } else if (attrs & (1ULL << OVS_KEY_ATTR_IPV4_TUNNEL)) {
+ struct ovs_key_ipv4_tunnel *tun_key;
+ tun_key = nla_data(a[OVS_KEY_ATTR_IPV4_TUNNEL]);
+
+ if (!tun_key->ipv4_dst)
+ return -EINVAL;
+
+ memcpy(&swkey->phy.tun.tun_key, tun_key, sizeof(swkey->phy.tun.tun_key));
+ attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4_TUNNEL);
}
/* Data attributes. */
* get the metadata, that is, the parts of the flow key that cannot be
* extracted from the packet itself.
*/
-int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 *tun_id,
- const struct nlattr *attr)
+
+int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len, const struct nlattr *attr)
{
+ struct ovs_key_ipv4_tunnel *tun_key = &flow->key.phy.tun.tun_key;
const struct nlattr *nla;
int rem;
+ __be64 tun_id = 0;
- *in_port = USHRT_MAX;
- *tun_id = 0;
- *priority = 0;
+ flow->key.phy.in_port = DP_MAX_PORTS;
+ flow->key.phy.priority = 0;
+ memset(tun_key, 0, sizeof(flow->key.phy.tun.tun_key));
nla_for_each_nested(nla, attr, rem) {
int type = nla_type(nla);
switch (type) {
case OVS_KEY_ATTR_PRIORITY:
- *priority = nla_get_u32(nla);
+ flow->key.phy.priority = nla_get_u32(nla);
break;
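+ /* OVS_KEY_ATTR_TUN_ID and OVS_KEY_ATTR_IPV4_TUNNEL may appear in either
+  * order; whichever arrives second must agree with the tunnel ID already
+  * recorded, and OVS_KEY_ATTR_IPV4_TUNNEL must carry a nonzero destination
+  * address. */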
case OVS_KEY_ATTR_TUN_ID:
- *tun_id = nla_get_be64(nla);
+ tun_id = nla_get_be64(nla);
+
+ if (tun_key->ipv4_dst) {
+ if (!(tun_key->tun_flags & OVS_FLOW_TNL_F_KEY))
+ return -EINVAL;
+ if (tun_key->tun_id != tun_id)
+ return -EINVAL;
+ break;
+ }
+ tun_key->tun_id = tun_id;
+ tun_key->tun_flags |= OVS_FLOW_TNL_F_KEY;
+
+ break;
+
+ case OVS_KEY_ATTR_IPV4_TUNNEL:
+ if (tun_key->tun_flags & OVS_FLOW_TNL_F_KEY) {
+ tun_id = tun_key->tun_id;
+
+ memcpy(tun_key, nla_data(nla), sizeof(*tun_key));
+ if (!(tun_key->tun_flags & OVS_FLOW_TNL_F_KEY))
+ return -EINVAL;
+
+ if (tun_key->tun_id != tun_id)
+ return -EINVAL;
+ } else
+ memcpy(tun_key, nla_data(nla), sizeof(*tun_key));
+
+ if (!tun_key->ipv4_dst)
+ return -EINVAL;
break;
case OVS_KEY_ATTR_IN_PORT:
if (nla_get_u32(nla) >= DP_MAX_PORTS)
return -EINVAL;
- *in_port = nla_get_u32(nla);
+ flow->key.phy.in_port = nla_get_u32(nla);
break;
}
}
}
if (rem)
return -EINVAL;
+
+ flow->hash = ovs_flow_hash(&flow->key,
+ flow_key_start(&flow->key), key_len);
+
return 0;
}
struct ovs_key_ethernet *eth_key;
struct nlattr *nla, *encap;
- if (swkey->phy.priority)
- NLA_PUT_U32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority);
+ if (swkey->phy.priority &&
+ nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority))
+ goto nla_put_failure;
- if (swkey->phy.tun_id != cpu_to_be64(0))
- NLA_PUT_BE64(skb, OVS_KEY_ATTR_TUN_ID, swkey->phy.tun_id);
+ if (swkey->phy.tun.tun_key.ipv4_dst) {
+ struct ovs_key_ipv4_tunnel *tun_key;
+ nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4_TUNNEL, sizeof(*tun_key));
+ if (!nla)
+ goto nla_put_failure;
+ tun_key = nla_data(nla);
+ memcpy(tun_key, &swkey->phy.tun.tun_key, sizeof(*tun_key));
+ }
+ if ((swkey->phy.tun.tun_key.tun_flags & OVS_FLOW_TNL_F_KEY) &&
+ nla_put_be64(skb, OVS_KEY_ATTR_TUN_ID, swkey->phy.tun.tun_key.tun_id))
+ goto nla_put_failure;
- if (swkey->phy.in_port != USHRT_MAX)
- NLA_PUT_U32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port);
+ if (swkey->phy.in_port != DP_MAX_PORTS &&
+ nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port))
+ goto nla_put_failure;
nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
if (!nla)
memcpy(eth_key->eth_dst, swkey->eth.dst, ETH_ALEN);
if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
- NLA_PUT_BE16(skb, OVS_KEY_ATTR_ETHERTYPE, htons(ETH_P_8021Q));
- NLA_PUT_BE16(skb, OVS_KEY_ATTR_VLAN, swkey->eth.tci);
+ if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, htons(ETH_P_8021Q)) ||
+ nla_put_be16(skb, OVS_KEY_ATTR_VLAN, swkey->eth.tci))
+ goto nla_put_failure;
encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
if (!swkey->eth.tci)
goto unencap;
if (swkey->eth.type == htons(ETH_P_802_2))
goto unencap;
- NLA_PUT_BE16(skb, OVS_KEY_ATTR_ETHERTYPE, swkey->eth.type);
+ if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, swkey->eth.type))
+ goto nla_put_failure;
if (swkey->eth.type == htons(ETH_P_IP)) {
struct ovs_key_ipv4 *ipv4_key;
if (flow_cache == NULL)
return -ENOMEM;
- get_random_bytes(&hash_seed, sizeof(hash_seed));
-
return 0;
}