#include "datapath.h"
#include "actions.h"
#include "flow.h"
-#include "loop_counter.h"
#include "table.h"
+#include "vlan.h"
#include "vport-internal_dev.h"
int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
static LIST_HEAD(dps);
static struct vport *new_vport(const struct vport_parms *);
+static int queue_control_packets(struct datapath *, struct sk_buff *,
+ const struct dp_upcall_info *);
/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
struct datapath *get_dp(int dp_ifindex)
struct datapath *dp = p->dp;
struct dp_stats_percpu *stats;
int stats_counter_off;
- struct sw_flow_actions *acts;
- struct loop_counter *loop;
int error;
OVS_CB(skb)->vport = p;
if (!OVS_CB(skb)->flow) {
struct sw_flow_key key;
struct tbl_node *flow_node;
+ int key_len;
bool is_frag;
/* Extract flow from 'skb' into 'key'. */
- error = flow_extract(skb, p->port_no, &key, &is_frag);
+ error = flow_extract(skb, p->port_no, &key, &key_len, &is_frag);
if (unlikely(error)) {
kfree_skb(skb);
return;
}
/* Look up flow. */
- flow_node = tbl_lookup(rcu_dereference(dp->table), &key,
- flow_hash(&key), flow_cmp);
+ flow_node = tbl_lookup(rcu_dereference(dp->table), &key, key_len,
+ flow_hash(&key, key_len), flow_cmp);
if (unlikely(!flow_node)) {
struct dp_upcall_info upcall;
stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
flow_used(OVS_CB(skb)->flow, skb);
-
- acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
-
- /* Check whether we've looped too much. */
- loop = loop_get_counter();
- if (unlikely(++loop->count > MAX_LOOPS))
- loop->looping = true;
- if (unlikely(loop->looping)) {
- loop_suppress(dp, acts);
- kfree_skb(skb);
- goto out_loop;
- }
-
- /* Execute actions. */
- execute_actions(dp, skb, &OVS_CB(skb)->flow->key, acts->actions,
- acts->actions_len);
-
- /* Check whether sub-actions looped too much. */
- if (unlikely(loop->looping))
- loop_suppress(dp, acts);
-
-out_loop:
- /* Decrement loop counter. */
- if (!--loop->count)
- loop->looping = false;
- loop_put_counter();
+ execute_actions(dp, skb);
out:
/* Update datapath statistics. */
get_skb_csum_pointers(skb, &csum_start, &csum_offset);
csum_start -= skb_headroom(skb);
- BUG_ON(csum_start >= skb_headlen(skb));
skb_copy_bits(skb, 0, to, csum_start);
*(__sum16 *)(to + csum_start + csum_offset) = csum_fold(csum);
}
-static struct genl_family dp_packet_genl_family;
+static struct genl_family dp_packet_genl_family = {
+ .id = GENL_ID_GENERATE, /* NOTE(review): presumably requests a dynamically assigned id -- confirm */
+ .hdrsize = sizeof(struct odp_header), /* family-specific header is a struct odp_header */
+ .name = ODP_PACKET_FAMILY,
+ .version = 1,
+ .maxattr = ODP_PACKET_ATTR_MAX
+};
+
+/* Generic Netlink multicast groups for upcalls.
+ *
+ * We really want three unique multicast groups per datapath, but we can't even
+ * get one, because genl_register_mc_group() takes genl_lock, which is also
+ * held during Generic Netlink message processing, so trying to acquire
+ * multicast groups during ODP_DP_NEW processing deadlocks. Instead, we
+ * preallocate a few groups and pick one for each (datapath, command) pair by
+ * hashing; see packet_mc_group(). Collision isn't fatal--multicast listeners
+ * should check that the family is the one that they want and discard
+ * others--but it wastes time and memory to receive unwanted messages.
+ */
#define PACKET_N_MC_GROUPS 16
+static struct genl_multicast_group packet_mc_groups[PACKET_N_MC_GROUPS];
-static int packet_mc_group(struct datapath *dp, u8 cmd)
+static u32 packet_mc_group(struct datapath *dp, u8 cmd)
{
+ u32 idx;
BUILD_BUG_ON_NOT_POWER_OF_2(PACKET_N_MC_GROUPS);
- return jhash_2words(dp->dp_ifindex, cmd, 0) & (PACKET_N_MC_GROUPS - 1);
+ /* Hash (dp_ifindex, cmd) into a slot of packet_mc_groups[] and return
+  * that slot's .id rather than the raw slot index.  Masking with
+  * PACKET_N_MC_GROUPS - 1 only works as a modulo because the count is a
+  * power of 2, which the BUILD_BUG_ON above checks at compile time. */
+ idx = jhash_2words(dp->dp_ifindex, cmd, 0) & (PACKET_N_MC_GROUPS - 1);
+ return packet_mc_groups[idx].id;
+}
+/*
+ * Names the entries of packet_mc_groups[] "packet0", "packet1", ... and
+ * registers each of them with dp_packet_genl_family.
+ *
+ * Returns 0 if all PACKET_N_MC_GROUPS groups were registered, otherwise the
+ * first error returned by genl_register_mc_group().  Groups that were
+ * registered before a failure are not unregistered here.
+ */
+static int packet_register_mc_groups(void)
+{
+ int i;
+
+ for (i = 0; i < PACKET_N_MC_GROUPS; i++) {
+ struct genl_multicast_group *group = &packet_mc_groups[i];
+ int error;
+
+ sprintf(group->name, "packet%d", i);
+ error = genl_register_mc_group(&dp_packet_genl_family, group);
+ if (error)
+ return error;
+ }
+ return 0;
+}
+/*
+ * Passes 'skb' up to userspace for 'dp' as described by 'upcall_info'.
+ *
+ * Runs forward_ip_summed() and vswitch_skb_checksum_setup() on the packet,
+ * replaces a GSO packet by its list of segments (the original skb is freed
+ * once segmentation has been attempted), and hands the result to
+ * queue_control_packets().
+ *
+ * Returns 0 on success.  On any failure the n_lost counter of the current
+ * CPU's dp_stats_percpu is incremented and the error code is returned; if
+ * checksum setup failed, 'skb' is freed here first.
+ */
+int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info)
+{
+ struct dp_stats_percpu *stats;
+ int err;
+
+ WARN_ON_ONCE(skb_shared(skb));
+
+ forward_ip_summed(skb);
+
+ err = vswitch_skb_checksum_setup(skb);
+ if (err)
+ goto err_kfree_skb;
+
+ /* Break apart GSO packets into their component pieces. Otherwise
+ * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
+ if (skb_is_gso(skb)) {
+ struct sk_buff *nskb = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
+
+ kfree_skb(skb);
+ skb = nskb;
+ if (IS_ERR(skb)) {
+ err = PTR_ERR(skb);
+ goto err;
+ }
+ }
+ /* NOTE(review): a failure below jumps to 'err', which does not free
+  * skb -- presumably queue_control_packets() has already consumed the
+  * list on its error path; confirm. */
+ err = queue_control_packets(dp, skb, upcall_info);
+ if (err)
+ goto err;
+
+ return 0;
+
+err_kfree_skb:
+ kfree_skb(skb);
+err:
+ local_bh_disable();
+ stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
+
+ write_seqcount_begin(&stats->seqlock);
+ stats->n_lost++;
+ write_seqcount_end(&stats->seqlock);
+
+ local_bh_enable();
+
+ return err;
}
/* Send each packet in the 'skb' list to userspace for 'dp' as directed by
{
u32 group = packet_mc_group(dp, upcall_info->cmd);
struct sk_buff *nskb;
- int port_no;
int err;
- if (OVS_CB(skb)->vport)
- port_no = OVS_CB(skb)->vport->port_no;
- else
- port_no = ODPP_LOCAL;
-
do {
struct odp_header *upcall;
struct sk_buff *user_skb; /* to be queued to userspace */
nskb = skb->next;
skb->next = NULL;
+ err = vlan_deaccel_tag(skb);
+ if (unlikely(err))
+ goto err_kfree_skbs;
+
+ if (nla_attr_size(skb->len) > USHRT_MAX)
+ goto err_kfree_skbs;
+
len = sizeof(struct odp_header);
- len += nla_total_size(4); /* ODP_PACKET_ATTR_TYPE. */
len += nla_total_size(skb->len);
len += nla_total_size(FLOW_BUFSIZE);
if (upcall_info->userdata)
return err;
}
-/* Generic Netlink multicast groups for upcalls.
- *
- * We really want three unique multicast groups per datapath, but we can't even
- * get one, because genl_register_mc_group() takes genl_lock, which is also
- * held during Generic Netlink message processing, so trying to acquire
- * multicast groups during ODP_DP_NEW processing deadlocks. Instead, we
- * preallocate a few groups and use them round-robin for datapaths. Collision
- * isn't fatal--multicast listeners should check that the family is the one
- * that they want and discard others--but it wastes time and memory to receive
- * unwanted messages.
- */
-static struct genl_multicast_group packet_mc_groups[PACKET_N_MC_GROUPS];
-
-static struct genl_family dp_packet_genl_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = sizeof(struct odp_header),
- .name = ODP_PACKET_FAMILY,
- .version = 1,
- .maxattr = ODP_PACKET_ATTR_MAX
-};
-
-static int packet_register_mc_groups(void)
-{
- int i;
-
- for (i = 0; i < PACKET_N_MC_GROUPS; i++) {
- struct genl_multicast_group *group = &packet_mc_groups[i];
- int error;
-
- sprintf(group->name, "packet%d", i);
- error = genl_register_mc_group(&dp_packet_genl_family, group);
- if (error)
- return error;
- }
- return 0;
-}
-
-int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info)
-{
- struct dp_stats_percpu *stats;
- int err;
-
- WARN_ON_ONCE(skb_shared(skb));
-
- forward_ip_summed(skb);
-
- err = vswitch_skb_checksum_setup(skb);
- if (err)
- goto err_kfree_skb;
-
- /* Break apart GSO packets into their component pieces. Otherwise
- * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
- if (skb_is_gso(skb)) {
- struct sk_buff *nskb = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
-
- kfree_skb(skb);
- skb = nskb;
- if (IS_ERR(skb)) {
- err = PTR_ERR(skb);
- goto err;
- }
- }
-
- return queue_control_packets(dp, skb, upcall_info);
-
-err_kfree_skb:
- kfree_skb(skb);
-err:
- local_bh_disable();
- stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
-
- write_seqcount_begin(&stats->seqlock);
- stats->n_lost++;
- write_seqcount_end(&stats->seqlock);
-
- local_bh_enable();
-
- return err;
-}
-
/* Called with genl_mutex. */
static int flush_flows(int dp_ifindex)
{
[ODP_ACTION_ATTR_SET_TUNNEL] = 8,
[ODP_ACTION_ATTR_SET_PRIORITY] = 4,
[ODP_ACTION_ATTR_POP_PRIORITY] = 0,
- [ODP_ACTION_ATTR_DROP_SPOOFED_ARP] = 0,
};
int type = nla_type(a);
case ODP_ACTION_ATTR_SET_TUNNEL:
case ODP_ACTION_ATTR_SET_PRIORITY:
case ODP_ACTION_ATTR_POP_PRIORITY:
- case ODP_ACTION_ATTR_DROP_SPOOFED_ARP:
/* No validation needed. */
break;
{
struct odp_header *odp_header = info->userhdr;
struct nlattr **a = info->attrs;
+ struct sw_flow_actions *acts;
struct sk_buff *packet;
- struct sw_flow_key key;
+ struct sw_flow *flow;
struct datapath *dp;
struct ethhdr *eth;
bool is_frag;
+ int len;
int err;
+ int key_len;
err = -EINVAL;
- if (!a[ODP_PACKET_ATTR_PACKET] || !a[ODP_PACKET_ATTR_ACTIONS] ||
+ if (!a[ODP_PACKET_ATTR_PACKET] || !a[ODP_PACKET_ATTR_KEY] ||
+ !a[ODP_PACKET_ATTR_ACTIONS] ||
nla_len(a[ODP_PACKET_ATTR_PACKET]) < ETH_HLEN)
- goto exit;
+ goto err;
err = validate_actions(a[ODP_PACKET_ATTR_ACTIONS]);
if (err)
- goto exit;
+ goto err;
- packet = skb_clone(skb, GFP_KERNEL);
+ len = nla_len(a[ODP_PACKET_ATTR_PACKET]);
+ packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
err = -ENOMEM;
if (!packet)
- goto exit;
- packet->data = nla_data(a[ODP_PACKET_ATTR_PACKET]);
- packet->len = nla_len(a[ODP_PACKET_ATTR_PACKET]);
+ goto err;
+ skb_reserve(packet, NET_IP_ALIGN);
+
+ memcpy(__skb_put(packet, len), nla_data(a[ODP_PACKET_ATTR_PACKET]), len);
skb_reset_mac_header(packet);
eth = eth_hdr(packet);
else
packet->protocol = htons(ETH_P_802_2);
- err = flow_extract(packet, -1, &key, &is_frag);
+ /* Build an sw_flow for sending this packet. */
+ flow = flow_alloc();
+ err = PTR_ERR(flow);
+ if (IS_ERR(flow))
+ goto err_kfree_skb;
+
+ err = flow_extract(packet, -1, &flow->key, &key_len, &is_frag);
if (err)
- goto exit;
+ goto err_flow_put;
+ flow->tbl_node.hash = flow_hash(&flow->key, key_len);
+
+ err = flow_metadata_from_nlattrs(&flow->key.eth.in_port,
+ &flow->key.eth.tun_id,
+ a[ODP_PACKET_ATTR_KEY]);
+ if (err)
+ goto err_flow_put;
+
+ acts = flow_actions_alloc(a[ODP_PACKET_ATTR_ACTIONS]);
+ err = PTR_ERR(acts);
+ if (IS_ERR(acts))
+ goto err_flow_put;
+ rcu_assign_pointer(flow->sf_acts, acts);
+
+ OVS_CB(packet)->flow = flow;
rcu_read_lock();
dp = get_dp(odp_header->dp_ifindex);
err = -ENODEV;
- if (dp)
- err = execute_actions(dp, packet, &key,
- nla_data(a[ODP_PACKET_ATTR_ACTIONS]),
- nla_len(a[ODP_PACKET_ATTR_ACTIONS]));
+ if (!dp)
+ goto err_unlock;
+ err = execute_actions(dp, packet);
rcu_read_unlock();
-exit:
+ flow_put(flow);
+ return err;
+
+err_unlock:
+ rcu_read_unlock();
+err_flow_put:
+ flow_put(flow);
+err_kfree_skb:
+ kfree_skb(packet);
+err:
return err;
}
static const struct nla_policy packet_policy[ODP_PACKET_ATTR_MAX + 1] = {
[ODP_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
+ [ODP_PACKET_ATTR_KEY] = { .type = NLA_NESTED }, /* nested attrs handed to flow_metadata_from_nlattrs() */
[ODP_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
};
continue;
dev_mtu = vport_get_mtu(p);
+ if (!dev_mtu)
+ continue;
if (!mtu || dev_mtu < mtu)
mtu = dev_mtu;
}
struct nlattr *nla;
unsigned long used;
u8 tcp_flags;
- int nla_len;
int err;
sf_acts = rcu_dereference_protected(flow->sf_acts,
spin_unlock_bh(&flow->lock);
if (used)
- NLA_PUT_MSECS(skb, ODP_FLOW_ATTR_USED, used);
+ NLA_PUT_U64(skb, ODP_FLOW_ATTR_USED, flow_used_time(used));
if (stats.n_packets)
NLA_PUT(skb, ODP_FLOW_ATTR_STATS, sizeof(struct odp_flow_stats), &stats);
if (tcp_flags)
NLA_PUT_U8(skb, ODP_FLOW_ATTR_TCP_FLAGS, tcp_flags);
- /* If ODP_FLOW_ATTR_ACTIONS doesn't fit, and this is the first flow to
- * be dumped into 'skb', then expand the skb. This is unusual for
- * Netlink but individual action lists can be longer than a page and
- * thus entirely undumpable if we didn't do this. */
- nla_len = nla_total_size(sf_acts->actions_len);
- if (nla_len > skb_tailroom(skb) && !skb_orig_len) {
- int hdr_off = (unsigned char *)odp_header - skb->data;
-
- err = pskb_expand_head(skb, 0, nla_len - skb_tailroom(skb), GFP_KERNEL);
- if (err)
- goto error;
-
- odp_header = (struct odp_header *)(skb->data + hdr_off);
- }
- nla = nla_nest_start(skb, ODP_FLOW_ATTR_ACTIONS);
- memcpy(__skb_put(skb, sf_acts->actions_len), sf_acts->actions, sf_acts->actions_len);
- nla_nest_end(skb, nla);
+ /* If ODP_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
+ * this is the first flow to be dumped into 'skb'. This is unusual for
+ * Netlink but individual action lists can be longer than
+ * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
+ * The userspace caller can always fetch the actions separately if it
+ * really wants them. (Most userspace callers in fact don't care.)
+ *
+ * This can only fail for dump operations because the skb is always
+ * properly sized for single flows.
+ */
+ err = nla_put(skb, ODP_FLOW_ATTR_ACTIONS, sf_acts->actions_len,
+ sf_acts->actions);
+ if (err < 0 && skb_orig_len)
+ goto error;
return genlmsg_end(skb, odp_header);
struct tbl *table;
u32 hash;
int error;
+ int key_len;
/* Extract key. */
error = -EINVAL;
if (!a[ODP_FLOW_ATTR_KEY])
goto error;
- error = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
+ error = flow_from_nlattrs(&key, &key_len, a[ODP_FLOW_ATTR_KEY]);
if (error)
goto error;
if (!dp)
goto error;
- hash = flow_hash(&key);
+ hash = flow_hash(&key, key_len);
table = get_table_protected(dp);
- flow_node = tbl_lookup(table, &key, hash, flow_cmp);
+ flow_node = tbl_lookup(table, &key, key_len, hash, flow_cmp);
if (!flow_node) {
struct sw_flow_actions *acts;
struct datapath *dp;
struct tbl *table;
int err;
+ int key_len;
if (!a[ODP_FLOW_ATTR_KEY])
return -EINVAL;
- err = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
+ err = flow_from_nlattrs(&key, &key_len, a[ODP_FLOW_ATTR_KEY]);
if (err)
return err;
return -ENODEV;
table = get_table_protected(dp);
- flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
+ flow_node = tbl_lookup(table, &key, key_len, flow_hash(&key, key_len),
+ flow_cmp);
if (!flow_node)
return -ENOENT;
struct datapath *dp;
struct tbl *table;
int err;
+ int key_len;
if (!a[ODP_FLOW_ATTR_KEY])
return flush_flows(odp_header->dp_ifindex);
- err = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
+ err = flow_from_nlattrs(&key, &key_len, a[ODP_FLOW_ATTR_KEY]);
if (err)
return err;
return -ENODEV;
table = get_table_protected(dp);
- flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
+ flow_node = tbl_lookup(table, &key, key_len, flow_hash(&key, key_len),
+ flow_cmp);
if (!flow_node)
return -ENOENT;
flow = flow_cast(flow_node);
return -EINVAL;
}
- return VERIFY_NUL_STRING(a[ODP_DP_ATTR_NAME], IFNAMSIZ - 1);
+ return CHECK_NUL_STRING(a[ODP_DP_ATTR_NAME], IFNAMSIZ - 1);
}
/* Called with genl_mutex and optionally with RTNL lock also. */
list_del(&dp->list_node);
dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));
+ /* rtnl_unlock() will wait until all the references to devices that
+ * are pending unregistration have been dropped. We do it here to
+ * ensure that any internal devices (which contain DP pointers) are
+ * fully destroyed before freeing the datapath.
+ */
+ rtnl_unlock();
+
call_rcu(&dp->rcu, destroy_dp_rcu);
module_put(THIS_MODULE);
genl_notify(reply, genl_info_net(info), info->snd_pid,
dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
- err = 0;
+
+ return 0;
exit_unlock:
rtnl_unlock();
struct odp_header *odp_header;
struct nlattr *nla;
int ifindex, iflink;
+ int mtu;
int err;
odp_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family,
NLA_PUT(skb, ODP_VPORT_ATTR_ADDRESS, ETH_ALEN, vport_get_addr(vport));
- NLA_PUT_U32(skb, ODP_VPORT_ATTR_MTU, vport_get_mtu(vport));
+ mtu = vport_get_mtu(vport);
+ if (mtu)
+ NLA_PUT_U32(skb, ODP_VPORT_ATTR_MTU, mtu);
err = vport_get_options(vport, skb);
if (err == -EMSGSIZE)
static int odp_vport_cmd_validate(struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
{
- return VERIFY_NUL_STRING(a[ODP_VPORT_ATTR_NAME], IFNAMSIZ - 1);
+ return CHECK_NUL_STRING(a[ODP_VPORT_ATTR_NAME], IFNAMSIZ - 1); /* only the name attribute is validated here */
}
/* Called with RTNL lock or RCU read lock. */
if (IS_ERR(reply))
goto exit_unlock;
- err = genlmsg_reply(reply, info);
+ rcu_read_unlock();
+
+ return genlmsg_reply(reply, info);
exit_unlock:
rcu_read_unlock();