Prepare Open vSwitch 1.1.2 release.
[sliver-openvswitch.git] / datapath / datapath.c
index 8931456..d901e6b 100644 (file)
 #include "flow.h"
 #include "loop_counter.h"
 #include "table.h"
+#include "vlan.h"
 #include "vport-internal_dev.h"
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) || \
+    LINUX_VERSION_CODE > KERNEL_VERSION(2,6,38)
+#error Kernels before 2.6.18 or after 2.6.38 are not supported by this version of Open vSwitch.
+#endif
+
 int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
 EXPORT_SYMBOL(dp_ioctl_hook);
 
@@ -73,20 +79,31 @@ EXPORT_SYMBOL(dp_ioctl_hook);
  * each other.
  */
 
-/* Protected by genl_mutex. */
-static struct datapath __rcu *dps[256];
+/* Global list of datapaths to enable dumping them all out.
+ * Protected by genl_mutex.
+ */
+static LIST_HEAD(dps);
 
 static struct vport *new_vport(const struct vport_parms *);
+static int queue_control_packets(struct datapath *, struct sk_buff *,
+                                const struct dp_upcall_info *);
 
 /* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
-struct datapath *get_dp(int dp_idx)
+struct datapath *get_dp(int dp_ifindex)
 {
-       if (dp_idx < 0 || dp_idx >= ARRAY_SIZE(dps))
-               return NULL;
+       struct datapath *dp = NULL;
+       struct net_device *dev;
+
+       rcu_read_lock();
+       dev = dev_get_by_index_rcu(&init_net, dp_ifindex);
+       if (dev) {
+               struct vport *vport = internal_dev_get_vport(dev);
+               if (vport)
+                       dp = vport->dp;
+       }
+       rcu_read_unlock();
 
-       return rcu_dereference_check(dps[dp_idx], rcu_read_lock_held() ||
-                                        lockdep_rtnl_is_held() ||
-                                        lockdep_genl_is_held());
+       return dp;
 }
 EXPORT_SYMBOL_GPL(get_dp);
 
@@ -347,7 +364,6 @@ static void copy_and_csum_skb(struct sk_buff *skb, void *to)
 
        get_skb_csum_pointers(skb, &csum_start, &csum_offset);
        csum_start -= skb_headroom(skb);
-       BUG_ON(csum_start >= skb_headlen(skb));
 
        skb_copy_bits(skb, 0, to, csum_start);
 
@@ -356,13 +372,98 @@ static void copy_and_csum_skb(struct sk_buff *skb, void *to)
        *(__sum16 *)(to + csum_start + csum_offset) = csum_fold(csum);
 }
 
-static struct genl_family dp_packet_genl_family;
+static struct genl_family dp_packet_genl_family = {
+       .id = GENL_ID_GENERATE,
+       .hdrsize = sizeof(struct odp_header),
+       .name = ODP_PACKET_FAMILY,
+       .version = 1,
+       .maxattr = ODP_PACKET_ATTR_MAX
+};
+
+/* Generic Netlink multicast groups for upcalls.
+ *
+ * We really want three unique multicast groups per datapath, but we can't even
+ * get one, because genl_register_mc_group() takes genl_lock, which is also
+ * held during Generic Netlink message processing, so trying to acquire
+ * multicast groups during ODP_DP_NEW processing deadlocks.  Instead, we
+ * preallocate a few groups and use them round-robin for datapaths.  Collision
+ * isn't fatal--multicast listeners should check that the family is the one
+ * that they want and discard others--but it wastes time and memory to receive
+ * unwanted messages.
+ */
 #define PACKET_N_MC_GROUPS 16
+static struct genl_multicast_group packet_mc_groups[PACKET_N_MC_GROUPS];
 
-static int packet_mc_group(struct datapath *dp, u8 cmd)
+static u32 packet_mc_group(struct datapath *dp, u8 cmd)
 {
+       u32 hash = jhash_2words(dp->dp_ifindex, cmd, 0);
        BUILD_BUG_ON_NOT_POWER_OF_2(PACKET_N_MC_GROUPS);
-       return jhash_2words(dp->dp_idx, cmd, 0) & (PACKET_N_MC_GROUPS - 1);
+
+       /* Masking acts as a modulo because the group count is a power of 2. */
+       return packet_mc_groups[hash & (PACKET_N_MC_GROUPS - 1)].id;
+}
+
+/* Names and registers every preallocated upcall multicast group. */
+static int packet_register_mc_groups(void)
+{
+       int err = 0;
+       int n;
+
+       /* The loop ends at the first failure, whose code is returned. */
+       for (n = 0; n < PACKET_N_MC_GROUPS && !err; n++) {
+               struct genl_multicast_group *grp = packet_mc_groups + n;
+
+               sprintf(grp->name, "packet%d", n);
+               err = genl_register_mc_group(&dp_packet_genl_family, grp);
+       }
+       return err;
+}
+
+/* Queues 'skb' for userspace; any failure bumps this CPU's n_lost count. */
+int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info)
+{
+       struct dp_stats_percpu *pcpu;
+       int rc;
+
+       WARN_ON_ONCE(skb_shared(skb));
+       forward_ip_summed(skb);
+
+       rc = vswitch_skb_checksum_setup(skb);
+       if (rc) {
+               kfree_skb(skb);
+               goto lost;
+       }
+
+       /* Userspace must not receive an oversized GSO packet, so split it
+        * into segments; the original skb is released either way. */
+       if (skb_is_gso(skb)) {
+               struct sk_buff *segs;
+
+               segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
+               kfree_skb(skb);
+               if (IS_ERR(segs)) {
+                       rc = PTR_ERR(segs);
+                       goto lost;
+               }
+               skb = segs;
+       }
+
+       rc = queue_control_packets(dp, skb, upcall_info);
+       if (!rc)
+               return 0;
+
+lost:
+       /* Bottom halves stay off while the per-CPU counter is updated. */
+       local_bh_disable();
+       pcpu = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
+
+       write_seqcount_begin(&pcpu->seqlock);
+       pcpu->n_lost++;
+       write_seqcount_end(&pcpu->seqlock);
+
+       local_bh_enable();
+
+       return rc;
 }
 
 /* Send each packet in the 'skb' list to userspace for 'dp' as directed by
@@ -391,8 +492,14 @@ static int queue_control_packets(struct datapath *dp, struct sk_buff *skb,
                nskb = skb->next;
                skb->next = NULL;
 
+               err = vlan_deaccel_tag(skb);
+               if (unlikely(err))
+                       goto err_kfree_skbs;
+
+               if (nla_attr_size(skb->len) > USHRT_MAX)
+                       goto err_kfree_skbs;
+
                len = sizeof(struct odp_header);
-               len += nla_total_size(4); /* ODP_PACKET_ATTR_TYPE. */
                len += nla_total_size(skb->len);
                len += nla_total_size(FLOW_BUFSIZE);
                if (upcall_info->userdata)
@@ -409,7 +516,7 @@ static int queue_control_packets(struct datapath *dp, struct sk_buff *skb,
                }
 
                upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 0, upcall_info->cmd);
-               upcall->dp_idx = dp->dp_idx;
+               upcall->dp_ifindex = dp->dp_ifindex;
 
                nla = nla_nest_start(user_skb, ODP_PACKET_ATTR_KEY);
                flow_to_nlattrs(upcall_info->key, user_skb);
@@ -452,94 +559,14 @@ err_kfree_skbs:
        return err;
 }
 
-/* Generic Netlink multicast groups for upcalls.
- *
- * We really want three unique multicast groups per datapath, but we can't even
- * get one, because genl_register_mc_group() takes genl_lock, which is also
- * held during Generic Netlink message processing, so trying to acquire
- * multicast groups during ODP_DP_NEW processing deadlocks.  Instead, we
- * preallocate a few groups and use them round-robin for datapaths.  Collision
- * isn't fatal--multicast listeners should check that the family is the one
- * that they want and discard others--but it wastes time and memory to receive
- * unwanted messages.
- */
-static struct genl_multicast_group packet_mc_groups[PACKET_N_MC_GROUPS];
-
-static struct genl_family dp_packet_genl_family = {
-       .id = GENL_ID_GENERATE,
-       .hdrsize = sizeof(struct odp_header),
-       .name = ODP_PACKET_FAMILY,
-       .version = 1,
-       .maxattr = ODP_PACKET_ATTR_MAX
-};
-
-static int packet_register_mc_groups(void)
-{
-       int i;
-
-       for (i = 0; i < PACKET_N_MC_GROUPS; i++) {
-               struct genl_multicast_group *group = &packet_mc_groups[i];
-               int error;
-
-               sprintf(group->name, "packet%d", i);
-               error = genl_register_mc_group(&dp_packet_genl_family, group);
-               if (error)
-                       return error;
-       }
-       return 0;
-}
-
-int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info)
-{
-       struct dp_stats_percpu *stats;
-       int err;
-
-       WARN_ON_ONCE(skb_shared(skb));
-
-       forward_ip_summed(skb);
-
-       err = vswitch_skb_checksum_setup(skb);
-       if (err)
-               goto err_kfree_skb;
-
-       /* Break apart GSO packets into their component pieces.  Otherwise
-        * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
-       if (skb_is_gso(skb)) {
-               struct sk_buff *nskb = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
-               
-               kfree_skb(skb);
-               skb = nskb;
-               if (IS_ERR(skb)) {
-                       err = PTR_ERR(skb);
-                       goto err;
-               }
-       }
-
-       return queue_control_packets(dp, skb, upcall_info);
-
-err_kfree_skb:
-       kfree_skb(skb);
-err:
-       local_bh_disable();
-       stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
-
-       write_seqcount_begin(&stats->seqlock);
-       stats->n_lost++;
-       write_seqcount_end(&stats->seqlock);
-
-       local_bh_enable();
-
-       return err;
-}
-
 /* Called with genl_mutex. */
-static int flush_flows(int dp_idx)
+static int flush_flows(int dp_ifindex)
 {
        struct tbl *old_table;
        struct tbl *new_table;
        struct datapath *dp;
 
-       dp = get_dp(dp_idx);
+       dp = get_dp(dp_ifindex);
        if (!dp)
                return -ENODEV;
 
@@ -561,58 +588,58 @@ static int validate_actions(const struct nlattr *attr)
        int rem;
 
        nla_for_each_nested(a, attr, rem) {
-               static const u32 action_lens[ODPAT_MAX + 1] = {
-                       [ODPAT_OUTPUT] = 4,
-                       [ODPAT_CONTROLLER] = 8,
-                       [ODPAT_SET_DL_TCI] = 2,
-                       [ODPAT_STRIP_VLAN] = 0,
-                       [ODPAT_SET_DL_SRC] = ETH_ALEN,
-                       [ODPAT_SET_DL_DST] = ETH_ALEN,
-                       [ODPAT_SET_NW_SRC] = 4,
-                       [ODPAT_SET_NW_DST] = 4,
-                       [ODPAT_SET_NW_TOS] = 1,
-                       [ODPAT_SET_TP_SRC] = 2,
-                       [ODPAT_SET_TP_DST] = 2,
-                       [ODPAT_SET_TUNNEL] = 8,
-                       [ODPAT_SET_PRIORITY] = 4,
-                       [ODPAT_POP_PRIORITY] = 0,
-                       [ODPAT_DROP_SPOOFED_ARP] = 0,
+               static const u32 action_lens[ODP_ACTION_ATTR_MAX + 1] = {
+                       [ODP_ACTION_ATTR_OUTPUT] = 4,
+                       [ODP_ACTION_ATTR_CONTROLLER] = 8,
+                       [ODP_ACTION_ATTR_SET_DL_TCI] = 2,
+                       [ODP_ACTION_ATTR_STRIP_VLAN] = 0,
+                       [ODP_ACTION_ATTR_SET_DL_SRC] = ETH_ALEN,
+                       [ODP_ACTION_ATTR_SET_DL_DST] = ETH_ALEN,
+                       [ODP_ACTION_ATTR_SET_NW_SRC] = 4,
+                       [ODP_ACTION_ATTR_SET_NW_DST] = 4,
+                       [ODP_ACTION_ATTR_SET_NW_TOS] = 1,
+                       [ODP_ACTION_ATTR_SET_TP_SRC] = 2,
+                       [ODP_ACTION_ATTR_SET_TP_DST] = 2,
+                       [ODP_ACTION_ATTR_SET_TUNNEL] = 8,
+                       [ODP_ACTION_ATTR_SET_PRIORITY] = 4,
+                       [ODP_ACTION_ATTR_POP_PRIORITY] = 0,
+                       [ODP_ACTION_ATTR_DROP_SPOOFED_ARP] = 0,
                };
                int type = nla_type(a);
 
-               if (type > ODPAT_MAX || nla_len(a) != action_lens[type])
+               if (type > ODP_ACTION_ATTR_MAX || nla_len(a) != action_lens[type])
                        return -EINVAL;
 
                switch (type) {
-               case ODPAT_UNSPEC:
+               case ODP_ACTION_ATTR_UNSPEC:
                        return -EINVAL;
 
-               case ODPAT_CONTROLLER:
-               case ODPAT_STRIP_VLAN:
-               case ODPAT_SET_DL_SRC:
-               case ODPAT_SET_DL_DST:
-               case ODPAT_SET_NW_SRC:
-               case ODPAT_SET_NW_DST:
-               case ODPAT_SET_TP_SRC:
-               case ODPAT_SET_TP_DST:
-               case ODPAT_SET_TUNNEL:
-               case ODPAT_SET_PRIORITY:
-               case ODPAT_POP_PRIORITY:
-               case ODPAT_DROP_SPOOFED_ARP:
+               case ODP_ACTION_ATTR_CONTROLLER:
+               case ODP_ACTION_ATTR_STRIP_VLAN:
+               case ODP_ACTION_ATTR_SET_DL_SRC:
+               case ODP_ACTION_ATTR_SET_DL_DST:
+               case ODP_ACTION_ATTR_SET_NW_SRC:
+               case ODP_ACTION_ATTR_SET_NW_DST:
+               case ODP_ACTION_ATTR_SET_TP_SRC:
+               case ODP_ACTION_ATTR_SET_TP_DST:
+               case ODP_ACTION_ATTR_SET_TUNNEL:
+               case ODP_ACTION_ATTR_SET_PRIORITY:
+               case ODP_ACTION_ATTR_POP_PRIORITY:
+               case ODP_ACTION_ATTR_DROP_SPOOFED_ARP:
                        /* No validation needed. */
                        break;
 
-               case ODPAT_OUTPUT:
+               case ODP_ACTION_ATTR_OUTPUT:
                        if (nla_get_u32(a) >= DP_MAX_PORTS)
                                return -EINVAL;
                        break;
 
-               case ODPAT_SET_DL_TCI:
+               case ODP_ACTION_ATTR_SET_DL_TCI:
                        if (nla_get_be16(a) & htons(VLAN_CFI_MASK))
                                return -EINVAL;
                        break;
 
-               case ODPAT_SET_NW_TOS:
+               case ODP_ACTION_ATTR_SET_NW_TOS:
                        if (nla_get_u8(a) & INET_ECN_MASK)
                                return -EINVAL;
                        break;
@@ -660,23 +687,26 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
        struct datapath *dp;
        struct ethhdr *eth;
        bool is_frag;
+       int len;
        int err;
 
        err = -EINVAL;
        if (!a[ODP_PACKET_ATTR_PACKET] || !a[ODP_PACKET_ATTR_ACTIONS] ||
            nla_len(a[ODP_PACKET_ATTR_PACKET]) < ETH_HLEN)
-               goto exit;
+               goto err;
 
        err = validate_actions(a[ODP_PACKET_ATTR_ACTIONS]);
        if (err)
-               goto exit;
+               goto err;
 
-       packet = skb_clone(skb, GFP_KERNEL);
+       len = nla_len(a[ODP_PACKET_ATTR_PACKET]);
+       packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
        err = -ENOMEM;
        if (!packet)
-               goto exit;
-       packet->data = nla_data(a[ODP_PACKET_ATTR_PACKET]);
-       packet->len = nla_len(a[ODP_PACKET_ATTR_PACKET]);
+               goto err;
+       skb_reserve(packet, NET_IP_ALIGN);
+
+       memcpy(__skb_put(packet, len), nla_data(a[ODP_PACKET_ATTR_PACKET]), len);
 
        skb_reset_mac_header(packet);
        eth = eth_hdr(packet);
@@ -689,20 +719,29 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
        else
                packet->protocol = htons(ETH_P_802_2);
 
+       /* Initialize OVS_CB (it came from Netlink so might not be zeroed). */
+       memset(OVS_CB(packet), 0, sizeof(struct ovs_skb_cb));
+
        err = flow_extract(packet, -1, &key, &is_frag);
        if (err)
-               goto exit;
+               goto err_kfree_skb;
 
        rcu_read_lock();
-       dp = get_dp(odp_header->dp_idx);
+       dp = get_dp(odp_header->dp_ifindex);
        err = -ENODEV;
-       if (dp)
-               err = execute_actions(dp, packet, &key,
-                                     nla_data(a[ODP_PACKET_ATTR_ACTIONS]),
-                                     nla_len(a[ODP_PACKET_ATTR_ACTIONS]));
+       if (!dp)
+               goto err_unlock;
+       err = execute_actions(dp, packet, &key,
+                             nla_data(a[ODP_PACKET_ATTR_ACTIONS]),
+                             nla_len(a[ODP_PACKET_ATTR_ACTIONS]));
        rcu_read_unlock();
+       return err;
 
-exit:
+err_unlock:
+       rcu_read_unlock();
+err_kfree_skb:
+       kfree_skb(packet);
+err:
        return err;
 }
 
@@ -762,6 +801,8 @@ int dp_min_mtu(const struct datapath *dp)
                        continue;
 
                dev_mtu = vport_get_mtu(p);
+               if (!dev_mtu)
+                       continue;
                if (!mtu || dev_mtu < mtu)
                        mtu = dev_mtu;
        }
@@ -816,7 +857,6 @@ static int odp_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
        struct nlattr *nla;
        unsigned long used;
        u8 tcp_flags;
-       int nla_len;
        int err;
 
        sf_acts = rcu_dereference_protected(flow->sf_acts,
@@ -826,7 +866,7 @@ static int odp_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
        if (!odp_header)
                return -EMSGSIZE;
 
-       odp_header->dp_idx = dp->dp_idx;
+       odp_header->dp_ifindex = dp->dp_ifindex;
 
        nla = nla_nest_start(skb, ODP_FLOW_ATTR_KEY);
        if (!nla)
@@ -844,7 +884,7 @@ static int odp_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
        spin_unlock_bh(&flow->lock);
 
        if (used)
-               NLA_PUT_MSECS(skb, ODP_FLOW_ATTR_USED, used);
+               NLA_PUT_U64(skb, ODP_FLOW_ATTR_USED, flow_used_time(used));
 
        if (stats.n_packets)
                NLA_PUT(skb, ODP_FLOW_ATTR_STATS, sizeof(struct odp_flow_stats), &stats);
@@ -852,23 +892,20 @@ static int odp_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
        if (tcp_flags)
                NLA_PUT_U8(skb, ODP_FLOW_ATTR_TCP_FLAGS, tcp_flags);
 
-       /* If ODP_FLOW_ATTR_ACTIONS doesn't fit, and this is the first flow to
-        * be dumped into 'skb', then expand the skb.  This is unusual for
-        * Netlink but individual action lists can be longer than a page and
-        * thus entirely undumpable if we didn't do this. */
-       nla_len = nla_total_size(sf_acts->actions_len);
-       if (nla_len > skb_tailroom(skb) && !skb_orig_len) {
-               int hdr_off = (unsigned char *)odp_header - skb->data;
-
-               err = pskb_expand_head(skb, 0, nla_len - skb_tailroom(skb), GFP_KERNEL);
-               if (err)
-                       goto error;
-
-               odp_header = (struct odp_header *)(skb->data + hdr_off);
-       }
-       nla = nla_nest_start(skb, ODP_FLOW_ATTR_ACTIONS);
-       memcpy(__skb_put(skb, sf_acts->actions_len), sf_acts->actions, sf_acts->actions_len);
-       nla_nest_end(skb, nla);
+       /* If ODP_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
+        * this is the first flow to be dumped into 'skb'.  This is unusual for
+        * Netlink but individual action lists can be longer than
+        * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
+        * The userspace caller can always fetch the actions separately if it
+        * really wants them.  (Most userspace callers in fact don't care.)
+        *
+        * This can only fail for dump operations because the skb is always
+        * properly sized for single flows.
+        */
+       err = nla_put(skb, ODP_FLOW_ATTR_ACTIONS, sf_acts->actions_len,
+                     sf_acts->actions);
+       if (err < 0 && skb_orig_len)
+               goto error;
 
        return genlmsg_end(skb, odp_header);
 
@@ -941,7 +978,7 @@ static int odp_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
                goto error;
        }
 
-       dp = get_dp(odp_header->dp_idx);
+       dp = get_dp(odp_header->dp_ifindex);
        error = -ENODEV;
        if (!dp)
                goto error;
@@ -1065,7 +1102,7 @@ static int odp_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
        if (err)
                return err;
 
-       dp = get_dp(odp_header->dp_idx);
+       dp = get_dp(odp_header->dp_ifindex);
        if (!dp)
                return -ENODEV;
 
@@ -1095,12 +1132,12 @@ static int odp_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
        int err;
 
        if (!a[ODP_FLOW_ATTR_KEY])
-               return flush_flows(odp_header->dp_idx);
+               return flush_flows(odp_header->dp_ifindex);
        err = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
        if (err)
                return err;
 
-       dp = get_dp(odp_header->dp_idx);
+       dp = get_dp(odp_header->dp_ifindex);
        if (!dp)
                return -ENODEV;
 
@@ -1136,7 +1173,7 @@ static int odp_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
        struct odp_header *odp_header = genlmsg_data(nlmsg_data(cb->nlh));
        struct datapath *dp;
 
-       dp = get_dp(odp_header->dp_idx);
+       dp = get_dp(odp_header->dp_ifindex);
        if (!dp)
                return -ENODEV;
 
@@ -1219,7 +1256,7 @@ static int odp_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
        if (!odp_header)
                goto error;
 
-       odp_header->dp_idx = dp->dp_idx;
+       odp_header->dp_ifindex = dp->dp_ifindex;
 
        rcu_read_lock();
        err = nla_put_string(skb, ODP_DP_ATTR_NAME, dp_name(dp));
@@ -1281,30 +1318,25 @@ static int odp_dp_cmd_validate(struct nlattr *a[ODP_DP_ATTR_MAX + 1])
                        return -EINVAL;
        }
 
-       return VERIFY_NUL_STRING(a[ODP_DP_ATTR_NAME], IFNAMSIZ - 1);
+       return CHECK_NUL_STRING(a[ODP_DP_ATTR_NAME], IFNAMSIZ - 1);
 }
 
 /* Called with genl_mutex and optionally with RTNL lock also. */
 static struct datapath *lookup_datapath(struct odp_header *odp_header, struct nlattr *a[ODP_DP_ATTR_MAX + 1])
 {
-       if (!a[ODP_DP_ATTR_NAME]) {
-               struct datapath *dp = get_dp(odp_header->dp_idx);
-               if (!dp)
-                       return ERR_PTR(-ENODEV);
-               return dp;
-       } else {
+       struct datapath *dp;
+
+       if (!a[ODP_DP_ATTR_NAME])
+               dp = get_dp(odp_header->dp_ifindex);
+       else {
                struct vport *vport;
-               int dp_idx;
 
                rcu_read_lock();
                vport = vport_locate(nla_data(a[ODP_DP_ATTR_NAME]));
-               dp_idx = vport && vport->port_no == ODPP_LOCAL ? vport->dp->dp_idx : -1;
+               dp = vport && vport->port_no == ODPP_LOCAL ? vport->dp : NULL;
                rcu_read_unlock();
-
-               if (dp_idx < 0)
-                       return ERR_PTR(-ENODEV);
-               return vport->dp;
        }
+       return dp ? dp : ERR_PTR(-ENODEV);
 }
 
 /* Called with genl_mutex. */
@@ -1319,12 +1351,10 @@ static void change_datapath(struct datapath *dp, struct nlattr *a[ODP_DP_ATTR_MA
 static int odp_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 {
        struct nlattr **a = info->attrs;
-       struct odp_header *odp_header = info->userhdr;
        struct vport_parms parms;
        struct sk_buff *reply;
        struct datapath *dp;
        struct vport *vport;
-       int dp_idx;
        int err;
 
        err = -EINVAL;
@@ -1340,28 +1370,11 @@ static int odp_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
        if (!try_module_get(THIS_MODULE))
                goto err_unlock_rtnl;
 
-       dp_idx = odp_header->dp_idx;
-       if (dp_idx < 0) {
-               err = -EFBIG;
-               for (dp_idx = 0; dp_idx < ARRAY_SIZE(dps); dp_idx++) {
-                       if (get_dp(dp_idx))
-                               continue;
-                       err = 0;
-                       break;
-               }
-       } else if (dp_idx < ARRAY_SIZE(dps))
-               err = get_dp(dp_idx) ? -EBUSY : 0;
-       else
-               err = -EINVAL;
-       if (err)
-               goto err_put_module;
-
        err = -ENOMEM;
        dp = kzalloc(sizeof(*dp), GFP_KERNEL);
        if (dp == NULL)
                goto err_put_module;
        INIT_LIST_HEAD(&dp->port_list);
-       dp->dp_idx = dp_idx;
 
        /* Initialize kobject for bridge.  This will be added as
         * /sys/class/net/<devname>/brif later, if sysfs is enabled. */
@@ -1388,6 +1401,7 @@ static int odp_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 
                goto err_destroy_table;
        }
+       dp->dp_ifindex = vport_get_ifindex(vport);
 
        dp->drop_frags = 0;
        dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
@@ -1403,7 +1417,7 @@ static int odp_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
        if (IS_ERR(reply))
                goto err_destroy_local_port;
 
-       rcu_assign_pointer(dps[dp_idx], dp);
+       list_add_tail(&dp->list_node, &dps);
        dp_sysfs_add_dp(dp);
 
        rtnl_unlock();
@@ -1453,15 +1467,23 @@ static int odp_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
                        dp_detach_port(vport);
 
        dp_sysfs_del_dp(dp);
-       rcu_assign_pointer(dps[dp->dp_idx], NULL);
+       list_del(&dp->list_node);
        dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));
 
+       /* rtnl_unlock() will wait until all the references to devices that
+        * are pending unregistration have been dropped.  We do it here to
+        * ensure that any internal devices (which contain DP pointers) are
+        * fully destroyed before freeing the datapath.
+        */
+       rtnl_unlock();
+
        call_rcu(&dp->rcu, destroy_dp_rcu);
        module_put(THIS_MODULE);
 
        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
-       err = 0;
+
+       return 0;
 
 exit_unlock:
        rtnl_unlock();
@@ -1521,19 +1543,22 @@ static int odp_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
 
 static int odp_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
-       u32 dp_idx;
+       struct datapath *dp;
+       int skip = cb->args[0];
+       int i = 0;
 
-       for (dp_idx = cb->args[0]; dp_idx < ARRAY_SIZE(dps); dp_idx++) {
-               struct datapath *dp = get_dp(dp_idx);
-               if (!dp)
+       list_for_each_entry (dp, &dps, list_node) {
+               if (i < skip)
                        continue;
                if (odp_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid,
                                         cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                         ODP_DP_CMD_NEW) < 0)
                        break;
+               i++;
        }
 
-       cb->args[0] = dp_idx;
+       cb->args[0] = i;
+
        return skb->len;
 }
 
@@ -1595,6 +1620,7 @@ static int odp_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
        struct odp_header *odp_header;
        struct nlattr *nla;
        int ifindex, iflink;
+       int mtu;
        int err;
 
        odp_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family,
@@ -1602,7 +1628,7 @@ static int odp_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
        if (!odp_header)
                return -EMSGSIZE;
 
-       odp_header->dp_idx = vport->dp->dp_idx;
+       odp_header->dp_ifindex = vport->dp->dp_ifindex;
 
        NLA_PUT_U32(skb, ODP_VPORT_ATTR_PORT_NO, vport->port_no);
        NLA_PUT_U32(skb, ODP_VPORT_ATTR_TYPE, vport_get_type(vport));
@@ -1616,7 +1642,9 @@ static int odp_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
 
        NLA_PUT(skb, ODP_VPORT_ATTR_ADDRESS, ETH_ALEN, vport_get_addr(vport));
 
-       NLA_PUT_U32(skb, ODP_VPORT_ATTR_MTU, vport_get_mtu(vport));
+       mtu = vport_get_mtu(vport);
+       if (mtu)
+               NLA_PUT_U32(skb, ODP_VPORT_ATTR_MTU, mtu);
 
        err = vport_get_options(vport, skb);
        if (err == -EMSGSIZE)
@@ -1660,7 +1688,7 @@ static struct sk_buff *odp_vport_cmd_build_info(struct vport *vport, u32 pid,
 
 static int odp_vport_cmd_validate(struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
 {
-       return VERIFY_NUL_STRING(a[ODP_VPORT_ATTR_NAME], IFNAMSIZ - 1);
+       return CHECK_NUL_STRING(a[ODP_VPORT_ATTR_NAME], IFNAMSIZ - 1);
 }
 
 /* Called with RTNL lock or RCU read lock. */
@@ -1681,7 +1709,7 @@ static struct vport *lookup_vport(struct odp_header *odp_header,
                if (port_no >= DP_MAX_PORTS)
                        return ERR_PTR(-EFBIG);
 
-               dp = get_dp(odp_header->dp_idx);
+               dp = get_dp(odp_header->dp_ifindex);
                if (!dp)
                        return ERR_PTR(-ENODEV);
 
@@ -1726,7 +1754,7 @@ static int odp_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
                goto exit;
 
        rtnl_lock();
-       dp = get_dp(odp_header->dp_idx);
+       dp = get_dp(odp_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto exit_unlock;
@@ -1908,7 +1936,7 @@ static int odp_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
        u32 port_no;
        int retval;
 
-       dp = get_dp(odp_header->dp_idx);
+       dp = get_dp(odp_header->dp_ifindex);
        if (!dp)
                return -ENODEV;