datapath: Use vlan acceleration for vlan operations.
[sliver-openvswitch.git] / datapath / datapath.c
diff --git a/datapath/datapath.c b/datapath/datapath.c
index dee1b0f..c48dc9d 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -79,6 +79,8 @@ EXPORT_SYMBOL(dp_ioctl_hook);
 static LIST_HEAD(dps);
 
 static struct vport *new_vport(const struct vport_parms *);
+static int queue_control_packets(struct datapath *, struct sk_buff *,
+                                const struct dp_upcall_info *);
 
 /* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
 struct datapath *get_dp(int dp_ifindex)
@@ -365,13 +367,94 @@ static void copy_and_csum_skb(struct sk_buff *skb, void *to)
        *(__sum16 *)(to + csum_start + csum_offset) = csum_fold(csum);
 }
 
-static struct genl_family dp_packet_genl_family;
+static struct genl_family dp_packet_genl_family = {
+       .id = GENL_ID_GENERATE,
+       .hdrsize = sizeof(struct odp_header),
+       .name = ODP_PACKET_FAMILY,
+       .version = 1,
+       .maxattr = ODP_PACKET_ATTR_MAX
+};
+
+/* Generic Netlink multicast groups for upcalls.
+ *
+ * We really want three unique multicast groups per datapath, but we can't even
+ * get one, because genl_register_mc_group() takes genl_lock, which is also
+ * held during Generic Netlink message processing, so trying to acquire
+ * multicast groups during ODP_DP_NEW processing deadlocks.  Instead, we
+ * preallocate a few groups and use them round-robin for datapaths.  Collision
+ * isn't fatal--multicast listeners should check that the family is the one
+ * that they want and discard others--but it wastes time and memory to receive
+ * unwanted messages.
+ */
 #define PACKET_N_MC_GROUPS 16
+static struct genl_multicast_group packet_mc_groups[PACKET_N_MC_GROUPS];
 
-static int packet_mc_group(struct datapath *dp, u8 cmd)
+static u32 packet_mc_group(struct datapath *dp, u8 cmd)
 {
+       u32 idx;
        BUILD_BUG_ON_NOT_POWER_OF_2(PACKET_N_MC_GROUPS);
-       return jhash_2words(dp->dp_ifindex, cmd, 0) & (PACKET_N_MC_GROUPS - 1);
+
+       idx = jhash_2words(dp->dp_ifindex, cmd, 0) & (PACKET_N_MC_GROUPS - 1);
+       return packet_mc_groups[idx].id;
+}
+
+static int packet_register_mc_groups(void)
+{
+       int i;
+
+       for (i = 0; i < PACKET_N_MC_GROUPS; i++) {
+               struct genl_multicast_group *group = &packet_mc_groups[i];
+               int error;
+
+               sprintf(group->name, "packet%d", i);
+               error = genl_register_mc_group(&dp_packet_genl_family, group);
+               if (error)
+                       return error;
+       }
+       return 0;
+}
+
+int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info)
+{
+       struct dp_stats_percpu *stats;
+       int err;
+
+       WARN_ON_ONCE(skb_shared(skb));
+
+       forward_ip_summed(skb);
+
+       err = vswitch_skb_checksum_setup(skb);
+       if (err)
+               goto err_kfree_skb;
+
+       /* Break apart GSO packets into their component pieces.  Otherwise
+        * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
+       if (skb_is_gso(skb)) {
+               struct sk_buff *nskb = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
+
+               kfree_skb(skb);
+               skb = nskb;
+               if (IS_ERR(skb)) {
+                       err = PTR_ERR(skb);
+                       goto err;
+               }
+       }
+
+       return queue_control_packets(dp, skb, upcall_info);
+
+err_kfree_skb:
+       kfree_skb(skb);
+err:
+       local_bh_disable();
+       stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
+
+       write_seqcount_begin(&stats->seqlock);
+       stats->n_lost++;
+       write_seqcount_end(&stats->seqlock);
+
+       local_bh_enable();
+
+       return err;
 }
 
 /* Send each packet in the 'skb' list to userspace for 'dp' as directed by
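For context on the multicast-group part of the hunk above: packet_mc_group() now returns the registered Generic Netlink group id rather than a raw index, and the sixteen shared groups are registered once via packet_register_mc_groups(). A hedged userspace-side sketch of how a listener might mirror the selection (jhash_2words() is the kernel routine, so a userspace build needs a matching implementation; and because groups are shared, receivers must still filter by family):

    /* Hypothetical userspace mirror of packet_mc_group() (illustration
     * only): pick which of the 16 shared upcall groups to join for a
     * given datapath ifindex and upcall command.  Messages from other
     * datapaths hashing to the same group must be discarded by the
     * receiver. */
    static uint32_t upcall_mc_group_index(uint32_t dp_ifindex, uint8_t cmd)
    {
            return jhash_2words(dp_ifindex, cmd, 0) & (PACKET_N_MC_GROUPS - 1);
    }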
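dp_upcall() also moves up in the file, ahead of queue_control_packets(), which is why the forward declaration was added in the first hunk. Worth noting for the GSO handling: skb_gso_segment() returns the segments as a singly linked list chained through skb->next, which is exactly the shape queue_control_packets() walks. A minimal sketch of that consumption pattern (consume_segments() and consume() are illustrative names, not from this patch):

    /* Sketch: walk a segment list from skb_gso_segment().  Each segment
     * is detached before being handed off so that, on error, the
     * remainder of the list can still be freed safely. */
    static int consume_segments(struct sk_buff *segs,
                                int (*consume)(struct sk_buff *))
    {
            int err = 0;

            while (segs) {
                    struct sk_buff *next = segs->next;

                    segs->next = NULL;              /* detach */
                    if (!err)
                            err = consume(segs);    /* consume() owns the skb */
                    else
                            kfree_skb(segs);        /* drop the rest on error */
                    segs = next;
            }
            return err;
    }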
@@ -400,8 +483,15 @@ static int queue_control_packets(struct datapath *dp, struct sk_buff *skb,
                nskb = skb->next;
                skb->next = NULL;
 
+               if (vlan_tx_tag_present(skb)) {
+                       skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+                       if (unlikely(!skb)) {
+                               err = -ENOMEM;
+                               goto err_kfree_skbs;
+                       }
+               }
+
                len = sizeof(struct odp_header);
-               len += nla_total_size(4); /* ODP_PACKET_ATTR_TYPE. */
                len += nla_total_size(skb->len);
                len += nla_total_size(FLOW_BUFSIZE);
                if (upcall_info->userdata)
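This is the core of the VLAN-acceleration change for upcalls: when the tag is carried out-of-band in skb->vlan_tci (vlan_tx_tag_present()), it must be pushed back into the packet data before the bytes are copied to userspace. Note the ownership rule that motivates the NULL check, sketched below (vlan_detag_for_userspace() is an illustrative wrapper, not part of the patch):

    /* Sketch: ensure the VLAN tag is in the payload, not the skb
     * metadata, before copying the packet to userspace.
     * __vlan_put_tag() may reallocate; on failure it frees the skb and
     * returns NULL, so the caller must not touch the old pointer --
     * hence the -ENOMEM path in the hunk above. */
    static struct sk_buff *vlan_detag_for_userspace(struct sk_buff *skb)
    {
            if (!vlan_tx_tag_present(skb))
                    return skb;     /* tag already in the packet data */
            return __vlan_put_tag(skb, vlan_tx_tag_get(skb));
    }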
@@ -461,86 +551,6 @@ err_kfree_skbs:
        return err;
 }
 
-/* Generic Netlink multicast groups for upcalls.
- *
- * We really want three unique multicast groups per datapath, but we can't even
- * get one, because genl_register_mc_group() takes genl_lock, which is also
- * held during Generic Netlink message processing, so trying to acquire
- * multicast groups during ODP_DP_NEW processing deadlocks.  Instead, we
- * preallocate a few groups and use them round-robin for datapaths.  Collision
- * isn't fatal--multicast listeners should check that the family is the one
- * that they want and discard others--but it wastes time and memory to receive
- * unwanted messages.
- */
-static struct genl_multicast_group packet_mc_groups[PACKET_N_MC_GROUPS];
-
-static struct genl_family dp_packet_genl_family = {
-       .id = GENL_ID_GENERATE,
-       .hdrsize = sizeof(struct odp_header),
-       .name = ODP_PACKET_FAMILY,
-       .version = 1,
-       .maxattr = ODP_PACKET_ATTR_MAX
-};
-
-static int packet_register_mc_groups(void)
-{
-       int i;
-
-       for (i = 0; i < PACKET_N_MC_GROUPS; i++) {
-               struct genl_multicast_group *group = &packet_mc_groups[i];
-               int error;
-
-               sprintf(group->name, "packet%d", i);
-               error = genl_register_mc_group(&dp_packet_genl_family, group);
-               if (error)
-                       return error;
-       }
-       return 0;
-}
-
-int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info)
-{
-       struct dp_stats_percpu *stats;
-       int err;
-
-       WARN_ON_ONCE(skb_shared(skb));
-
-       forward_ip_summed(skb);
-
-       err = vswitch_skb_checksum_setup(skb);
-       if (err)
-               goto err_kfree_skb;
-
-       /* Break apart GSO packets into their component pieces.  Otherwise
-        * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
-       if (skb_is_gso(skb)) {
-               struct sk_buff *nskb = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
-               
-               kfree_skb(skb);
-               skb = nskb;
-               if (IS_ERR(skb)) {
-                       err = PTR_ERR(skb);
-                       goto err;
-               }
-       }
-
-       return queue_control_packets(dp, skb, upcall_info);
-
-err_kfree_skb:
-       kfree_skb(skb);
-err:
-       local_bh_disable();
-       stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
-
-       write_seqcount_begin(&stats->seqlock);
-       stats->n_lost++;
-       write_seqcount_end(&stats->seqlock);
-
-       local_bh_enable();
-
-       return err;
-}
-
 /* Called with genl_mutex. */
 static int flush_flows(int dp_ifindex)
 {
@@ -570,58 +580,58 @@ static int validate_actions(const struct nlattr *attr)
        int rem;
 
        nla_for_each_nested(a, attr, rem) {
-               static const u32 action_lens[ODPAT_MAX + 1] = {
-                       [ODPAT_OUTPUT] = 4,
-                       [ODPAT_CONTROLLER] = 8,
-                       [ODPAT_SET_DL_TCI] = 2,
-                       [ODPAT_STRIP_VLAN] = 0,
-                       [ODPAT_SET_DL_SRC] = ETH_ALEN,
-                       [ODPAT_SET_DL_DST] = ETH_ALEN,
-                       [ODPAT_SET_NW_SRC] = 4,
-                       [ODPAT_SET_NW_DST] = 4,
-                       [ODPAT_SET_NW_TOS] = 1,
-                       [ODPAT_SET_TP_SRC] = 2,
-                       [ODPAT_SET_TP_DST] = 2,
-                       [ODPAT_SET_TUNNEL] = 8,
-                       [ODPAT_SET_PRIORITY] = 4,
-                       [ODPAT_POP_PRIORITY] = 0,
-                       [ODPAT_DROP_SPOOFED_ARP] = 0,
+               static const u32 action_lens[ODP_ACTION_ATTR_MAX + 1] = {
+                       [ODP_ACTION_ATTR_OUTPUT] = 4,
+                       [ODP_ACTION_ATTR_CONTROLLER] = 8,
+                       [ODP_ACTION_ATTR_SET_DL_TCI] = 2,
+                       [ODP_ACTION_ATTR_STRIP_VLAN] = 0,
+                       [ODP_ACTION_ATTR_SET_DL_SRC] = ETH_ALEN,
+                       [ODP_ACTION_ATTR_SET_DL_DST] = ETH_ALEN,
+                       [ODP_ACTION_ATTR_SET_NW_SRC] = 4,
+                       [ODP_ACTION_ATTR_SET_NW_DST] = 4,
+                       [ODP_ACTION_ATTR_SET_NW_TOS] = 1,
+                       [ODP_ACTION_ATTR_SET_TP_SRC] = 2,
+                       [ODP_ACTION_ATTR_SET_TP_DST] = 2,
+                       [ODP_ACTION_ATTR_SET_TUNNEL] = 8,
+                       [ODP_ACTION_ATTR_SET_PRIORITY] = 4,
+                       [ODP_ACTION_ATTR_POP_PRIORITY] = 0,
+                       [ODP_ACTION_ATTR_DROP_SPOOFED_ARP] = 0,
                };
                int type = nla_type(a);
 
-               if (type > ODPAT_MAX || nla_len(a) != action_lens[type])
+               if (type > ODP_ACTION_ATTR_MAX || nla_len(a) != action_lens[type])
                        return -EINVAL;
 
                switch (type) {
-               case ODPAT_UNSPEC:
+               case ODP_ACTION_ATTR_UNSPEC:
                        return -EINVAL;
 
-               case ODPAT_CONTROLLER:
-               case ODPAT_STRIP_VLAN:
-               case ODPAT_SET_DL_SRC:
-               case ODPAT_SET_DL_DST:
-               case ODPAT_SET_NW_SRC:
-               case ODPAT_SET_NW_DST:
-               case ODPAT_SET_TP_SRC:
-               case ODPAT_SET_TP_DST:
-               case ODPAT_SET_TUNNEL:
-               case ODPAT_SET_PRIORITY:
-               case ODPAT_POP_PRIORITY:
-               case ODPAT_DROP_SPOOFED_ARP:
+               case ODP_ACTION_ATTR_CONTROLLER:
+               case ODP_ACTION_ATTR_STRIP_VLAN:
+               case ODP_ACTION_ATTR_SET_DL_SRC:
+               case ODP_ACTION_ATTR_SET_DL_DST:
+               case ODP_ACTION_ATTR_SET_NW_SRC:
+               case ODP_ACTION_ATTR_SET_NW_DST:
+               case ODP_ACTION_ATTR_SET_TP_SRC:
+               case ODP_ACTION_ATTR_SET_TP_DST:
+               case ODP_ACTION_ATTR_SET_TUNNEL:
+               case ODP_ACTION_ATTR_SET_PRIORITY:
+               case ODP_ACTION_ATTR_POP_PRIORITY:
+               case ODP_ACTION_ATTR_DROP_SPOOFED_ARP:
                        /* No validation needed. */
                        break;
 
-               case ODPAT_OUTPUT:
+               case ODP_ACTION_ATTR_OUTPUT:
                        if (nla_get_u32(a) >= DP_MAX_PORTS)
                                return -EINVAL;
                        break;
 
-               case ODPAT_SET_DL_TCI:
+               case ODP_ACTION_ATTR_SET_DL_TCI:
                        if (nla_get_be16(a) & htons(VLAN_CFI_MASK))
                                return -EINVAL;
                        break;
 
-               case ODPAT_SET_NW_TOS:
+               case ODP_ACTION_ATTR_SET_NW_TOS:
                        if (nla_get_u8(a) & INET_ECN_MASK)
                                return -EINVAL;
                        break;
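The ODPAT_* constants are renamed to ODP_ACTION_ATTR_*; the validation logic itself is unchanged: every action type has a fixed payload length checked against the table before any per-type semantic check runs. A hedged userspace sketch of an action list these checks would accept (libnl-3 helpers assumed; the ODP_* values come from the datapath's public header):

    #include <arpa/inet.h>
    #include <errno.h>
    #include <netlink/attr.h>
    #include <netlink/msg.h>

    /* Illustration only: set the 802.1Q TCI to VLAN 100 (2-byte be16,
     * CFI bit clear), then output to port 3 (4-byte u32, below
     * DP_MAX_PORTS) -- both satisfy the length table and the per-type
     * checks above. */
    static int put_example_actions(struct nl_msg *msg)
    {
            struct nlattr *actions;

            actions = nla_nest_start(msg, ODP_FLOW_ATTR_ACTIONS);
            if (!actions)
                    return -ENOMEM;
            nla_put_u16(msg, ODP_ACTION_ATTR_SET_DL_TCI, htons(100));
            nla_put_u32(msg, ODP_ACTION_ATTR_OUTPUT, 3);
            nla_nest_end(msg, actions);
            return 0;
    }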
@@ -771,6 +781,8 @@ int dp_min_mtu(const struct datapath *dp)
                        continue;
 
                dev_mtu = vport_get_mtu(p);
+               if (!dev_mtu)
+                       continue;
                if (!mtu || dev_mtu < mtu)
                        mtu = dev_mtu;
        }
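The new check matters because vport_get_mtu() may now report 0 for ports without a meaningful MTU; without the continue, a zero would win the dev_mtu < mtu comparison and clamp the datapath minimum to 0. A standalone sketch of the corrected fold (the ETH_DATA_LEN fallback is assumed from the function's usual contract, not shown in this hunk):

    /* Sketch: minimum over the MTUs that are actually reported; 0 means
     * "no MTU" and must not participate in the minimum. */
    static int min_reported_mtu(const int *mtus, int n)
    {
            int mtu = 0;
            int i;

            for (i = 0; i < n; i++) {
                    if (!mtus[i])
                            continue;       /* port reports no MTU */
                    if (!mtu || mtus[i] < mtu)
                            mtu = mtus[i];
            }
            return mtu ? mtu : ETH_DATA_LEN;        /* assumed 1500 default */
    }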
@@ -825,7 +837,6 @@ static int odp_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
        struct nlattr *nla;
        unsigned long used;
        u8 tcp_flags;
-       int nla_len;
        int err;
 
        sf_acts = rcu_dereference_protected(flow->sf_acts,
@@ -853,7 +864,7 @@ static int odp_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
        spin_unlock_bh(&flow->lock);
 
        if (used)
-               NLA_PUT_MSECS(skb, ODP_FLOW_ATTR_USED, used);
+               NLA_PUT_U64(skb, ODP_FLOW_ATTR_USED, flow_used_time(used));
 
        if (stats.n_packets)
                NLA_PUT(skb, ODP_FLOW_ATTR_STATS, sizeof(struct odp_flow_stats), &stats);
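The ODP_FLOW_ATTR_USED change stops exporting a jiffies-derived value via NLA_PUT_MSECS and instead reports an absolute 64-bit timestamp computed by flow_used_time(), which is not shown in this diff. A sketch consistent with the new call, assuming the helper subtracts the flow's idle time from the current monotonic clock:

    /* Hedged sketch of a flow_used_time()-style helper: translate the
     * flow's last-used jiffies stamp into milliseconds on the monotonic
     * clock, so userspace never sees raw jiffies. */
    static u64 flow_used_time_sketch(unsigned long flow_jiffies)
    {
            struct timespec cur_ts;
            u64 cur_ms, idle_ms;

            ktime_get_ts(&cur_ts);
            idle_ms = jiffies_to_msecs(jiffies - flow_jiffies);
            cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC +
                     cur_ts.tv_nsec / NSEC_PER_MSEC;

            return cur_ms - idle_ms;
    }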
@@ -861,23 +872,20 @@ static int odp_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
        if (tcp_flags)
                NLA_PUT_U8(skb, ODP_FLOW_ATTR_TCP_FLAGS, tcp_flags);
 
-       /* If ODP_FLOW_ATTR_ACTIONS doesn't fit, and this is the first flow to
-        * be dumped into 'skb', then expand the skb.  This is unusual for
-        * Netlink but individual action lists can be longer than a page and
-        * thus entirely undumpable if we didn't do this. */
-       nla_len = nla_total_size(sf_acts->actions_len);
-       if (nla_len > skb_tailroom(skb) && !skb_orig_len) {
-               int hdr_off = (unsigned char *)odp_header - skb->data;
-
-               err = pskb_expand_head(skb, 0, nla_len - skb_tailroom(skb), GFP_KERNEL);
-               if (err)
-                       goto error;
-
-               odp_header = (struct odp_header *)(skb->data + hdr_off);
-       }
-       nla = nla_nest_start(skb, ODP_FLOW_ATTR_ACTIONS);
-       memcpy(__skb_put(skb, sf_acts->actions_len), sf_acts->actions, sf_acts->actions_len);
-       nla_nest_end(skb, nla);
+       /* If ODP_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
+        * this is the first flow to be dumped into 'skb'.  This is unusual for
+        * Netlink but individual action lists can be longer than
+        * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
+        * The userspace caller can always fetch the actions separately if it
+        * really wants them.  (Most userspace callers in fact don't care.)
+        *
+        * This can only fail for dump operations because the skb is always
+        * properly sized for single flows.
+        */
+       err = nla_put(skb, ODP_FLOW_ATTR_ACTIONS, sf_acts->actions_len,
+                     sf_acts->actions);
+       if (err < 0 && skb_orig_len)
+               goto error;
 
        return genlmsg_end(skb, odp_header);
 
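The replacement relies on nla_put() failing cleanly when the attribute does not fit: it needs nla_total_size(actions_len) bytes of tailroom and returns -EMSGSIZE rather than expanding the skb. In a dump, skb_orig_len is nonzero for every flow after the first, so an oversized flow is pushed into the next dump skb; for the first flow the error is ignored and the actions are simply omitted, as the comment explains. A sketch of the size check involved:

    /* Sketch: the tailroom test nla_put() performs internally before
     * copying the attribute (it returns -EMSGSIZE on failure instead of
     * growing the buffer). */
    static int try_put_actions(struct sk_buff *skb,
                               const void *actions, int actions_len)
    {
            if (skb_tailroom(skb) < nla_total_size(actions_len))
                    return -EMSGSIZE;
            return nla_put(skb, ODP_FLOW_ATTR_ACTIONS, actions_len, actions);
    }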
@@ -1584,6 +1592,7 @@ static int odp_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
        struct odp_header *odp_header;
        struct nlattr *nla;
        int ifindex, iflink;
+       int mtu;
        int err;
 
        odp_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family,
@@ -1605,7 +1614,9 @@ static int odp_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
 
        NLA_PUT(skb, ODP_VPORT_ATTR_ADDRESS, ETH_ALEN, vport_get_addr(vport));
 
-       NLA_PUT_U32(skb, ODP_VPORT_ATTR_MTU, vport_get_mtu(vport));
+       mtu = vport_get_mtu(vport);
+       if (mtu)
+               NLA_PUT_U32(skb, ODP_VPORT_ATTR_MTU, mtu);
 
        err = vport_get_options(vport, skb);
        if (err == -EMSGSIZE)
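With this hunk ODP_VPORT_ATTR_MTU becomes optional: it is emitted only when the vport reports a nonzero MTU. Readers of the vport message must therefore treat its absence as "no MTU reported"; a hedged userspace-side sketch (libnl-style parsed attribute array assumed):

    /* Illustration only: ODP_VPORT_ATTR_MTU may now be absent. */
    uint32_t mtu = 0;       /* 0 == vport reported no MTU */

    if (attrs[ODP_VPORT_ATTR_MTU])
            mtu = nla_get_u32(attrs[ODP_VPORT_ATTR_MTU]);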