static LIST_HEAD(dps);
static struct vport *new_vport(const struct vport_parms *);
+static int queue_control_packets(struct datapath *, struct sk_buff *,
+				 const struct dp_upcall_info *);
/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
struct datapath *get_dp(int dp_ifindex)
	*(__sum16 *)(to + csum_start + csum_offset) = csum_fold(csum);
}
-static struct genl_family dp_packet_genl_family;
+static struct genl_family dp_packet_genl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = sizeof(struct odp_header),
+	.name = ODP_PACKET_FAMILY,
+	.version = 1,
+	.maxattr = ODP_PACKET_ATTR_MAX
+};
+
+/* Generic Netlink multicast groups for upcalls.
+ *
+ * We really want three unique multicast groups per datapath, but we can't even
+ * get one, because genl_register_mc_group() takes genl_lock, which is also
+ * held during Generic Netlink message processing, so trying to acquire
+ * multicast groups during ODP_DP_NEW processing deadlocks. Instead, we
+ * preallocate a few groups and use them round-robin for datapaths. Collision
+ * isn't fatal--multicast listeners should check that the family is the one
+ * that they want and discard others--but it wastes time and memory to receive
+ * unwanted messages.
+ */
#define PACKET_N_MC_GROUPS 16
+static struct genl_multicast_group packet_mc_groups[PACKET_N_MC_GROUPS];
-static int packet_mc_group(struct datapath *dp, u8 cmd)
+static u32 packet_mc_group(struct datapath *dp, u8 cmd)
{
+	u32 idx;
	BUILD_BUG_ON_NOT_POWER_OF_2(PACKET_N_MC_GROUPS);
-	return jhash_2words(dp->dp_ifindex, cmd, 0) & (PACKET_N_MC_GROUPS - 1);
+
+	idx = jhash_2words(dp->dp_ifindex, cmd, 0) & (PACKET_N_MC_GROUPS - 1);
+	return packet_mc_groups[idx].id;
+}
+
+static int packet_register_mc_groups(void)
+{
+	int i;
+
+	for (i = 0; i < PACKET_N_MC_GROUPS; i++) {
+		struct genl_multicast_group *group = &packet_mc_groups[i];
+		int error;
+
+		sprintf(group->name, "packet%d", i);
+		error = genl_register_mc_group(&dp_packet_genl_family, group);
+		if (error)
+			return error;
+	}
+	return 0;
+}
+
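
Because the preallocated packetN groups are shared round-robin by all datapaths, every listener has to do the filtering the comment above describes. A minimal userspace sketch of that check, assuming 'sock' is a NETLINK_GENERIC socket already subscribed to one of the groups and 'family_id' was resolved for ODP_PACKET_FAMILY through the Generic Netlink controller (both are assumptions, not part of this patch):

#include <sys/socket.h>
#include <linux/netlink.h>

static void recv_upcalls(int sock, __u16 family_id)
{
	char buf[8192];

	for (;;) {
		int n = recv(sock, buf, sizeof buf, 0);
		struct nlmsghdr *nlh;

		if (n <= 0)
			return;
		for (nlh = (struct nlmsghdr *)buf; NLMSG_OK(nlh, n);
		     nlh = NLMSG_NEXT(nlh, n)) {
			/* Discard messages from other families sharing
			 * this multicast group. */
			if (nlh->nlmsg_type != family_id)
				continue;
			/* ... process the ODP_PACKET_* message ... */
		}
	}
}
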
+int dp_upcall(struct datapath *dp, struct sk_buff *skb,
+	      const struct dp_upcall_info *upcall_info)
+{
+	struct dp_stats_percpu *stats;
+	int err;
+
+	WARN_ON_ONCE(skb_shared(skb));
+
+	forward_ip_summed(skb);
+
+	err = vswitch_skb_checksum_setup(skb);
+	if (err)
+		goto err_kfree_skb;
+
+	/* Break apart GSO packets into their component pieces. Otherwise
+	 * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
+	if (skb_is_gso(skb)) {
+		struct sk_buff *nskb = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
+
+		kfree_skb(skb);
+		skb = nskb;
+		if (IS_ERR(skb)) {
+			err = PTR_ERR(skb);
+			goto err;
+		}
+	}
+
+	return queue_control_packets(dp, skb, upcall_info);
+
+err_kfree_skb:
+	kfree_skb(skb);
+err:
+	local_bh_disable();
+	stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
+
+	write_seqcount_begin(&stats->seqlock);
+	stats->n_lost++;
+	write_seqcount_end(&stats->seqlock);
+
+	local_bh_enable();
+
+	return err;
}
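
The stats update in the error path uses a seqcount so that dp_upcall() never takes a lock just to bump a counter. For context, a minimal sketch of the matching lockless reader, assuming the struct dp_stats_percpu layout used above (the real reader lives in the datapath's stats-aggregation path):

#include <linux/seqlock.h>

static u64 read_n_lost(const struct dp_stats_percpu *stats)
{
	unsigned int start;
	u64 n_lost;

	/* Retry until the writer was quiescent for the whole read. */
	do {
		start = read_seqcount_begin(&stats->seqlock);
		n_lost = stats->n_lost;
	} while (read_seqcount_retry(&stats->seqlock, start));

	return n_lost;
}
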
/* Send each packet in the 'skb' list to userspace for 'dp' as directed by
		nskb = skb->next;
		skb->next = NULL;
+		/* If the packet carries an offloaded VLAN tag, push it back
+		 * into the packet data so userspace sees the tagged frame.
+		 * __vlan_put_tag() frees the skb on failure. */
+		if (vlan_tx_tag_present(skb)) {
+			skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+			if (unlikely(!skb)) {
+				err = -ENOMEM;
+				goto err_kfree_skbs;
+			}
+		}
+
		len = sizeof(struct odp_header);
-		len += nla_total_size(4); /* ODP_PACKET_ATTR_TYPE. */
		len += nla_total_size(skb->len);
		len += nla_total_size(FLOW_BUFSIZE);
		if (upcall_info->userdata)
	return err;
}
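
The nskb/skb->next handling above is the usual pattern for consuming the segment list that skb_gso_segment() returns: detach each segment before handing it off, so an error path can free exactly the unsent remainder. A standalone sketch of that shape, with a hypothetical send_one() callback standing in for the real per-segment delivery (assumed to consume its skb on success and failure alike):

static int send_each(struct sk_buff *skb, int (*send_one)(struct sk_buff *))
{
	int err = 0;

	while (skb && !err) {
		struct sk_buff *nskb = skb->next;

		skb->next = NULL;	/* detach before handing off */
		err = send_one(skb);
		skb = nskb;
	}

	/* On error, free whatever was never handed off. */
	while (skb) {
		struct sk_buff *nskb = skb->next;

		kfree_skb(skb);
		skb = nskb;
	}
	return err;
}
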
-/* Generic Netlink multicast groups for upcalls.
- *
- * We really want three unique multicast groups per datapath, but we can't even
- * get one, because genl_register_mc_group() takes genl_lock, which is also
- * held during Generic Netlink message processing, so trying to acquire
- * multicast groups during ODP_DP_NEW processing deadlocks. Instead, we
- * preallocate a few groups and use them round-robin for datapaths. Collision
- * isn't fatal--multicast listeners should check that the family is the one
- * that they want and discard others--but it wastes time and memory to receive
- * unwanted messages.
- */
-static struct genl_multicast_group packet_mc_groups[PACKET_N_MC_GROUPS];
-
-static struct genl_family dp_packet_genl_family = {
-	.id = GENL_ID_GENERATE,
-	.hdrsize = sizeof(struct odp_header),
-	.name = ODP_PACKET_FAMILY,
-	.version = 1,
-	.maxattr = ODP_PACKET_ATTR_MAX
-};
-
-static int packet_register_mc_groups(void)
-{
-	int i;
-
-	for (i = 0; i < PACKET_N_MC_GROUPS; i++) {
-		struct genl_multicast_group *group = &packet_mc_groups[i];
-		int error;
-
-		sprintf(group->name, "packet%d", i);
-		error = genl_register_mc_group(&dp_packet_genl_family, group);
-		if (error)
-			return error;
-	}
-	return 0;
-}
-
-int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info)
-{
-	struct dp_stats_percpu *stats;
-	int err;
-
-	WARN_ON_ONCE(skb_shared(skb));
-
-	forward_ip_summed(skb);
-
-	err = vswitch_skb_checksum_setup(skb);
-	if (err)
-		goto err_kfree_skb;
-
-	/* Break apart GSO packets into their component pieces. Otherwise
-	 * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
-	if (skb_is_gso(skb)) {
-		struct sk_buff *nskb = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
-
-		kfree_skb(skb);
-		skb = nskb;
-		if (IS_ERR(skb)) {
-			err = PTR_ERR(skb);
-			goto err;
-		}
-	}
-
-	return queue_control_packets(dp, skb, upcall_info);
-
-err_kfree_skb:
-	kfree_skb(skb);
-err:
-	local_bh_disable();
-	stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
-
-	write_seqcount_begin(&stats->seqlock);
-	stats->n_lost++;
-	write_seqcount_end(&stats->seqlock);
-
-	local_bh_enable();
-
-	return err;
-}
-
/* Called with genl_mutex. */
static int flush_flows(int dp_ifindex)
{
			continue;
		dev_mtu = vport_get_mtu(p);
+		if (!dev_mtu)
+			continue;
		if (!mtu || dev_mtu < mtu)
			mtu = dev_mtu;
	}
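
With this change, ports whose vport_get_mtu() reports 0 (no meaningful MTU) no longer drag the datapath minimum down to zero. The rule, restated as a self-contained sketch; the ETH_DATA_LEN fallback for the no-ports case is an assumption here, not shown in this hunk:

#include <linux/if_ether.h>

static int min_port_mtu(const int *mtus, int n)
{
	int mtu = 0;
	int i;

	for (i = 0; i < n; i++) {
		if (!mtus[i])
			continue;	/* port reports no MTU; ignore it */
		if (!mtu || mtus[i] < mtu)
			mtu = mtus[i];
	}
	return mtu ? mtu : ETH_DATA_LEN;	/* default when nothing reported */
}
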
	struct nlattr *nla;
	unsigned long used;
	u8 tcp_flags;
-	int nla_len;
	int err;
	sf_acts = rcu_dereference_protected(flow->sf_acts,
	spin_unlock_bh(&flow->lock);
	if (used)
-		NLA_PUT_MSECS(skb, ODP_FLOW_ATTR_USED, used);
+		NLA_PUT_U64(skb, ODP_FLOW_ATTR_USED, flow_used_time(used));
	if (stats.n_packets)
		NLA_PUT(skb, ODP_FLOW_ATTR_STATS, sizeof(struct odp_flow_stats), &stats);
	if (tcp_flags)
		NLA_PUT_U8(skb, ODP_FLOW_ATTR_TCP_FLAGS, tcp_flags);
-	/* If ODP_FLOW_ATTR_ACTIONS doesn't fit, and this is the first flow to
-	 * be dumped into 'skb', then expand the skb. This is unusual for
-	 * Netlink but individual action lists can be longer than a page and
-	 * thus entirely undumpable if we didn't do this. */
-	nla_len = nla_total_size(sf_acts->actions_len);
-	if (nla_len > skb_tailroom(skb) && !skb_orig_len) {
-		int hdr_off = (unsigned char *)odp_header - skb->data;
-
-		err = pskb_expand_head(skb, 0, nla_len - skb_tailroom(skb), GFP_KERNEL);
-		if (err)
-			goto error;
-
-		odp_header = (struct odp_header *)(skb->data + hdr_off);
-	}
-	nla = nla_nest_start(skb, ODP_FLOW_ATTR_ACTIONS);
-	memcpy(__skb_put(skb, sf_acts->actions_len), sf_acts->actions, sf_acts->actions_len);
-	nla_nest_end(skb, nla);
+	/* If ODP_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
+	 * this is the first flow to be dumped into 'skb'. This is unusual for
+	 * Netlink but individual action lists can be longer than
+	 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
+	 * The userspace caller can always fetch the actions separately if it
+	 * really wants them. (Most userspace callers in fact don't care.)
+	 *
+	 * This can only fail for dump operations because the skb is always
+	 * properly sized for single flows.
+	 */
+	err = nla_put(skb, ODP_FLOW_ATTR_ACTIONS, sf_acts->actions_len,
+		      sf_acts->actions);
+	if (err < 0 && skb_orig_len)
+		goto error;
	return genlmsg_end(skb, odp_header);
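
In other words, the nla_put() failure is fatal only when 'skb_orig_len' is nonzero, i.e. when the flow is not the first one in a dump buffer; a single-flow reply skb is always sized to fit. The same shape factored out as a sketch, where put_optional() is a hypothetical helper, not part of the datapath code:

static int put_optional(struct sk_buff *skb, int attrtype, int len,
			const void *data, bool required)
{
	int err = nla_put(skb, attrtype, len, data);

	/* nla_put() leaves the skb untouched when it fails, so an elided
	 * optional attribute still yields a valid message. */
	if (err == -EMSGSIZE && !required)
		return 0;
	return err;
}
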
	struct odp_header *odp_header;
	struct nlattr *nla;
	int ifindex, iflink;
+	int mtu;
	int err;
	odp_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family,
	NLA_PUT(skb, ODP_VPORT_ATTR_ADDRESS, ETH_ALEN, vport_get_addr(vport));
-	NLA_PUT_U32(skb, ODP_VPORT_ATTR_MTU, vport_get_mtu(vport));
+	/* Omit ODP_VPORT_ATTR_MTU for vports with no meaningful MTU. */
+	mtu = vport_get_mtu(vport);
+	if (mtu)
+		NLA_PUT_U32(skb, ODP_VPORT_ATTR_MTU, mtu);
	err = vport_get_options(vport, skb);
	if (err == -EMSGSIZE)