datapath: Use percpu allocator for flow-stats.

author Pravin B Shelar <pshelar@nicira.com>
Thu, 5 Dec 2013 23:50:27 +0000 (15:50 -0800)
committer Pravin B Shelar <pshelar@nicira.com>
Tue, 3 Dec 2013 16:57:56 +0000 (08:57 -0800)
diff --git a/datapath/datapath.c b/datapath/datapath.c

index 1a5bffb..1808c36 100644 (file)
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -502,7 +502,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
                 packet->protocol = htons(ETH_P_802_2);
  
         /* Build an sw_flow for sending this packet. */
-       flow = ovs_flow_alloc();
+       flow = ovs_flow_alloc(false);
         err = PTR_ERR(flow);
         if (IS_ERR(flow))
                 goto err_kfree_skb;
@@ -637,7 +637,9 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
  {
         const int skb_orig_len = skb->len;
         struct nlattr *start;
-       struct sw_flow_stats flow_stats;
+       struct ovs_flow_stats stats;
+       __be16 tcp_flags;
+       unsigned long used;
         struct ovs_header *ovs_header;
         struct nlattr *nla;
         int err;
@@ -668,25 +670,17 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
  
         nla_nest_end(skb, nla);
  
-       ovs_flow_stats_get(flow, &flow_stats);
-       if (flow_stats.used &&
-           nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(flow_stats.used)))
+       ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
+       if (used &&
+           nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
                 goto nla_put_failure;
  
-       if (flow_stats.packet_count) {
-               struct ovs_flow_stats stats = {
-                       .n_packets = flow_stats.packet_count,
-                       .n_bytes = flow_stats.byte_count,
-               };
-
-               if (nla_put(skb, OVS_FLOW_ATTR_STATS,
-                           sizeof(struct ovs_flow_stats), &stats))
-                       goto nla_put_failure;
-       }
+       if (stats.n_packets &&
+           nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats))
+               goto nla_put_failure;
  
-       if ((u8)ntohs(flow_stats.tcp_flags) &&
-           nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS,
-                      (u8)ntohs(flow_stats.tcp_flags)))
+       if ((u8)ntohs(tcp_flags) &&
+            nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
                 goto nla_put_failure;
  
         /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
@@ -766,6 +760,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
         struct datapath *dp;
         struct sw_flow_actions *acts = NULL;
         struct sw_flow_match match;
+       bool exact_5tuple;
         int error;
  
         /* Extract key. */
@@ -774,7 +769,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
                 goto error;
  
         ovs_match_init(&match, &key, &mask);
-       error = ovs_nla_get_match(&match,
+       error = ovs_nla_get_match(&match, &exact_5tuple,
                                   a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
         if (error)
                 goto error;
@@ -813,7 +808,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
                         goto err_unlock_ovs;
  
                 /* Allocate flow. */
-               flow = ovs_flow_alloc();
+               flow = ovs_flow_alloc(!exact_5tuple);
                 if (IS_ERR(flow)) {
                         error = PTR_ERR(flow);
                         goto err_unlock_ovs;
@@ -900,7 +895,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
         }
  
         ovs_match_init(&match, &key, NULL);
-       err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
+       err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL);
         if (err)
                 return err;
  
@@ -954,7 +949,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
         }
  
         ovs_match_init(&match, &key, NULL);
-       err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
+       err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL);
         if (err)
                 goto unlock;
  
diff --git a/datapath/flow.c b/datapath/flow.c

index 57eb6b5..9b3d3a7 100644 (file)
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -64,9 +64,14 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies)
  
  void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb)
  {
-       struct sw_flow_stats *stats = &flow->stats[smp_processor_id()];
+       struct flow_stats *stats;
         __be16 tcp_flags = 0;
  
+       if (!flow->stats.is_percpu)
+               stats = flow->stats.stat;
+       else
+               stats = this_cpu_ptr(flow->stats.cpu_stats);
+
         if ((flow->key.eth.type == htons(ETH_P_IP) ||
              flow->key.eth.type == htons(ETH_P_IPV6)) &&
             flow->key.ip.proto == IPPROTO_TCP &&
@@ -82,56 +87,79 @@ void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb)
         spin_unlock(&stats->lock);
  }
  
-void ovs_flow_stats_get(struct sw_flow *flow, struct sw_flow_stats *res)
+static void stats_read(struct flow_stats *stats,
+                      struct ovs_flow_stats *ovs_stats,
+                      unsigned long *used, __be16 *tcp_flags)
  {
-       int cpu, cur_cpu;
+       spin_lock(&stats->lock);
+       if (time_after(stats->used, *used))
+               *used = stats->used;
+       *tcp_flags |= stats->tcp_flags;
+       ovs_stats->n_packets += stats->packet_count;
+       ovs_stats->n_bytes += stats->byte_count;
+       spin_unlock(&stats->lock);
+}
  
-       memset(res, 0, sizeof(*res));
+void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats,
+                       unsigned long *used, __be16 *tcp_flags)
+{
+       int cpu, cur_cpu;
  
-       cur_cpu = get_cpu();
-       for_each_possible_cpu(cpu) {
-               struct sw_flow_stats *stats = &flow->stats[cpu];
+       *used = 0;
+       *tcp_flags = 0;
+       memset(ovs_stats, 0, sizeof(*ovs_stats));
  
-               if (cpu == cur_cpu)
-                       local_bh_disable();
+       if (!flow->stats.is_percpu) {
+               stats_read(flow->stats.stat, ovs_stats, used, tcp_flags);
+       } else {
+               cur_cpu = get_cpu();
+               for_each_possible_cpu(cpu) {
+                       struct flow_stats *stats;
  
-               spin_lock(&stats->lock);
-               if (time_after(stats->used, res->used))
-                       res->used = stats->used;
-               res->packet_count += stats->packet_count;
-               res->byte_count += stats->byte_count;
-               res->tcp_flags |= stats->tcp_flags;
-               spin_unlock(&stats->lock);
+                       if (cpu == cur_cpu)
+                               local_bh_disable();
  
-               if (cpu == cur_cpu)
-                       local_bh_enable();
+                       stats = per_cpu_ptr(flow->stats.cpu_stats, cpu);
+                       stats_read(stats, ovs_stats, used, tcp_flags);
  
+                       if (cpu == cur_cpu)
+                               local_bh_enable();
+               }
+               put_cpu();
         }
-       put_cpu();
+}
+
+static void stats_reset(struct flow_stats *stats)
+{
+       spin_lock(&stats->lock);
+       stats->used = 0;
+       stats->packet_count = 0;
+       stats->byte_count = 0;
+       stats->tcp_flags = 0;
+       spin_unlock(&stats->lock);
  }
  
  void ovs_flow_stats_clear(struct sw_flow *flow)
  {
         int cpu, cur_cpu;
  
-       cur_cpu = get_cpu();
-       for_each_possible_cpu(cpu) {
-               struct sw_flow_stats *stats = &flow->stats[cpu];
+       if (!flow->stats.is_percpu) {
+               stats_reset(flow->stats.stat);
+       } else {
+               cur_cpu = get_cpu();
+
+               for_each_possible_cpu(cpu) {
  
-               if (cpu == cur_cpu)
-                       local_bh_disable();
+                       if (cpu == cur_cpu)
+                               local_bh_disable();
  
-               spin_lock(&stats->lock);
-               stats->used = 0;
-               stats->packet_count = 0;
-               stats->byte_count = 0;
-               stats->tcp_flags = 0;
-               spin_unlock(&stats->lock);
+                       stats_reset(per_cpu_ptr(flow->stats.cpu_stats, cpu));
  
-               if (cpu == cur_cpu)
-                       local_bh_enable();
+                       if (cpu == cur_cpu)
+                               local_bh_enable();
+               }
+               put_cpu();
         }
-       put_cpu();
  }
  
  static int check_header(struct sk_buff *skb, int len)
diff --git a/datapath/flow.h b/datapath/flow.h

index 6b68cf1..eafcfd8 100644 (file)
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -149,13 +149,21 @@ struct sw_flow_actions {
         struct nlattr actions[];
  };
  
-struct sw_flow_stats {
+struct flow_stats {
         u64 packet_count;               /* Number of packets matched. */
         u64 byte_count;                 /* Number of bytes matched. */
         unsigned long used;             /* Last used time (in jiffies). */
         spinlock_t lock;                /* Lock for atomic stats update. */
         __be16 tcp_flags;               /* Union of seen TCP flags. */
-} ____cacheline_aligned_in_smp;
+};
+
+struct sw_flow_stats {
+       bool is_percpu;
+       union {
+               struct flow_stats *stat;
+               struct flow_stats __percpu *cpu_stats;
+       };
+};
  
  struct sw_flow {
         struct rcu_head rcu;
@@ -166,7 +174,7 @@ struct sw_flow {
         struct sw_flow_key unmasked_key;
         struct sw_flow_mask *mask;
         struct sw_flow_actions __rcu *sf_acts;
-       struct sw_flow_stats stats[];
+       struct sw_flow_stats stats;
  };
  
  struct arp_eth_header {
@@ -184,7 +192,8 @@ struct arp_eth_header {
  } __packed;
  
  void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb);
-void ovs_flow_stats_get(struct sw_flow *flow, struct sw_flow_stats *res);
+void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *stats,
+                       unsigned long *used, __be16 *tcp_flags);
  void ovs_flow_stats_clear(struct sw_flow *flow);
  u64 ovs_flow_used_time(unsigned long flow_jiffies);
  
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c

index 75c72b3..9b26528 100644 (file)
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -266,6 +266,20 @@ static bool is_all_zero(const u8 *fp, size_t size)
         return true;
  }
  
+static bool is_all_set(const u8 *fp, size_t size)
+{
+       int i;
+
+       if (!fp)
+               return false;
+
+       for (i = 0; i < size; i++)
+               if (fp[i] != 0xff)
+                       return false;
+
+       return true;
+}
+
  static int __parse_flow_nlattrs(const struct nlattr *attr,
                                 const struct nlattr *a[],
                                 u64 *attrsp, bool nz)
@@ -487,8 +501,9 @@ static int metadata_from_nlattrs(struct sw_flow_match *match,  u64 *attrs,
         return 0;
  }
  
-static int ovs_key_from_nlattrs(struct sw_flow_match *match,  u64 attrs,
-                               const struct nlattr **a, bool is_mask)
+static int ovs_key_from_nlattrs(struct sw_flow_match *match,  bool *exact_5tuple,
+                               u64 attrs, const struct nlattr **a,
+                               bool is_mask)
  {
         int err;
         u64 orig_attrs = attrs;
@@ -545,6 +560,11 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match,  u64 attrs,
                 SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
         }
  
+       if (is_mask && exact_5tuple) {
+               if (match->mask->key.eth.type != htons(0xffff))
+                       *exact_5tuple = false;
+       }
+
         if (attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
                 const struct ovs_key_ipv4 *ipv4_key;
  
@@ -567,6 +587,13 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match,  u64 attrs,
                 SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
                                 ipv4_key->ipv4_dst, is_mask);
                 attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4);
+
+               if (is_mask && exact_5tuple && *exact_5tuple) {
+                       if (ipv4_key->ipv4_proto != 0xff ||
+                           ipv4_key->ipv4_src != htonl(0xffffffff) ||
+                           ipv4_key->ipv4_dst != htonl(0xffffffff))
+                               *exact_5tuple = false;
+               }
         }
  
         if (attrs & (1ULL << OVS_KEY_ATTR_IPV6)) {
@@ -598,6 +625,13 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match,  u64 attrs,
                                 is_mask);
  
                 attrs &= ~(1ULL << OVS_KEY_ATTR_IPV6);
+
+               if (is_mask && exact_5tuple && *exact_5tuple) {
+                       if (ipv6_key->ipv6_proto != 0xff ||
+                           !is_all_set((u8 *)ipv6_key->ipv6_src, sizeof(match->key->ipv6.addr.src)) ||
+                           !is_all_set((u8 *)ipv6_key->ipv6_dst, sizeof(match->key->ipv6.addr.dst)))
+                               *exact_5tuple = false;
+               }
         }
  
         if (attrs & (1ULL << OVS_KEY_ATTR_ARP)) {
@@ -640,6 +674,11 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match,  u64 attrs,
                                         tcp_key->tcp_dst, is_mask);
                 }
                 attrs &= ~(1ULL << OVS_KEY_ATTR_TCP);
+
+               if (is_mask && exact_5tuple && *exact_5tuple &&
+                   (tcp_key->tcp_src != htons(0xffff) ||
+                    tcp_key->tcp_dst != htons(0xffff)))
+                       *exact_5tuple = false;
         }
  
         if (attrs & (1ULL << OVS_KEY_ATTR_TCP_FLAGS)) {
@@ -671,6 +710,11 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match,  u64 attrs,
                                         udp_key->udp_dst, is_mask);
                 }
                 attrs &= ~(1ULL << OVS_KEY_ATTR_UDP);
+
+               if (is_mask && exact_5tuple && *exact_5tuple &&
+                   (udp_key->udp_src != htons(0xffff) ||
+                    udp_key->udp_dst != htons(0xffff)))
+                       *exact_5tuple = false;
         }
  
         if (attrs & (1ULL << OVS_KEY_ATTR_SCTP)) {
@@ -756,6 +800,7 @@ static void sw_flow_mask_set(struct sw_flow_mask *mask,
   * attribute specifies the mask field of the wildcarded flow.
   */
  int ovs_nla_get_match(struct sw_flow_match *match,
+                     bool *exact_5tuple,
                       const struct nlattr *key,
                       const struct nlattr *mask)
  {
@@ -803,10 +848,13 @@ int ovs_nla_get_match(struct sw_flow_match *match,
                 }
         }
  
-       err = ovs_key_from_nlattrs(match, key_attrs, a, false);
+       err = ovs_key_from_nlattrs(match, NULL, key_attrs, a, false);
         if (err)
                 return err;
  
+       if (exact_5tuple)
+               *exact_5tuple = true;
+
         if (mask) {
                 err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
                 if (err)
@@ -844,7 +892,7 @@ int ovs_nla_get_match(struct sw_flow_match *match,
                         }
                 }
  
-               err = ovs_key_from_nlattrs(match, mask_attrs, a, true);
+               err = ovs_key_from_nlattrs(match, exact_5tuple, mask_attrs, a, true);
                 if (err)
                         return err;
         } else {
diff --git a/datapath/flow_netlink.h b/datapath/flow_netlink.h

index 4401510..b31fbe2 100644 (file)
--- a/datapath/flow_netlink.h
+++ b/datapath/flow_netlink.h
@@ -45,6 +45,7 @@ int ovs_nla_put_flow(const struct sw_flow_key *,
  int ovs_nla_get_flow_metadata(struct sw_flow *flow,
                               const struct nlattr *attr);
  int ovs_nla_get_match(struct sw_flow_match *match,
+                     bool *exact_5tuple,
                       const struct nlattr *,
                       const struct nlattr *);
  
diff --git a/datapath/flow_table.c b/datapath/flow_table.c

index 8597651..b20adcb 100644 (file)
--- a/datapath/flow_table.c
+++ b/datapath/flow_table.c
@@ -73,7 +73,7 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
                 *d++ = *s++ & *m++;
  }
  
-struct sw_flow *ovs_flow_alloc(void)
+struct sw_flow *ovs_flow_alloc(bool percpu_stats)
  {
         struct sw_flow *flow;
         int cpu;
@@ -85,11 +85,30 @@ struct sw_flow *ovs_flow_alloc(void)
         flow->sf_acts = NULL;
         flow->mask = NULL;
  
-       memset(flow->stats, 0, num_possible_cpus() * sizeof(struct sw_flow_stats));
-       for_each_possible_cpu(cpu)
-               spin_lock_init(&flow->stats[cpu].lock);
+       flow->stats.is_percpu = percpu_stats;
  
+       if (!percpu_stats) {
+               flow->stats.stat = kzalloc(sizeof(*flow->stats.stat), GFP_KERNEL);
+               if (!flow->stats.stat)
+                       goto err;
+
+               spin_lock_init(&flow->stats.stat->lock);
+       } else {
+               flow->stats.cpu_stats = alloc_percpu(struct flow_stats);
+               if (!flow->stats.cpu_stats)
+                       goto err;
+
+               for_each_possible_cpu(cpu) {
+                       struct flow_stats *cpu_stats;
+
+                       cpu_stats = per_cpu_ptr(flow->stats.cpu_stats, cpu);
+                       spin_lock_init(&cpu_stats->lock);
+               }
+       }
         return flow;
+err:
+       kfree(flow);
+       return ERR_PTR(-ENOMEM);
  }
  
  int ovs_flow_tbl_count(struct flow_table *table)
@@ -123,6 +142,10 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets)
  static void flow_free(struct sw_flow *flow)
  {
         kfree((struct sf_flow_acts __force *)flow->sf_acts);
+       if (flow->stats.is_percpu)
+               free_percpu(flow->stats.cpu_stats);
+       else
+               kfree(flow->stats.stat);
         kmem_cache_free(flow_cache, flow);
  }
  
diff --git a/datapath/flow_table.h b/datapath/flow_table.h

index f54aa82..1996e34 100644 (file)
--- a/datapath/flow_table.h
+++ b/datapath/flow_table.h
@@ -55,7 +55,7 @@ struct flow_table {
  int ovs_flow_init(void);
  void ovs_flow_exit(void);
  
-struct sw_flow *ovs_flow_alloc(void);
+struct sw_flow *ovs_flow_alloc(bool percpu_stats);
  void ovs_flow_free(struct sw_flow *, bool deferred);
  
  int ovs_flow_tbl_init(struct flow_table *);
author	Pravin B Shelar <pshelar@nicira.com>
	Thu, 5 Dec 2013 23:50:27 +0000 (15:50 -0800)
committer	Pravin B Shelar <pshelar@nicira.com>
	Tue, 3 Dec 2013 16:57:56 +0000 (08:57 -0800)
datapath/datapath.c		patch | blob | history
datapath/flow.c		patch | blob | history
datapath/flow.h		patch | blob | history
datapath/flow_netlink.c		patch | blob | history
datapath/flow_netlink.h		patch | blob | history
datapath/flow_table.c		patch | blob | history
datapath/flow_table.h		patch | blob | history