datpath: Avoid reporting half updated statistics.
authorJesse Gross <jesse@nicira.com>
Thu, 29 Jul 2010 01:20:43 +0000 (18:20 -0700)
committerJesse Gross <jesse@nicira.com>
Sat, 21 Aug 2010 02:44:46 +0000 (19:44 -0700)
We enforce mutual exclusion when updating statistics by disabling
bottom halves and only writing to per-CPU state.  However, reading
requires looking at the statistics for foreign CPUs, which could be
in the process of updating them since there isn't a lock.  This means
we could get garbage values for 64-bit values on 32-bit machines or
byte counts that don't correspond to packet counts, etc.

This commit introduces a sequence lock for statistics values to avoid
this problem.  Getting a write lock is very cheap - it only requires
incrementing a counter plus a memory barrier (which is compiled away
on x86) to acquire or release the lock and will never block.  On
read we spin until the sequence number hasn't changed in the middle
of the operation, indicating that the we have a consistent set of
values.

Signed-off-by: Jesse Gross <jesse@nicira.com>
datapath/datapath.c
datapath/datapath.h
datapath/vport.c
datapath/vport.h

index e468198..32572c6 100644 (file)
@@ -599,7 +599,11 @@ out:
        /* Update datapath statistics. */
        local_bh_disable();
        stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
+
+       write_seqcount_begin(&stats->seqlock);
        (*(u64 *)((u8 *)stats + stats_counter_off))++;
+       write_seqcount_end(&stats->seqlock);
+
        local_bh_enable();
 }
 
@@ -860,7 +864,11 @@ err_kfree_skb:
 err:
        local_bh_disable();
        stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
+
+       write_seqcount_begin(&stats->seqlock);
        stats->n_lost++;
+       write_seqcount_end(&stats->seqlock);
+
        local_bh_enable();
 
        return err;
@@ -1394,12 +1402,21 @@ static int get_dp_stats(struct datapath *dp, struct odp_stats __user *statsp)
        stats.max_groups = DP_MAX_GROUPS;
        stats.n_frags = stats.n_hit = stats.n_missed = stats.n_lost = 0;
        for_each_possible_cpu(i) {
-               const struct dp_stats_percpu *s;
-               s = per_cpu_ptr(dp->stats_percpu, i);
-               stats.n_frags += s->n_frags;
-               stats.n_hit += s->n_hit;
-               stats.n_missed += s->n_missed;
-               stats.n_lost += s->n_lost;
+               const struct dp_stats_percpu *percpu_stats;
+               struct dp_stats_percpu local_stats;
+               unsigned seqcount;
+
+               percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
+
+               do {
+                       seqcount = read_seqcount_begin(&percpu_stats->seqlock);
+                       local_stats = *percpu_stats;
+               } while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));
+
+               stats.n_frags += local_stats.n_frags;
+               stats.n_hit += local_stats.n_hit;
+               stats.n_missed += local_stats.n_missed;
+               stats.n_lost += local_stats.n_lost;
        }
        stats.max_miss_queue = DP_MAX_QUEUE_LEN;
        stats.max_action_queue = DP_MAX_QUEUE_LEN;
index 8e27283..fd81dfb 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/mutex.h>
 #include <linux/netdevice.h>
 #include <linux/workqueue.h>
+#include <linux/seqlock.h>
 #include <linux/skbuff.h>
 #include <linux/version.h>
 #include "flow.h"
@@ -53,6 +54,7 @@ struct dp_stats_percpu {
        u64 n_hit;
        u64 n_missed;
        u64 n_lost;
+       seqcount_t seqlock;
 };
 
 struct dp_port_group {
index dd1c31f..cdf615a 100644 (file)
@@ -1052,12 +1052,20 @@ int vport_get_stats(struct vport *vport, struct odp_vport_stats *stats)
 
                for_each_possible_cpu(i) {
                        const struct vport_percpu_stats *percpu_stats;
+                       struct vport_percpu_stats local_stats;
+                       unsigned seqcount;
 
                        percpu_stats = per_cpu_ptr(vport->percpu_stats, i);
-                       stats->rx_bytes         += percpu_stats->rx_bytes;
-                       stats->rx_packets       += percpu_stats->rx_packets;
-                       stats->tx_bytes         += percpu_stats->tx_bytes;
-                       stats->tx_packets       += percpu_stats->tx_packets;
+
+                       do {
+                               seqcount = read_seqcount_begin(&percpu_stats->seqlock);
+                               local_stats = *percpu_stats;
+                       } while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));
+
+                       stats->rx_bytes         += local_stats.rx_bytes;
+                       stats->rx_packets       += local_stats.rx_packets;
+                       stats->tx_bytes         += local_stats.tx_bytes;
+                       stats->tx_packets       += local_stats.tx_packets;
                }
 
                err = 0;
@@ -1192,10 +1200,12 @@ void vport_receive(struct vport *vport, struct sk_buff *skb)
                struct vport_percpu_stats *stats;
 
                local_bh_disable();
-
                stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id());
+
+               write_seqcount_begin(&stats->seqlock);
                stats->rx_packets++;
                stats->rx_bytes += skb->len;
+               write_seqcount_end(&stats->seqlock);
 
                local_bh_enable();
        }
@@ -1244,10 +1254,12 @@ int vport_send(struct vport *vport, struct sk_buff *skb)
                struct vport_percpu_stats *stats;
 
                local_bh_disable();
-
                stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id());
+
+               write_seqcount_begin(&stats->seqlock);
                stats->tx_packets++;
                stats->tx_bytes += sent;
+               write_seqcount_end(&stats->seqlock);
 
                local_bh_enable();
        }
index a5c17f6..0a6801d 100644 (file)
@@ -10,6 +10,7 @@
 #define VPORT_H 1
 
 #include <linux/list.h>
+#include <linux/seqlock.h>
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
 
@@ -83,6 +84,7 @@ struct vport_percpu_stats {
        u64 rx_packets;
        u64 tx_bytes;
        u64 tx_packets;
+       seqcount_t seqlock;
 };
 
 struct vport_err_stats {