From: Jesse Gross <jesse@nicira.com>
Date: Sat, 19 Nov 2011 17:08:56 +0000 (-0800)
Subject: datapath: Use u64_stats_sync for datapath and vport stats.
X-Git-Tag: v1.4.0~114
X-Git-Url: http://git.onelab.eu/?a=commitdiff_plain;h=821cb9fac77850ae3546ab38766a58ff5e75e8a8;p=sliver-openvswitch.git

datapath: Use u64_stats_sync for datapath and vport stats.

We currently use a seqcount to prevent reading partial 64-bit stats
on 32-bit CPUs.  u64_stats_sync uses the same logic but elides it on
64-bit and uniprocessor machines.  This improves performance (primarily
on non-x86 architectures) at the cost of not guaranteeing that packet
and byte counts were necessarily read together.

Suggested-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
---

diff --git a/datapath/datapath.c b/datapath/datapath.c
index 1ea5d1e9a..adfc76a49 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -335,10 +335,9 @@ void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 
 out:
 	/* Update datapath statistics. */
-
-	write_seqcount_begin(&stats->seqlock);
+	u64_stats_update_begin(&stats->sync);
 	(*stats_counter)++;
-	write_seqcount_end(&stats->seqlock);
+	u64_stats_update_end(&stats->sync);
 }
 
 static struct genl_family dp_packet_genl_family = {
@@ -381,9 +380,9 @@ int dp_upcall(struct datapath *dp, struct sk_buff *skb,
 err:
 	stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
 
-	write_seqcount_begin(&stats->seqlock);
+	u64_stats_update_begin(&stats->sync);
 	stats->n_lost++;
-	write_seqcount_end(&stats->seqlock);
+	u64_stats_update_end(&stats->sync);
 
 	return err;
 }
@@ -836,14 +835,14 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
 	for_each_possible_cpu(i) {
 		const struct dp_stats_percpu *percpu_stats;
 		struct dp_stats_percpu local_stats;
-		unsigned seqcount;
+		unsigned int start;
 
 		percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
 
 		do {
-			seqcount = read_seqcount_begin(&percpu_stats->seqlock);
+			start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
 			local_stats = *percpu_stats;
-		} while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));
+		} while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
 
 		stats->n_hit += local_stats.n_hit;
 		stats->n_missed += local_stats.n_missed;
diff --git a/datapath/datapath.h b/datapath/datapath.h
index dc2c75c64..85842215a 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -23,8 +23,8 @@
 #include <linux/kernel.h>
 #include <linux/mutex.h>
 #include <linux/netdevice.h>
-#include <linux/seqlock.h>
 #include <linux/skbuff.h>
+#include <linux/u64_stats_sync.h>
 #include <linux/version.h>
 
 #include "checksum.h"
@@ -54,7 +54,7 @@ struct dp_stats_percpu {
 	u64 n_hit;
 	u64 n_missed;
 	u64 n_lost;
-	seqcount_t seqlock;
+	struct u64_stats_sync sync;
 };
 
 /**
diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
index 0857735d5..1f9973b47 100644
--- a/datapath/linux/Modules.mk
+++ b/datapath/linux/Modules.mk
@@ -48,6 +48,7 @@ openvswitch_headers += \
 	linux/compat/include/linux/tcp.h \
 	linux/compat/include/linux/timer.h \
 	linux/compat/include/linux/types.h \
+	linux/compat/include/linux/u64_stats_sync.h \
 	linux/compat/include/linux/udp.h \
 	linux/compat/include/linux/workqueue.h \
 	linux/compat/include/net/checksum.h \
diff --git a/datapath/linux/compat/include/linux/u64_stats_sync.h b/datapath/linux/compat/include/linux/u64_stats_sync.h
new file mode 100644
index 000000000..45b617a29
--- /dev/null
+++ b/datapath/linux/compat/include/linux/u64_stats_sync.h
@@ -0,0 +1,147 @@
+#ifndef _LINUX_U64_STATS_SYNC_WRAPPER_H
+#define _LINUX_U64_STATS_SYNC_WRAPPER_H
+
+#include <linux/version.h>
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
+#include_next <linux/u64_stats_sync.h>
+#else
+
+/*
+ * To properly implement 64bits network statistics on 32bit and 64bit hosts,
+ * we provide a synchronization point, that is a noop on 64bit or UP kernels.
+ *
+ * Key points :
+ * 1) Use a seqcount on SMP 32bits, with low overhead.
+ * 2) Whole thing is a noop on 64bit arches or UP kernels.
+ * 3) Write side must ensure mutual exclusion or one seqcount update could
+ *    be lost, thus blocking readers forever.
+ *    If this synchronization point is not a mutex, but a spinlock or
+ *    spinlock_bh() or disable_bh() :
+ * 3.1) Write side should not sleep.
+ * 3.2) Write side should not allow preemption.
+ * 3.3) If applicable, interrupts should be disabled.
+ *
+ * 4) If reader fetches several counters, there is no guarantee the whole values
+ *    are consistent (remember point 1) : this is a noop on 64bit arches anyway)
+ *
+ * 5) readers are allowed to sleep or be preempted/interrupted : They perform
+ *    pure reads. But if they have to fetch many values, it's better to not allow
+ *    preemptions/interruptions to avoid many retries.
+ *
+ * 6) If counter might be written by an interrupt, readers should block interrupts.
+ *    (On UP, there is no seqcount_t protection, a reader allowing interrupts could
+ *     read partial values)
+ *
+ * 7) For softirq uses, readers can use u64_stats_fetch_begin_bh() and
+ *    u64_stats_fetch_retry_bh() helpers
+ *
+ * Usage :
+ *
+ * Stats producer (writer) should use following template granted it already got
+ * an exclusive access to counters (a lock is already taken, or per cpu
+ * data is used [in a non preemptable context])
+ *
+ *   spin_lock_bh(...) or other synchronization to get exclusive access
+ *   ...
+ *   u64_stats_update_begin(&stats->syncp);
+ *   stats->bytes64 += len; // non atomic operation
+ *   stats->packets64++;    // non atomic operation
+ *   u64_stats_update_end(&stats->syncp);
+ *
+ * While a consumer (reader) should use following template to get consistent
+ * snapshot for each variable (but no guarantee on several ones)
+ *
+ * u64 tbytes, tpackets;
+ * unsigned int start;
+ *
+ * do {
+ *         start = u64_stats_fetch_begin(&stats->syncp);
+ *         tbytes = stats->bytes64; // non atomic operation
+ *         tpackets = stats->packets64; // non atomic operation
+ * } while (u64_stats_fetch_retry(&stats->syncp, start));
+ *
+ *
+ * Example of use in drivers/net/loopback.c, using per_cpu containers,
+ * in BH disabled context.
+ */
+#include <linux/seqlock.h>
+
+struct u64_stats_sync {
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+	seqcount_t	seq;
+#endif
+};
+
+static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+	write_seqcount_begin(&syncp->seq);
+#endif
+}
+
+static inline void u64_stats_update_end(struct u64_stats_sync *syncp)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+	write_seqcount_end(&syncp->seq);
+#endif
+}
+
+static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+	return read_seqcount_begin(&syncp->seq);
+#else
+#if BITS_PER_LONG==32
+	preempt_disable();
+#endif
+	return 0;
+#endif
+}
+
+static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
+					 unsigned int start)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+	return read_seqcount_retry(&syncp->seq, start);
+#else
+#if BITS_PER_LONG==32
+	preempt_enable();
+#endif
+	return false;
+#endif
+}
+
+/*
+ * In case softirq handlers can update u64 counters, readers can use following helpers
+ * - SMP 32bit arches use seqcount protection, irq safe.
+ * - UP 32bit must disable BH.
+ * - 64bit have no problem atomically reading u64 values, irq safe.
+ */
+static inline unsigned int u64_stats_fetch_begin_bh(const struct u64_stats_sync *syncp)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+	return read_seqcount_begin(&syncp->seq);
+#else
+#if BITS_PER_LONG==32
+	local_bh_disable();
+#endif
+	return 0;
+#endif
+}
+
+static inline bool u64_stats_fetch_retry_bh(const struct u64_stats_sync *syncp,
+					 unsigned int start)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+	return read_seqcount_retry(&syncp->seq, start);
+#else
+#if BITS_PER_LONG==32
+	local_bh_enable();
+#endif
+	return false;
+#endif
+}
+
+#endif /* Linux kernel < 2.6.36 */
+#endif /* _LINUX_U64_STATS_SYNC_WRAPPER_H */
diff --git a/datapath/vport.c b/datapath/vport.c
index 91011d2cb..04ae50e55 100644
--- a/datapath/vport.c
+++ b/datapath/vport.c
@@ -376,14 +376,14 @@ void vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
 	for_each_possible_cpu(i) {
 		const struct vport_percpu_stats *percpu_stats;
 		struct vport_percpu_stats local_stats;
-		unsigned seqcount;
+		unsigned int start;
 
 		percpu_stats = per_cpu_ptr(vport->percpu_stats, i);
 
 		do {
-			seqcount = read_seqcount_begin(&percpu_stats->seqlock);
+			start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
 			local_stats = *percpu_stats;
-		} while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));
+		} while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
 
 		stats->rx_bytes		+= local_stats.rx_bytes;
 		stats->rx_packets	+= local_stats.rx_packets;
@@ -444,10 +444,10 @@ void vport_receive(struct vport *vport, struct sk_buff *skb)
 
 	stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id());
 
-	write_seqcount_begin(&stats->seqlock);
+	u64_stats_update_begin(&stats->sync);
 	stats->rx_packets++;
 	stats->rx_bytes += skb->len;
-	write_seqcount_end(&stats->seqlock);
+	u64_stats_update_end(&stats->sync);
 
 	if (!(vport->ops->flags & VPORT_F_FLOW))
 		OVS_CB(skb)->flow = NULL;
@@ -476,10 +476,10 @@ int vport_send(struct vport *vport, struct sk_buff *skb)
 
 		stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id());
 
-		write_seqcount_begin(&stats->seqlock);
+		u64_stats_update_begin(&stats->sync);
 		stats->tx_packets++;
 		stats->tx_bytes += sent;
-		write_seqcount_end(&stats->seqlock);
+		u64_stats_update_end(&stats->sync);
 	}
 	return sent;
 }
diff --git a/datapath/vport.h b/datapath/vport.h
index 0f5f07796..8f03f2e98 100644
--- a/datapath/vport.h
+++ b/datapath/vport.h
@@ -21,9 +21,9 @@
 
 #include <linux/list.h>
 #include <linux/openvswitch.h>
-#include <linux/seqlock.h>
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
+#include <linux/u64_stats_sync.h>
 
 #include "datapath.h"
 
@@ -56,7 +56,7 @@ struct vport_percpu_stats {
 	u64 rx_packets;
 	u64 tx_bytes;
 	u64 tx_packets;
-	seqcount_t seqlock;
+	struct u64_stats_sync sync;
 };
 
 struct vport_err_stats {