Catalli's threaded switch

[sliver-openvswitch.git] / datapath / datapath.c
diff --git a/datapath/datapath.c b/datapath/datapath.c

index d0db550..5ee9157 100644 (file)
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -8,6 +8,8 @@
  
  /* Functions for managing the dp interface/device. */
  
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
  #include <linux/init.h>
  #include <linux/module.h>
  #include <linux/fs.h>
@@ -69,6 +71,23 @@ EXPORT_SYMBOL(dp_ioctl_hook);
  static struct datapath *dps[ODP_MAX];
  static DEFINE_MUTEX(dp_mutex);
  
+/* We limit the number of times that we pass into dp_process_received_packet()
+ * to avoid blowing out the stack in the event that we have a loop. */
+struct loop_counter {
+       int count;              /* Current nesting depth. */
+       bool looping;           /* Loop detected?  Set once count > DP_MAX_LOOPS. */
+};
+
+#define DP_MAX_LOOPS 5
+
+/* We use a separate counter for each CPU for both interrupt and non-interrupt
+ * context in order to keep the limit deterministic for a given packet. */
+struct percpu_loop_counters {
+       struct loop_counter counters[2];        /* Indexed by !!in_interrupt(). */
+};
+
+static DEFINE_PER_CPU(struct percpu_loop_counters, dp_loop_counters);
+
  static int new_dp_port(struct datapath *, struct odp_port *, int port_no);
  
  /* Must be called with rcu_read_lock or dp_mutex. */
@@ -511,6 +530,14 @@ out:
         return err;
  }
  
+static void suppress_loop(struct datapath *dp, struct sw_flow_actions *actions)
+{
+       if (net_ratelimit())
+               pr_warn("%s: flow looped %d times, dropping\n",
+                       dp_name(dp), DP_MAX_LOOPS);
+       actions->n_actions = 0; /* Empties the caller's action list in place. */
+}
+
  /* Must be called with rcu_read_lock. */
  void dp_process_received_packet(struct dp_port *p, struct sk_buff *skb)
  {
@@ -519,34 +546,71 @@ void dp_process_received_packet(struct dp_port *p, struct sk_buff *skb)
         int stats_counter_off;
         struct odp_flow_key key;
         struct tbl_node *flow_node;
+       struct sw_flow *flow;
+       struct sw_flow_actions *acts;
+       struct loop_counter *loop;
+       int error;
  
         OVS_CB(skb)->dp_port = p;
  
-       if (flow_extract(skb, p ? p->port_no : ODPP_NONE, &key)) {
-               if (dp->drop_frags) {
-                       kfree_skb(skb);
-                       stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
-                       goto out;
-               }
+       /* Extract flow from 'skb' into 'key'. */
+       error = flow_extract(skb, p ? p->port_no : ODPP_NONE, &key);
+       if (unlikely(error)) {
+               kfree_skb(skb);
+               return;
+       }
+
+       if (OVS_CB(skb)->is_frag && dp->drop_frags) {
+               kfree_skb(skb);
+               stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
+               goto out;
         }
  
+       /* Look up flow. */
         flow_node = tbl_lookup(rcu_dereference(dp->table), &key, flow_hash(&key), flow_cmp);
-       if (flow_node) {
-               struct sw_flow *flow = flow_cast(flow_node);
-               struct sw_flow_actions *acts = rcu_dereference(flow->sf_acts);
-               flow_used(flow, skb);
-               execute_actions(dp, skb, &key, acts->actions, acts->n_actions,
-                               GFP_ATOMIC);
-               stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
-       } else {
-               stats_counter_off = offsetof(struct dp_stats_percpu, n_missed);
+       if (unlikely(!flow_node)) {
                 dp_output_control(dp, skb, _ODPL_MISS_NR, OVS_CB(skb)->tun_id);
+               stats_counter_off = offsetof(struct dp_stats_percpu, n_missed);
+               goto out;
+       }
+
+       flow = flow_cast(flow_node);
+       flow_used(flow, skb);
+
+       acts = rcu_dereference(flow->sf_acts);
+
+       /* Check whether we've looped too much. */
+       loop = &get_cpu_var(dp_loop_counters).counters[!!in_interrupt()];
+       if (unlikely(++loop->count > DP_MAX_LOOPS))
+               loop->looping = true;
+       if (unlikely(loop->looping)) {
+               suppress_loop(dp, acts);
+               goto out_loop;
         }
  
+       /* Execute actions. */
+       execute_actions(dp, skb, &key, acts->actions, acts->n_actions, GFP_ATOMIC);
+       stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
+
+       /* Check whether sub-actions looped too much. */
+       if (unlikely(loop->looping))
+               suppress_loop(dp, acts);
+
+out_loop:
+       /* Decrement loop counter. */
+       if (!--loop->count)
+               loop->looping = false;
+       put_cpu_var(dp_loop_counters);
+
  out:
+       /* Update datapath statistics. */
         local_bh_disable();
         stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
+
+       write_seqcount_begin(&stats->seqlock);
         (*(u64 *)((u8 *)stats + stats_counter_off))++;
+       write_seqcount_end(&stats->seqlock);
+
         local_bh_enable();
  }
  
@@ -583,9 +647,9 @@ int vswitch_skb_checksum_setup(struct sk_buff *skb)
                 break;
         default:
                 if (net_ratelimit())
-                       printk(KERN_ERR "Attempting to checksum a non-"
-                              "TCP/UDP packet, dropping a protocol"
-                              " %d packet", iph->protocol);
+                       pr_err("Attempting to checksum a non-TCP/UDP packet, "
+                              "dropping a protocol %d packet",
+                              iph->protocol);
                 goto out;
         }
  
@@ -688,11 +752,10 @@ void compute_ip_summed(struct sk_buff *skb, bool xmit)
                 break;
  #endif
         default:
-               printk(KERN_ERR "openvswitch: unknown checksum type %d\n",
-                      skb->ip_summed);
+               pr_err("unknown checksum type %d\n", skb->ip_summed);
                 /* None seems the safest... */
                 OVS_CB(skb)->ip_summed = OVS_CSUM_NONE;
-       }       
+       }
  
  #if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID)
         /* Xen has a special way of representing CHECKSUM_PARTIAL on older
@@ -807,7 +870,11 @@ err_kfree_skb:
  err:
         local_bh_disable();
         stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
+
+       write_seqcount_begin(&stats->seqlock);
         stats->n_lost++;
+       write_seqcount_end(&stats->seqlock);
+
         local_bh_enable();
  
         return err;
@@ -926,7 +993,7 @@ static void get_stats(struct sw_flow *flow, struct odp_flow_stats *stats,
  
         stats->n_packets = flow->packet_count;
         stats->n_bytes = flow->byte_count;
-       stats->ip_tos = flow->ip_tos;
+       stats->reserved = 0;
         stats->tcp_flags = flow->tcp_flags;
         stats->error = 0;
  }
@@ -935,7 +1002,6 @@ static void clear_stats(struct sw_flow *flow)
  {
         flow->used = 0;
         flow->tcp_flags = 0;
-       flow->ip_tos = 0;
         flow->packet_count = 0;
         flow->byte_count = 0;
  }
@@ -1300,7 +1366,9 @@ static int do_execute(struct datapath *dp, const struct odp_execute *execute)
         else
                 skb->protocol = htons(ETH_P_802_2);
  
-       flow_extract(skb, execute->in_port, &key);
+       err = flow_extract(skb, execute->in_port, &key);
+       if (err)
+               goto error_free_skb;
  
         rcu_read_lock();
         err = execute_actions(dp, skb, &key, actions->actions,
@@ -1342,12 +1410,21 @@ static int get_dp_stats(struct datapath *dp, struct odp_stats __user *statsp)
         stats.max_groups = DP_MAX_GROUPS;
         stats.n_frags = stats.n_hit = stats.n_missed = stats.n_lost = 0;
         for_each_possible_cpu(i) {
-               const struct dp_stats_percpu *s;
-               s = per_cpu_ptr(dp->stats_percpu, i);
-               stats.n_frags += s->n_frags;
-               stats.n_hit += s->n_hit;
-               stats.n_missed += s->n_missed;
-               stats.n_lost += s->n_lost;
+               const struct dp_stats_percpu *percpu_stats;
+               struct dp_stats_percpu local_stats;
+               unsigned seqcount;
+
+               percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
+
+               do {
+                       seqcount = read_seqcount_begin(&percpu_stats->seqlock);
+                       local_stats = *percpu_stats;
+               } while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));
+
+               stats.n_frags += local_stats.n_frags;
+               stats.n_hit += local_stats.n_hit;
+               stats.n_missed += local_stats.n_missed;
+               stats.n_lost += local_stats.n_lost;
         }
         stats.max_miss_queue = DP_MAX_QUEUE_LEN;
         stats.max_action_queue = DP_MAX_QUEUE_LEN;
@@ -2207,7 +2284,7 @@ ssize_t openvswitch_read(struct file *f, char __user *buf, size_t nbytes,
         }
  success:
         copy_bytes = tot_copy_bytes = min_t(size_t, skb->len, nbytes);
-       
+
         retval = 0;
         if (skb->ip_summed == CHECKSUM_PARTIAL) {
                 if (copy_bytes == skb->len) {