static struct datapath *dps[ODP_MAX];
static DEFINE_MUTEX(dp_mutex);
-/* Number of milliseconds between runs of the maintenance thread. */
-#define MAINT_SLEEP_MSECS 1000
+/* We limit the number of times that we pass into dp_process_received_packet()
+ * to avoid blowing out the stack in the event that we have a loop. */
+struct loop_counter {
+ int count; /* Current recursion depth of dp_process_received_packet(). */
+ bool looping; /* Loop detected? */
+};
+
+/* Maximum recursion depth before a flow's packets are suppressed. */
+#define DP_MAX_LOOPS 5
+
+/* We use a separate counter for each CPU for both interrupt and non-interrupt
+ * context in order to keep the limit deterministic for a given packet.
+ * counters[0] serves non-interrupt context, counters[1] interrupt context;
+ * the index is computed as !!in_interrupt() in dp_process_received_packet(). */
+struct percpu_loop_counters {
+ struct loop_counter counters[2];
+};
+
+static DEFINE_PER_CPU(struct percpu_loop_counters, dp_loop_counters);
static int new_dp_port(struct datapath *, struct odp_port *, int port_no);
p->port_no = port_no;
p->dp = dp;
+ p->vport = vport;
atomic_set(&p->sflow_pool, 0);
err = vport_attach(vport, p);
return err;
}
+/* Called when a packet of a flow has looped more than DP_MAX_LOOPS times:
+ * logs a rate-limited warning and clears the flow's action list in place so
+ * the packet is dropped instead of forwarded.
+ * NOTE(review): 'actions' is the RCU-visible shared sw_flow_actions (see the
+ * rcu_dereference() at the call site), so zeroing n_actions suppresses every
+ * subsequent packet of this flow as well — confirm that is intended. */
+static void suppress_loop(struct datapath *dp, struct sw_flow_actions *actions)
+{
+ if (net_ratelimit())
+ printk(KERN_WARNING "%s: flow looped %d times, dropping\n",
+ dp_name(dp), DP_MAX_LOOPS);
+ actions->n_actions = 0;
+}
+
/* Must be called with rcu_read_lock. */
+/* Receive-path entry point: extracts the flow key from 'skb' (received on
+ * port 'p'), looks up the matching flow, and executes its actions under a
+ * per-CPU loop counter that bounds recursion through execute_actions().
+ * Consumes 'skb' on every path.
+ * NOTE(review): 'dp' and 'stats' are declared in context elided from this
+ * hunk (presumably dp = p->dp and a struct dp_stats_percpu pointer). */
void dp_process_received_packet(struct dp_port *p, struct sk_buff *skb)
{
int stats_counter_off;
struct odp_flow_key key;
struct tbl_node *flow_node;
-
- WARN_ON_ONCE(skb_shared(skb));
- skb_warn_if_lro(skb);
+ struct sw_flow *flow;
+ struct sw_flow_actions *acts;
+ struct loop_counter *loop;
+ int error;
OVS_CB(skb)->dp_port = p;
- if (flow_extract(skb, p ? p->port_no : ODPP_NONE, &key)) {
- if (dp->drop_frags) {
- kfree_skb(skb);
- stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
- goto out;
- }
+ /* Extract flow from 'skb' into 'key'. */
+ error = flow_extract(skb, p ? p->port_no : ODPP_NONE, &key);
+ if (unlikely(error)) {
+ kfree_skb(skb);
+ return;
+ }
+
+ /* Drop fragments if the datapath is configured to do so. */
+ if (OVS_CB(skb)->is_frag && dp->drop_frags) {
+ kfree_skb(skb);
+ stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
+ goto out;
}
+ /* Look up flow. */
flow_node = tbl_lookup(rcu_dereference(dp->table), &key, flow_hash(&key), flow_cmp);
- if (flow_node) {
- struct sw_flow *flow = flow_cast(flow_node);
- struct sw_flow_actions *acts = rcu_dereference(flow->sf_acts);
- flow_used(flow, skb);
- execute_actions(dp, skb, &key, acts->actions, acts->n_actions,
- GFP_ATOMIC);
- stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
- } else {
- stats_counter_off = offsetof(struct dp_stats_percpu, n_missed);
+ if (unlikely(!flow_node)) {
+ /* No matching flow: report the miss (_ODPL_MISS_NR) and count it. */
dp_output_control(dp, skb, _ODPL_MISS_NR, OVS_CB(skb)->tun_id);
+ stats_counter_off = offsetof(struct dp_stats_percpu, n_missed);
+ goto out;
+ }
+
+ flow = flow_cast(flow_node);
+ flow_used(flow, skb);
+
+ acts = rcu_dereference(flow->sf_acts);
+
+ /* Check whether we've looped too much. */
+ loop = &get_cpu_var(dp_loop_counters).counters[!!in_interrupt()];
+ if (unlikely(++loop->count > DP_MAX_LOOPS))
+ loop->looping = true;
+ if (unlikely(loop->looping)) {
+ suppress_loop(dp, acts);
+ /* FIX: nothing below consumes the skb on this path and
+ * stats_counter_off was never assigned, so the fall-through
+ * to 'out' read an uninitialized offset.  Free the packet
+ * and account it as a hit (the flow did match). */
+ kfree_skb(skb);
+ stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
+ goto out_loop;
}
+ /* Execute actions. */
+ execute_actions(dp, skb, &key, acts->actions, acts->n_actions, GFP_ATOMIC);
+ stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
+
+ /* Check whether sub-actions looped too much. */
+ if (unlikely(loop->looping))
+ suppress_loop(dp, acts);
+
+out_loop:
+ /* Decrement loop counter. */
+ if (!--loop->count)
+ loop->looping = false;
+ put_cpu_var(dp_loop_counters);
+
out:
+ /* Update datapath statistics. */
local_bh_disable();
stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
+
+ /* Bumping the seqcount lets get_dp_stats() read a consistent snapshot
+ * without locking; bottom halves are disabled to serialize writers on
+ * this CPU. */
+ write_seqcount_begin(&stats->seqlock);
(*(u64 *)((u8 *)stats + stats_counter_off))++;
+ write_seqcount_end(&stats->seqlock);
+
local_bh_enable();
}
err:
local_bh_disable();
stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
+
+ write_seqcount_begin(&stats->seqlock);
stats->n_lost++;
+ write_seqcount_end(&stats->seqlock);
+
local_bh_enable();
return err;
return ERR_PTR(error);
}
-static void get_stats(struct sw_flow *flow, struct odp_flow_stats *stats)
+/* Returns the current offset between the monotonic clock and the
+ * jiffies-derived clock (monotonic_now - jiffies_now).  Adding this offset
+ * to a timestamp produced by jiffies_to_timespec() yields an approximate
+ * monotonic timespec; callers sample it once per batch so a set of flows is
+ * reported against a consistent clock (see get_stats()). */
+static struct timespec get_time_offset(void)
+{
+ struct timespec now_mono, now_jiffies;
+
+ ktime_get_ts(&now_mono);
+ jiffies_to_timespec(jiffies, &now_jiffies);
+ return timespec_sub(now_mono, now_jiffies);
+}
+
+/* Fills '*stats' from 'flow' for export to userspace.  'time_offset' must be
+ * a value obtained from get_time_offset(); it is used to convert the
+ * jiffies-based flow->used timestamp into seconds/nanoseconds. */
+static void get_stats(struct sw_flow *flow, struct odp_flow_stats *stats,
+ struct timespec time_offset)
{
- if (flow->used.tv_sec) {
- stats->used_sec = flow->used.tv_sec;
- stats->used_nsec = flow->used.tv_nsec;
+ if (flow->used) {
+ struct timespec flow_ts, used;
+
+ /* flow->used is now a jiffies value; convert it and rebase
+ * onto the caller-supplied clock offset, normalizing nsec. */
+ jiffies_to_timespec(flow->used, &flow_ts);
+ set_normalized_timespec(&used, flow_ts.tv_sec + time_offset.tv_sec,
+ flow_ts.tv_nsec + time_offset.tv_nsec);
+
+ stats->used_sec = used.tv_sec;
+ stats->used_nsec = used.tv_nsec;
} else {
stats->used_sec = 0;
stats->used_nsec = 0;
}
+
stats->n_packets = flow->packet_count;
stats->n_bytes = flow->byte_count;
- stats->ip_tos = flow->ip_tos;
+ stats->reserved = 0; /* ip_tos field removed; zero the reserved slot. */
stats->tcp_flags = flow->tcp_flags;
stats->error = 0;
}
+/* Resets 'flow''s statistics; used == 0 means "never used" (see get_stats()). */
static void clear_stats(struct sw_flow *flow)
{
- flow->used.tv_sec = flow->used.tv_nsec = 0;
+ flow->used = 0; /* 'used' is now a jiffies value, not a timespec. */
flow->tcp_flags = 0;
- flow->ip_tos = 0;
flow->packet_count = 0;
flow->byte_count = 0;
}
/* Fetch stats, then clear them if necessary. */
spin_lock_bh(&flow->lock);
- get_stats(flow, stats);
+ get_stats(flow, stats, get_time_offset());
if (uf->flags & ODPPF_ZERO_STATS)
clear_stats(flow);
spin_unlock_bh(&flow->lock);
}
static int do_answer_query(struct sw_flow *flow, u32 query_flags,
+ struct timespec time_offset,
struct odp_flow_stats __user *ustats,
union odp_action __user *actions,
u32 __user *n_actionsp)
u32 n_actions;
spin_lock_bh(&flow->lock);
- get_stats(flow, &stats);
+ get_stats(flow, &stats, time_offset);
if (query_flags & ODPFF_ZERO_TCP_FLAGS)
flow->tcp_flags = 0;
}
+/* Copies 'flow''s stats and actions out to the userspace odp_flow 'ufp'.
+ * 'time_offset' is passed through to get_stats() for converting the
+ * jiffies-based 'used' timestamp.  Returns -EFAULT if the action pointer
+ * cannot be read from userspace, otherwise do_answer_query()'s result. */
static int answer_query(struct sw_flow *flow, u32 query_flags,
+ struct timespec time_offset,
struct odp_flow __user *ufp)
{
union odp_action *actions;
+ /* NOTE(review): 'actions' holds a userspace pointer read via get_user();
+ * it likely wants a __user annotation for sparse — confirm. */
if (get_user(actions, &ufp->actions))
return -EFAULT;
- return do_answer_query(flow, query_flags,
+ return do_answer_query(flow, query_flags, time_offset,
&ufp->stats, actions, &ufp->n_actions);
}
if (IS_ERR(flow))
return PTR_ERR(flow);
- error = answer_query(flow, 0, ufp);
+ error = answer_query(flow, 0, get_time_offset(), ufp);
flow_deferred_free(flow);
return error;
}
static int do_query_flows(struct datapath *dp, const struct odp_flowvec *flowvec)
{
struct tbl *table = rcu_dereference(dp->table);
+ struct timespec time_offset;
u32 i;
+ time_offset = get_time_offset();
+
for (i = 0; i < flowvec->n_flows; i++) {
struct odp_flow __user *ufp = &flowvec->flows[i];
struct odp_flow uf;
if (!flow_node)
error = put_user(ENOENT, &ufp->stats.error);
else
- error = answer_query(flow_cast(flow_node), uf.flags, ufp);
+ error = answer_query(flow_cast(flow_node), uf.flags, time_offset, ufp);
if (error)
return -EFAULT;
}
struct odp_flow __user *uflows;
u32 n_flows;
u32 listed_flows;
+ struct timespec time_offset;
};
static int list_flow(struct tbl_node *node, void *cbdata_)
if (copy_to_user(&ufp->key, &flow->key, sizeof flow->key))
return -EFAULT;
- error = answer_query(flow, 0, ufp);
+ error = answer_query(flow, 0, cbdata->time_offset, ufp);
if (error)
return error;
cbdata.uflows = flowvec->flows;
cbdata.n_flows = flowvec->n_flows;
cbdata.listed_flows = 0;
+ cbdata.time_offset = get_time_offset();
+
error = tbl_foreach(rcu_dereference(dp->table), list_flow, &cbdata);
return error ? error : cbdata.listed_flows;
}
else
skb->protocol = htons(ETH_P_802_2);
- flow_extract(skb, execute->in_port, &key);
+ err = flow_extract(skb, execute->in_port, &key);
+ if (err)
+ goto error_free_skb;
rcu_read_lock();
err = execute_actions(dp, skb, &key, actions->actions,
stats.max_groups = DP_MAX_GROUPS;
stats.n_frags = stats.n_hit = stats.n_missed = stats.n_lost = 0;
for_each_possible_cpu(i) {
- const struct dp_stats_percpu *s;
- s = per_cpu_ptr(dp->stats_percpu, i);
- stats.n_frags += s->n_frags;
- stats.n_hit += s->n_hit;
- stats.n_missed += s->n_missed;
- stats.n_lost += s->n_lost;
+ const struct dp_stats_percpu *percpu_stats;
+ struct dp_stats_percpu local_stats;
+ unsigned seqcount;
+
+ percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
+
+ do {
+ seqcount = read_seqcount_begin(&percpu_stats->seqlock);
+ local_stats = *percpu_stats;
+ } while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));
+
+ stats.n_frags += local_stats.n_frags;
+ stats.n_hit += local_stats.n_hit;
+ stats.n_missed += local_stats.n_missed;
+ stats.n_lost += local_stats.n_lost;
}
stats.max_miss_queue = DP_MAX_QUEUE_LEN;
stats.max_action_queue = DP_MAX_QUEUE_LEN;
}
+/* 32-bit compat counterpart of answer_query(): reads the action pointer as a
+ * compat_uptr_t, widens it with compat_ptr(), and forwards to
+ * do_answer_query() along with the caller-sampled 'time_offset'. */
static int compat_answer_query(struct sw_flow *flow, u32 query_flags,
+ struct timespec time_offset,
struct compat_odp_flow __user *ufp)
{
compat_uptr_t actions;
if (get_user(actions, &ufp->actions))
return -EFAULT;
- return do_answer_query(flow, query_flags, &ufp->stats,
+ return do_answer_query(flow, query_flags, time_offset, &ufp->stats,
compat_ptr(actions), &ufp->n_actions);
}
if (IS_ERR(flow))
return PTR_ERR(flow);
- error = compat_answer_query(flow, 0, ufp);
+ error = compat_answer_query(flow, 0, get_time_offset(), ufp);
flow_deferred_free(flow);
return error;
}
static int compat_query_flows(struct datapath *dp, struct compat_odp_flow *flows, u32 n_flows)
{
struct tbl *table = rcu_dereference(dp->table);
+ struct timespec time_offset;
u32 i;
+ time_offset = get_time_offset();
+
for (i = 0; i < n_flows; i++) {
struct compat_odp_flow __user *ufp = &flows[i];
struct odp_flow uf;
if (!flow_node)
error = put_user(ENOENT, &ufp->stats.error);
else
- error = compat_answer_query(flow_cast(flow_node), uf.flags, ufp);
+ error = compat_answer_query(flow_cast(flow_node), uf.flags, time_offset, ufp);
if (error)
return -EFAULT;
}
struct compat_odp_flow __user *uflows;
u32 n_flows;
u32 listed_flows;
+ struct timespec time_offset;
};
static int compat_list_flow(struct tbl_node *node, void *cbdata_)
if (copy_to_user(&ufp->key, &flow->key, sizeof flow->key))
return -EFAULT;
- error = compat_answer_query(flow, 0, ufp);
+ error = compat_answer_query(flow, 0, cbdata->time_offset, ufp);
if (error)
return error;
cbdata.uflows = flows;
cbdata.n_flows = n_flows;
cbdata.listed_flows = 0;
+ cbdata.time_offset = get_time_offset();
+
error = tbl_foreach(rcu_dereference(dp->table), compat_list_flow, &cbdata);
return error ? error : cbdata.listed_flows;
}