Setting tag sliver-openvswitch-2.2.90-1
[sliver-openvswitch.git] / datapath / flow.c
index 8be3801..c52081b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2013 Nicira, Inc.
+ * Copyright (c) 2007-2014 Nicira, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -62,112 +62,112 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies)
 
 #define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF))
 
-void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb)
+void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
+                          struct sk_buff *skb)
 {
        struct flow_stats *stats;
-       __be16 tcp_flags = 0;
-
-       if (!flow->stats.is_percpu)
-               stats = flow->stats.stat;
-       else
-               stats = this_cpu_ptr(flow->stats.cpu_stats);
-
-       if ((flow->key.eth.type == htons(ETH_P_IP) ||
-            flow->key.eth.type == htons(ETH_P_IPV6)) &&
-           flow->key.ip.proto == IPPROTO_TCP &&
-           likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) {
-               tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb));
+       int node = numa_node_id();
+
+       stats = rcu_dereference(flow->stats[node]);
+
+       /* Check if already have node-specific stats. */
+       if (likely(stats)) {
+               spin_lock(&stats->lock);
+               /* Mark if we write on the pre-allocated stats. */
+               if (node == 0 && unlikely(flow->stats_last_writer != node))
+                       flow->stats_last_writer = node;
+       } else {
+               stats = rcu_dereference(flow->stats[0]); /* Pre-allocated. */
+               spin_lock(&stats->lock);
+
+               /* If the current NUMA-node is the only writer on the
+                * pre-allocated stats keep using them.
+                */
+               if (unlikely(flow->stats_last_writer != node)) {
+                       /* A previous locker may have already allocated the
+                        * stats, so we need to check again.  If node-specific
+                        * stats were already allocated, we update the pre-
+                        * allocated stats as we have already locked them.
+                        */
+                       if (likely(flow->stats_last_writer != NUMA_NO_NODE)
+                           && likely(!rcu_dereference(flow->stats[node]))) {
+                               /* Try to allocate node-specific stats. */
+                               struct flow_stats *new_stats;
+
+                               new_stats =
+                                       kmem_cache_alloc_node(flow_stats_cache,
+                                                             GFP_THISNODE |
+                                                             __GFP_NOMEMALLOC,
+                                                             node);
+                               if (likely(new_stats)) {
+                                       new_stats->used = jiffies;
+                                       new_stats->packet_count = 1;
+                                       new_stats->byte_count = skb->len;
+                                       new_stats->tcp_flags = tcp_flags;
+                                       spin_lock_init(&new_stats->lock);
+
+                                       rcu_assign_pointer(flow->stats[node],
+                                                          new_stats);
+                                       goto unlock;
+                               }
+                       }
+                       flow->stats_last_writer = node;
+               }
        }
 
-       spin_lock(&stats->lock);
        stats->used = jiffies;
        stats->packet_count++;
        stats->byte_count += skb->len;
        stats->tcp_flags |= tcp_flags;
+unlock:
        spin_unlock(&stats->lock);
 }
 
-static void stats_read(struct flow_stats *stats, bool lock_bh,
-                      struct ovs_flow_stats *ovs_stats,
-                      unsigned long *used, __be16 *tcp_flags)
-{
-       if (lock_bh)
-               spin_lock_bh(&stats->lock);
-       else
-               spin_lock(&stats->lock);
-
-       if (time_after(stats->used, *used))
-               *used = stats->used;
-       *tcp_flags |= stats->tcp_flags;
-       ovs_stats->n_packets += stats->packet_count;
-       ovs_stats->n_bytes += stats->byte_count;
-
-       if (lock_bh)
-               spin_unlock_bh(&stats->lock);
-       else
-               spin_unlock(&stats->lock);
-}
-
-void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats,
+/* Must be called with rcu_read_lock or ovs_mutex. */
+void ovs_flow_stats_get(const struct sw_flow *flow,
+                       struct ovs_flow_stats *ovs_stats,
                        unsigned long *used, __be16 *tcp_flags)
 {
-       int cpu, cur_cpu;
+       int node;
 
        *used = 0;
        *tcp_flags = 0;
        memset(ovs_stats, 0, sizeof(*ovs_stats));
 
-       if (!flow->stats.is_percpu) {
-               stats_read(flow->stats.stat, true, ovs_stats, used, tcp_flags);
-       } else {
-               cur_cpu = get_cpu();
-
-               for_each_possible_cpu(cpu) {
-                       struct flow_stats *stats;
-                       bool lock_bh;
+       for_each_node(node) {
+               struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[node]);
 
-                       stats = per_cpu_ptr(flow->stats.cpu_stats, cpu);
-                       lock_bh = (cpu == cur_cpu);
-                       stats_read(stats, lock_bh, ovs_stats, used, tcp_flags);
+               if (stats) {
+                       /* Local CPU may write on non-local stats, so we must
+                        * block bottom-halves here.
+                        */
+                       spin_lock_bh(&stats->lock);
+                       if (!*used || time_after(stats->used, *used))
+                               *used = stats->used;
+                       *tcp_flags |= stats->tcp_flags;
+                       ovs_stats->n_packets += stats->packet_count;
+                       ovs_stats->n_bytes += stats->byte_count;
+                       spin_unlock_bh(&stats->lock);
                }
-               put_cpu();
        }
 }
 
-static void stats_reset(struct flow_stats *stats, bool lock_bh)
-{
-       if (lock_bh)
-               spin_lock_bh(&stats->lock);
-       else
-               spin_lock(&stats->lock);
-
-       stats->used = 0;
-       stats->packet_count = 0;
-       stats->byte_count = 0;
-       stats->tcp_flags = 0;
-
-       if (lock_bh)
-               spin_unlock_bh(&stats->lock);
-       else
-               spin_unlock(&stats->lock);
-}
-
+/* Called with ovs_mutex. */
 void ovs_flow_stats_clear(struct sw_flow *flow)
 {
-       int cpu, cur_cpu;
-
-       if (!flow->stats.is_percpu) {
-               stats_reset(flow->stats.stat, true);
-       } else {
-               cur_cpu = get_cpu();
-
-               for_each_possible_cpu(cpu) {
-                       bool lock_bh;
-
-                       lock_bh = (cpu == cur_cpu);
-                       stats_reset(per_cpu_ptr(flow->stats.cpu_stats, cpu), lock_bh);
+       int node;
+
+       for_each_node(node) {
+               struct flow_stats *stats = ovsl_dereference(flow->stats[node]);
+
+               if (stats) {
+                       spin_lock_bh(&stats->lock);
+                       stats->used = 0;
+                       stats->packet_count = 0;
+                       stats->byte_count = 0;
+                       stats->tcp_flags = 0;
+                       spin_unlock_bh(&stats->lock);
                }
-               put_cpu();
        }
 }
 
@@ -355,8 +355,8 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
        /* The ICMPv6 type and code fields use the 16-bit transport port
         * fields, so we need to store them in 16-bit network byte order.
         */
-       key->ipv6.tp.src = htons(icmp->icmp6_type);
-       key->ipv6.tp.dst = htons(icmp->icmp6_code);
+       key->tp.src = htons(icmp->icmp6_type);
+       key->tp.dst = htons(icmp->icmp6_code);
 
        if (icmp->icmp6_code == 0 &&
            (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
@@ -395,14 +395,14 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
                            && opt_len == 8) {
                                if (unlikely(!is_zero_ether_addr(key->ipv6.nd.sll)))
                                        goto invalid;
-                               memcpy(key->ipv6.nd.sll,
-                                   &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
+                               ether_addr_copy(key->ipv6.nd.sll,
+                                   &nd->opt[offset+sizeof(*nd_opt)]);
                        } else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR
                                   && opt_len == 8) {
                                if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll)))
                                        goto invalid;
-                               memcpy(key->ipv6.nd.tll,
-                                   &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
+                               ether_addr_copy(key->ipv6.nd.tll,
+                                   &nd->opt[offset+sizeof(*nd_opt)]);
                        }
 
                        icmp_len -= opt_len;
@@ -462,8 +462,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
         * header in the linear data area.
         */
        eth = eth_hdr(skb);
-       memcpy(key->eth.src, eth->h_source, ETH_ALEN);
-       memcpy(key->eth.dst, eth->h_dest, ETH_ALEN);
+       ether_addr_copy(key->eth.src, eth->h_source);
+       ether_addr_copy(key->eth.dst, eth->h_dest);
 
        __skb_pull(skb, 2 * ETH_ALEN);
        /* We are going to push all headers that we pull, so no need to
@@ -517,21 +517,21 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
                if (key->ip.proto == IPPROTO_TCP) {
                        if (tcphdr_ok(skb)) {
                                struct tcphdr *tcp = tcp_hdr(skb);
-                               key->ipv4.tp.src = tcp->source;
-                               key->ipv4.tp.dst = tcp->dest;
-                               key->ipv4.tp.flags = TCP_FLAGS_BE16(tcp);
+                               key->tp.src = tcp->source;
+                               key->tp.dst = tcp->dest;
+                               key->tp.flags = TCP_FLAGS_BE16(tcp);
                        }
                } else if (key->ip.proto == IPPROTO_UDP) {
                        if (udphdr_ok(skb)) {
                                struct udphdr *udp = udp_hdr(skb);
-                               key->ipv4.tp.src = udp->source;
-                               key->ipv4.tp.dst = udp->dest;
+                               key->tp.src = udp->source;
+                               key->tp.dst = udp->dest;
                        }
                } else if (key->ip.proto == IPPROTO_SCTP) {
                        if (sctphdr_ok(skb)) {
                                struct sctphdr *sctp = sctp_hdr(skb);
-                               key->ipv4.tp.src = sctp->source;
-                               key->ipv4.tp.dst = sctp->dest;
+                               key->tp.src = sctp->source;
+                               key->tp.dst = sctp->dest;
                        }
                } else if (key->ip.proto == IPPROTO_ICMP) {
                        if (icmphdr_ok(skb)) {
@@ -539,8 +539,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
                                /* The ICMP type and code fields use the 16-bit
                                 * transport port fields, so we need to store
                                 * them in 16-bit network byte order. */
-                               key->ipv4.tp.src = htons(icmp->type);
-                               key->ipv4.tp.dst = htons(icmp->code);
+                               key->tp.src = htons(icmp->type);
+                               key->tp.dst = htons(icmp->code);
                        }
                }
 
@@ -560,8 +560,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
                                key->ip.proto = ntohs(arp->ar_op);
                        memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
                        memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
-                       memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN);
-                       memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN);
+                       ether_addr_copy(key->ipv4.arp.sha, arp->ar_sha);
+                       ether_addr_copy(key->ipv4.arp.tha, arp->ar_tha);
                }
        } else if (key->eth.type == htons(ETH_P_IPV6)) {
                int nh_len;             /* IPv6 Header + Extensions */
@@ -586,21 +586,21 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
                if (key->ip.proto == NEXTHDR_TCP) {
                        if (tcphdr_ok(skb)) {
                                struct tcphdr *tcp = tcp_hdr(skb);
-                               key->ipv6.tp.src = tcp->source;
-                               key->ipv6.tp.dst = tcp->dest;
-                               key->ipv6.tp.flags = TCP_FLAGS_BE16(tcp);
+                               key->tp.src = tcp->source;
+                               key->tp.dst = tcp->dest;
+                               key->tp.flags = TCP_FLAGS_BE16(tcp);
                        }
                } else if (key->ip.proto == NEXTHDR_UDP) {
                        if (udphdr_ok(skb)) {
                                struct udphdr *udp = udp_hdr(skb);
-                               key->ipv6.tp.src = udp->source;
-                               key->ipv6.tp.dst = udp->dest;
+                               key->tp.src = udp->source;
+                               key->tp.dst = udp->dest;
                        }
                } else if (key->ip.proto == NEXTHDR_SCTP) {
                        if (sctphdr_ok(skb)) {
                                struct sctphdr *sctp = sctp_hdr(skb);
-                               key->ipv6.tp.src = sctp->source;
-                               key->ipv6.tp.dst = sctp->dest;
+                               key->tp.src = sctp->source;
+                               key->tp.dst = sctp->dest;
                        }
                } else if (key->ip.proto == NEXTHDR_ICMP) {
                        if (icmp6hdr_ok(skb)) {