Merge branch 'master' of git://openvswitch.org/openvswitch

author Giuseppe Lettieri <g.lettieri@iet.unipi.it>

Thu, 8 Aug 2013 14:42:27 +0000 (16:42 +0200)

committer Giuseppe Lettieri <g.lettieri@iet.unipi.it>

Thu, 8 Aug 2013 14:42:27 +0000 (16:42 +0200)
author Giuseppe Lettieri <g.lettieri@iet.unipi.it>
Thu, 8 Aug 2013 14:42:27 +0000 (16:42 +0200)
committer Giuseppe Lettieri <g.lettieri@iet.unipi.it>
Thu, 8 Aug 2013 14:42:27 +0000 (16:42 +0200)
diff --git a/AUTHORS b/AUTHORS

index d11c7c5..fc665b3 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -67,6 +67,7 @@ Natasha Gude            natasha@nicira.com
  Neil McKee              neil.mckee@inmon.com
  Paraneetharan Chandrasekaran    paraneetharanc@gmail.com
  Paul Fazzone            pfazzone@nicira.com
+Paul Ingram             paul@nicira.com
  Pavithra Ramesh         paramesh@vmware.com
  Philippe Jung           phil.jung@free.fr
  Pravin B Shelar         pshelar@nicira.com
@@ -186,7 +187,6 @@ Nagi Reddy Jonnala      njonnala@Brocade.com
  Niklas Andersson        nandersson@nicira.com
  Padmanabhan Krishnan    kprad1@yahoo.com
  Pankaj Thakkar          thakkar@nicira.com
-Paul Ingram             paul@nicira.com
  Paulo Cravero           pcravero@as2594.net
  Peter Balland           peter@nicira.com
  Peter Phaal             peter.phaal@inmon.com
diff --git a/CodingStyle b/CodingStyle

index 22f0f45..55b37a1 100644 (file)
--- a/CodingStyle
+++ b/CodingStyle
@@ -518,7 +518,8 @@ older compilers:
  
    As a matter of style, avoid // comments.
  
-  Avoid using GCC extensions unless you also add a fallback for
-non-GCC compilers.  You can, however, use GCC extensions and C99
-features in code that compiles only on GNU/Linux (such as
-lib/netdev-linux.c), because GCC is the system compiler there.
+  Avoid using GCC or Clang extensions unless you also add a fallback
+for other compilers.  You can, however, use C99 features or GCC
+extensions also supported by Clang in code that compiles only on
+GNU/Linux (such as lib/netdev-linux.c), because GCC is the system
+compiler there.
diff --git a/FAQ b/FAQ

index 6b5d8da..810803e 100644 (file)
--- a/FAQ
+++ b/FAQ
@@ -148,7 +148,7 @@ A: The following table lists the Linux kernel versions against which the
         1.9.x      2.6.18 to 3.8
         1.10.x     2.6.18 to 3.8
         1.11.x     2.6.18 to 3.8
-       1.12.x     2.6.18 to 3.8
+       1.12.x     2.6.18 to 3.9
  
     Open vSwitch userspace should also work with the Linux kernel module
     built into Linux 3.3 and later.
diff --git a/INSTALL b/INSTALL

index a0eb266..4c54b59 100644 (file)
--- a/INSTALL
+++ b/INSTALL
@@ -18,8 +18,13 @@ you will need the following software:
  
      - GNU make.
  
-    - The GNU C compiler.  We generally test with version 4.1, 4.2, or
-      4.3.
+    - A C compiler, such as:
+
+        * GCC 4.x.
+
+        * Clang.  Clang 3.4 and later provide useful static semantic
+          analysis and thread-safety checks.  For Ubuntu, there are
+          nightly built packages available on clang's website.
  
      - libssl, from OpenSSL, is optional but recommended if you plan to
        connect the Open vSwitch to an OpenFlow controller.  libssl is
@@ -28,11 +33,6 @@ you will need the following software:
        libssl is installed, then Open vSwitch will automatically build
        with support for it.
  
-    - clang, from LLVM, is optional. It provides useful static semantic
-      analyzer and thread-safety check. clang version must be 3.4 or
-      later. For Ubuntu, there are nightly built packages available on
-      clang's website.
-
  To compile the kernel module on Linux, you must also install the
  following.  If you cannot build or install the kernel module, you may
  use the userspace-only implementation, at a cost in performance.  The
diff --git a/NEWS b/NEWS

index 3bf4421..f9953ab 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -19,6 +19,9 @@ v1.12.0 - xx xxx xxxx
        through database paths (e.g. Private key option with the database name
        should look like "--private-key=db:Open_vSwitch,SSL,private_key").
      - Added ovs-dev.py, a utility script helpful for Open vSwitch developers.
+    - Support for Linux kernels up to 3.9.
+    - ovs-ofctl:
+      * New "ofp-parse" for printing OpenFlow messages read from a file.
  
  
  v1.11.0 - xx xxx xxxx
@@ -44,6 +47,7 @@ v1.11.0 - xx xxx xxxx
          in_port to some unused value, such as OFPP_NONE.)
      - ovs-dpctl:
        * New debugging commands "add-flow", "mod-flow", "del-flow".
+      * "dump-flows" now has a -m option to increase output verbosity.
      - In dpif-based bridges, cache action translations, which can improve
        flow set up performance by 80% with a complicated flow table.
      - New syslog format, prefixed with "ovs|", to be easier to filter.
diff --git a/OPENFLOW-1.1+ b/OPENFLOW-1.1+

index aea689e..329c5a2 100644 (file)
--- a/OPENFLOW-1.1+
+++ b/OPENFLOW-1.1+
@@ -55,32 +55,66 @@ The list of remaining work items for OpenFlow 1.1 is below.  It is
  probably incomplete.
  
      * Implement Write-Actions instruction.
+      [required for OF1.1+]
  
      * The new in_phy_port field in OFPT_PACKET_IN needs some kind of
        implementation.  It has a sensible interpretation for tunnels
        but in general the physical port is not in the datapath for OVS
        so the value is not necessarily meaningful.  We might have to
        just fix it as the same as in_port.
+      [required for OF1.1; optional for OF1.2+]
  
-    * OFPT_TABLE_MOD stats.  This is new in OF1.1, so we need to
+    * OFPT_TABLE_MOD message.  This is new in OF1.1, so we need to
        implement it.  It should be implemented so that the default OVS
        behavior does not change.
+      [required for OF1.1 and OF1.2]
+
+    * Flow table stats (OFPST_TABLE).
+
+        * Reference count (active entries)
+          [implemented]
+          [required for OF1.1 and OF1.2]
+
+        * Packet Lookups
+          [required for OF1.1; optional for OF1.2]
+
+        * Packet Matches
+          [required for OF1.1; optional for OF1.2]
  
      * MPLS.  Simon Horman maintains a patch series that adds this
        feature.  This is partially merged.
+      [optional for OF1.1+]
  
      * SCTP.  Joe Stringer maintains a patch series that adds this
        feature.  It has received review comments that need to be
        addressed before it is merged.
+      [optional for OF1.1+]
  
      * Match and set double-tagged VLANs (QinQ).  This requires kernel
        work for reasonable performance.
+      [optional for OF1.1+]
  
      * VLANs tagged with 88a8 Ethertype.  This requires kernel work for
        reasonable performance.
+      [required for OF1.1+]
  
      * Groups.
  
+        * Type all
+          [required for OF1.1+]
+
+        * Type select
+          [optional for OF1.1+]
+
+        * Type indirect
+          [required for OF1.1+]
+
+        * Type fast failover
+          [optional for OF1.1+]
+
+        * Statistics
+          [optional for OF1.1+]
+
  OpenFlow 1.2
  ------------
  
@@ -91,10 +125,13 @@ end of the OF1.2 spec.  I didn't compare the specs carefully yet.)
      * OFPT_FLOW_MOD:
  
          * New flag OFPFF_RESET_COUNTS.
+          [required for OF1.2+]
  
          * Add ability to delete flow in all tables.
+          [required for OF1.2+]
  
          * Update DESIGN to describe OF1.2 behavior also.
+          [required for OF1.2+]
  
  OpenFlow 1.3
  ------------
@@ -104,47 +141,63 @@ following additional work.  (This is based on the change log at the
  end of the OF1.3 spec, reusing most of the section titles directly.  I
  didn't compare the specs carefully yet.)
  
+    * Send errors for unsupported multipart requests.
+      [required for OF1.3+]
+
      * Add support for multipart requests.
+      [optional for OF1.3+]
  
      * Add OFPMP_TABLE_FEATURES statistics.
+      [optional for OF1.3+]
  
      * More flexible table miss support.
+      [required for OF1.3+]
  
      * IPv6 extension header handling support.  Fully implementing this
        requires kernel support.  This likely will take some careful and
        probably time-consuming design work.  The actual coding, once
        that is all done, is probably 2 or 3 days work.
+      [optional for OF1.3+]
  
      * Per-flow meters.  Similar to IPv6 extension headers in kernel
        and design requirements.  Might be politically difficult to add
        directly to the kernel module, since its functionality overlaps
        with tc.  Ideally, therefore, we could implement these somehow
        with tc, but I haven't investigated whether that makes sense.
+      [optional for OF1.3+]
  
      * Per-connection event filtering.  OF1.3 adopted Open vSwitch's
        existing design for this feature so implementation should be
        easy.
+      [required for OF1.3+]
  
-    * Auxiliary connections.  These are optional, so a minimal
-      implementation would not need them.  An implementation in
-      generic code might be a week's worth of work.  The value of an
-      implementation in generic code is questionable, though, since
-      much of the benefit of axuiliary connections is supposed to be
-      to take advantage of hardware support.  (We could make the
-      kernel module somehow send packets across the auxiliary
-      connections directly, for some kind of "hardware" support, if we
-      judged it useful enough.)
+    * Auxiliary connections.  An implementation in generic code might
+      be a week's worth of work.  The value of an implementation in
+      generic code is questionable, though, since much of the benefit
+      of auxiliary connections is supposed to be to take advantage of
+      hardware support.  (We could make the kernel module somehow
+      send packets across the auxiliary connections directly, for
+      some kind of "hardware" support, if we judged it useful enough.)
+      [optional for OF1.3+]
  
      * MPLS BoS matching.  (Included in Simon's MPLS series?)
+      [optional for OF1.3+]
  
      * Provider Backbone Bridge tagging.  I don't plan to implement
        this (but we'd accept an implementation).
+      [optional for OF1.3+]
  
      * Rework tag order.  I'm not sure whether we need to do anything
-      for this.
+      for this. Part of MPLS patchset by Simon Horman.
+      [required for OF1.3+]
  
      * On-demand flow counters.  I think this might be a real
        optimization in some cases for the software switch.
+      [optional for OF1.3+]
+
+    * Duration Statistics
+      - New for Per Port, Per Queue, Per Group
+      [required for OF1.3+]
  
  How to contribute
  -----------------
diff --git a/acinclude.m4 b/acinclude.m4

index 30a4dc6..6033bfa 100644 (file)
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -527,3 +527,50 @@ AC_DEFUN([OVS_ENABLE_SPARSE],
       [if test $ovs_cv_gnu_make_if = yes; then
          CC='$(if $(C),REAL_CC="'"$CC"'" CHECK="$(SPARSE) -I $(top_srcdir)/include/sparse $(SPARSEFLAGS) $(SPARSE_EXTRA_INCLUDES) " cgcc $(CGCCFLAGS),'"$CC"')'
        fi])])
+
+dnl OVS_CHECK_PTHREAD_SET_NAME
+dnl
+dnl This checks for three known variants of pthreads functions for setting
+dnl the name of the current thread:
+dnl
+dnl   glibc: int pthread_setname_np(pthread_t, const char *name);
+dnl   NetBSD: int pthread_setname_np(pthread_t, const char *format, void *arg);
+dnl   FreeBSD: int pthread_set_name_np(pthread_t, const char *name);
+dnl
+dnl For glibc and FreeBSD, the arguments are just a thread and its name.  For
+dnl NetBSD, 'format' is a printf() format string and 'arg' is an argument to
+dnl provide to it.
+dnl
+dnl This macro defines:
+dnl
+dnl    glibc: HAVE_GLIBC_PTHREAD_SETNAME_NP
+dnl    NetBSD: HAVE_NETBSD_PTHREAD_SETNAME_NP
+dnl    FreeBSD: HAVE_PTHREAD_SET_NAME_NP
+AC_DEFUN([OVS_CHECK_PTHREAD_SET_NAME],
+  [AC_CHECK_FUNCS([pthread_set_name_np])
+   if test $ac_cv_func_pthread_set_name_np != yes; then
+     AC_CACHE_CHECK(
+       [for pthread_setname_np() variant],
+       [ovs_cv_pthread_setname_np],
+       [AC_LINK_IFELSE(
+        [AC_LANG_PROGRAM([#include <pthread.h>
+  ], [pthread_setname_np(pthread_self(), "name");])],
+        [ovs_cv_pthread_setname_np=glibc],
+         [AC_LINK_IFELSE(
+          [AC_LANG_PROGRAM([#include <pthread.h>
+], [pthread_setname_np(pthread_self(), "%s", "name");])],
+           [ovs_cv_pthread_setname_np=netbsd],
+          [ovs_cv_pthread_setname_np=none])])])
+     case $ovs_cv_pthread_setname_np in # (
+       glibc)
+         AC_DEFINE(
+           [HAVE_GLIBC_PTHREAD_SETNAME_NP], [1],
+           [Define to 1 if pthread_setname_np() is available and takes 2 parameters (like glibc).])
+         ;; # (
+       netbsd)
+         AC_DEFINE(
+           [HAVE_NETBSD_PTHREAD_SETNAME_NP], [1],
+           [Define to 1 if pthread_setname_np() is available and takes 3 parameters (like NetBSD).])
+         ;;
+     esac
+   fi])
diff --git a/configure.ac b/configure.ac

index 89541aa..7988633 100644 (file)
--- a/configure.ac
+++ b/configure.ac
@@ -64,8 +64,7 @@ AC_CHECK_DECLS([sys_siglist], [], [], [[#include <signal.h>]])
  AC_CHECK_MEMBERS([struct stat.st_mtim.tv_nsec, struct stat.st_mtimensec],
    [], [], [[#include <sys/stat.h>]])
  AC_CHECK_MEMBERS([struct ifreq.ifr_flagshigh], [], [], [[#include <net/if.h>]])
-AC_CHECK_FUNCS([mlockall strnlen getloadavg statvfs getmntent_r \
-  pthread_setname_np pthread_set_name_np])
+AC_CHECK_FUNCS([mlockall strnlen getloadavg statvfs getmntent_r])
  AC_CHECK_HEADERS([mntent.h sys/statvfs.h linux/types.h linux/if_ether.h stdatomic.h])
  AC_CHECK_HEADERS([net/if_mib.h], [], [], [[#include <sys/types.h>
  #include <net/if.h>]])
@@ -89,6 +88,7 @@ OVS_CHECK_ATOMIC_ALWAYS_LOCK_FREE(2)
  OVS_CHECK_ATOMIC_ALWAYS_LOCK_FREE(4)
  OVS_CHECK_ATOMIC_ALWAYS_LOCK_FREE(8)
  OVS_CHECK_POSIX_AIO
+OVS_CHECK_PTHREAD_SET_NAME
  
  OVS_ENABLE_OPTION([-Wall])
  OVS_ENABLE_OPTION([-Wextra])
diff --git a/datapath/datapath.c b/datapath/datapath.c

index 4330ce3..e5e0616 100644 (file)
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -63,8 +63,8 @@
  #include "vport-netdev.h"
  
  #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) || \
-    LINUX_VERSION_CODE >= KERNEL_VERSION(3,9,0)
-#error Kernels before 2.6.18 or after 3.8 are not supported by this version of Open vSwitch.
+    LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)
+#error Kernels before 2.6.18 or after 3.9 are not supported by this version of Open vSwitch.
  #endif
  
  #define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
diff --git a/datapath/flow.c b/datapath/flow.c

index 62fdf85..e259e6d 100644 (file)
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -135,13 +135,10 @@ static bool ovs_match_validate(const struct sw_flow_match *match,
                         | (1ULL << OVS_KEY_ATTR_ARP)
                         | (1ULL << OVS_KEY_ATTR_ND));
  
-       if (match->key->phy.in_port == DP_MAX_PORTS &&
-           match->mask && (match->mask->key.phy.in_port == 0xffff))
-               mask_allowed |= (1ULL << OVS_KEY_ATTR_IN_PORT);
-
-       if (match->key->eth.type == htons(ETH_P_802_2) &&
-           match->mask && (match->mask->key.eth.type == htons(0xffff)))
-               mask_allowed |= (1ULL << OVS_KEY_ATTR_ETHERTYPE);
+       /* Always allowed mask fields: exactly one bit (1ULL) per attribute. */
+       mask_allowed |= ((1ULL << OVS_KEY_ATTR_TUNNEL)
+                      | (1ULL << OVS_KEY_ATTR_IN_PORT)
+                      | (1ULL << OVS_KEY_ATTR_ETHERTYPE));
  
         /* Check key attributes. */
         if (match->key->eth.type == htons(ETH_P_ARP)
@@ -1251,14 +1248,16 @@ int ipv4_tun_from_nlattr(const struct nlattr *attr,
                 return -EINVAL;
         }
  
-       if (!match->key->tun_key.ipv4_dst) {
-               OVS_NLERR("IPv4 tunnel destination address is zero.\n");
-               return -EINVAL;
-       }
+       if (!is_mask) {
+               if (!match->key->tun_key.ipv4_dst) {
+                       OVS_NLERR("IPv4 tunnel destination address is zero.\n");
+                       return -EINVAL;
+               }
  
-       if (!ttl) {
-               OVS_NLERR("IPv4 tunnel TTL not specified.\n");
-               return -EINVAL;
+               if (!ttl) {
+                       OVS_NLERR("IPv4 tunnel TTL not specified.\n");
+                       return -EINVAL;
+               }
         }
  
         return 0;
@@ -1312,8 +1311,11 @@ static int metadata_from_nlattrs(struct sw_flow_match *match,  u64 *attrs,
         if (*attrs & (1ULL << OVS_KEY_ATTR_IN_PORT)) {
                 u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
  
-               if (!is_mask && in_port >= DP_MAX_PORTS)
+               if (is_mask)
+                       in_port = 0xffffffff; /* Always exact match in_port. */
+               else if (in_port >= DP_MAX_PORTS)
                         return -EINVAL;
+
                 SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
                 *attrs &= ~(1ULL << OVS_KEY_ATTR_IN_PORT);
         } else if (!is_mask) {
@@ -1383,7 +1385,10 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match,  u64 attrs,
                 __be16 eth_type;
  
                 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
-               if (!is_mask && ntohs(eth_type) < ETH_P_802_3_MIN) {
+               if (is_mask) {
+                       /* Always exact match EtherType. */
+                       eth_type = htons(0xffff);
+               } else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
                         OVS_NLERR("EtherType is less than mimimum (type=%x, min=%x).\n",
                                         ntohs(eth_type), ETH_P_802_3_MIN);
                         return -EINVAL;
@@ -1693,21 +1698,22 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey,
  {
         struct ovs_key_ethernet *eth_key;
         struct nlattr *nla, *encap;
+       bool is_mask = (swkey != output);
  
         if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
                 goto nla_put_failure;
  
-       if (swkey->tun_key.ipv4_dst &&
+       if ((swkey->tun_key.ipv4_dst || is_mask) &&
             ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key))
                 goto nla_put_failure;
  
         if (swkey->phy.in_port == DP_MAX_PORTS) {
-               if ((swkey != output) && (output->phy.in_port == 0xffff))
+               if (is_mask && (output->phy.in_port == 0xffff))
                         if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
                                 goto nla_put_failure;
         } else {
                 u16 upper_u16;
-               upper_u16 = (swkey == output) ? 0 : 0xffff;
+               upper_u16 = !is_mask ? 0 : 0xffff;
  
                 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
                                 (upper_u16 << 16) | output->phy.in_port))
@@ -1727,7 +1733,7 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey,
  
         if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
                 __be16 eth_type;
-               eth_type = (swkey == output) ? htons(ETH_P_8021Q) : htons(0xffff) ;
+               eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff);
                 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
                     nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
                         goto nla_put_failure;
@@ -1744,7 +1750,7 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey,
                  * 0xffff in the mask attribute.  Ethertype can also
                  * be wildcarded.
                  */
-               if (swkey != output && output->eth.type)
+               if (is_mask && output->eth.type)
                         if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
                                                 output->eth.type))
                                 goto nla_put_failure;
diff --git a/datapath/linux/compat/gso.c b/datapath/linux/compat/gso.c

index 43418d3..30332a2 100644 (file)
--- a/datapath/linux/compat/gso.c
+++ b/datapath/linux/compat/gso.c
@@ -65,6 +65,7 @@ static struct sk_buff *tnl_skb_gso_segment(struct sk_buff *skb,
         struct sk_buff *skb1 = skb;
         struct sk_buff *segs;
         __be16 proto = skb->protocol;
+       char cb[sizeof(skb->cb)];
  
         /* setup whole inner packet to get protocol. */
         __skb_pull(skb, mac_offset);
@@ -76,6 +77,10 @@ static struct sk_buff *tnl_skb_gso_segment(struct sk_buff *skb,
         skb_reset_network_header(skb);
         skb_reset_transport_header(skb);
  
+       /* Starting with kernel 3.9, skb->cb is used by skb GSO.  Save a
+        * copy of it so that it can be restored afterwards. */
+       memcpy(cb, skb->cb, sizeof(cb));
+
         segs = __skb_gso_segment(skb, 0, tx_path);
         if (!segs || IS_ERR(segs))
                 goto free;
@@ -89,6 +94,7 @@ static struct sk_buff *tnl_skb_gso_segment(struct sk_buff *skb,
                 skb->mac_len = 0;
  
                 memcpy(ip_hdr(skb), iph, pkt_hlen);
+               memcpy(skb->cb, cb, sizeof(cb));
                 if (OVS_GSO_CB(skb)->fix_segment)
                         OVS_GSO_CB(skb)->fix_segment(skb);
  
diff --git a/datapath/tunnel.c b/datapath/tunnel.c

index ef46a69..bd63da5 100644 (file)
--- a/datapath/tunnel.c
+++ b/datapath/tunnel.c
@@ -144,6 +144,9 @@ static struct sk_buff *handle_offloads(struct sk_buff *skb)
  
         if (skb_is_gso(skb)) {
                 struct sk_buff *nskb;
+               char cb[sizeof(skb->cb)];
+
+               memcpy(cb, skb->cb, sizeof(cb));
  
                 nskb = __skb_gso_segment(skb, 0, false);
                 if (IS_ERR(nskb)) {
@@ -153,6 +156,10 @@ static struct sk_buff *handle_offloads(struct sk_buff *skb)
  
                 consume_skb(skb);
                 skb = nskb;
+               while (nskb) {
+                       memcpy(nskb->cb, cb, sizeof(cb));
+                       nskb = nskb->next;
+               }
         } else if (get_ip_summed(skb) == OVS_CSUM_PARTIAL) {
                 /* Pages aren't locked and could change at any time.
                  * If this happens after we compute the checksum, the
diff --git a/debian/changelog b/debian/changelog

index 09b97f8..10068fa 100644 (file)
--- a/debian/changelog
+++ b/debian/changelog
@@ -23,6 +23,7 @@ openvswitch (1.12.0-1) unstable; urgency=low
        through database paths (e.g. Private key option with the database name
        should look like "--private-key=db:Open_vSwitch,SSL,private_key").
      - Added ovs-dev.py, a utility script helpful for Open vSwitch developers.
+    - Support for Linux kernels up to 3.9.
  
   -- Open vSwitch team <dev@openvswitch.org>  Tue, 03 Jul 2013 15:02:34 -0700
  
diff --git a/lib/async-append-aio.c b/lib/async-append-aio.c

index 48edc38..23430a4 100644 (file)
--- a/lib/async-append-aio.c
+++ b/lib/async-append-aio.c
@@ -50,16 +50,6 @@ struct async_append {
      struct byteq byteq;
  };
  
-static bool async_append_enabled;
-
-void
-async_append_enable(void)
-{
-    assert_single_threaded();
-    forbid_forking("async i/o enabled");
-    async_append_enabled = true;
-}
-
  struct async_append *
  async_append_create(int fd)
  {
@@ -128,11 +118,6 @@ async_append_write(struct async_append *ap, const void *data_, size_t size)
  {
      const uint8_t *data = data_;
  
-    if (!async_append_enabled) {
-        ignore(write(ap->fd, data, size));
-        return;
-    }
-
      while (size > 0) {
          struct aiocb *aiocb;
          size_t chunk_size;
diff --git a/lib/async-append-sync.c b/lib/async-append-null.c

similarity index 63%

rename from lib/async-append-sync.c

rename to lib/async-append-null.c

index d40fdc8..3eef26e 100644 (file)
--- a/lib/async-append-sync.c
+++ b/lib/async-append-null.c
@@ -15,8 +15,8 @@
  
  #include <config.h>
  
-/* This implementation of the async-append.h interface uses ordinary
- * synchronous I/O, so it should be portable everywhere. */
+/* This is a null implementation of the asynchronous I/O interface for systems
+ * that don't have a form of asynchronous I/O. */
  
  #include "async-append.h"
  
@@ -25,38 +25,27 @@
  
  #include "util.h"
  
-struct async_append {
-    int fd;
-};
-
-void
-async_append_enable(void)
-{
-    /* Nothing to do. */
-}
-
  struct async_append *
-async_append_create(int fd)
+async_append_create(int fd OVS_UNUSED)
  {
-    struct async_append *ap = xmalloc(sizeof *ap);
-    ap->fd = fd;
-    return ap;
+    return NULL;
  }
  
  void
  async_append_destroy(struct async_append *ap)
  {
-    free(ap);
+    ovs_assert(ap == NULL);
  }
  
  void
-async_append_write(struct async_append *ap, const void *data, size_t size)
+async_append_write(struct async_append *ap OVS_UNUSED,
+                   const void *data OVS_UNUSED, size_t size OVS_UNUSED)
  {
-    ignore(write(ap->fd, data, size));
+    NOT_REACHED();
  }
  
  void
  async_append_flush(struct async_append *ap OVS_UNUSED)
  {
-    /* Nothing to do. */
+    NOT_REACHED();
  }
diff --git a/lib/async-append.h b/lib/async-append.h

index fb0ce52..0f7a4ae 100644 (file)
--- a/lib/async-append.h
+++ b/lib/async-append.h
@@ -32,24 +32,10 @@
   * Only a single thread may use a given 'struct async_append' at one time.
   */
  
-/* Enables using asynchronous I/O.  Some implementations may treat this as a
- * no-op.
- *
- * Before this function is called, the POSIX aio implementation uses ordinary
- * synchronous I/O because some POSIX aio libraries actually use threads
- * internally, which has enough cost and robustness implications that it's
- * better to use asynchronous I/O only when it has real expected benefits.
- *
- * Must be called while the process is still single-threaded.  May forbid the
- * process from subsequently forking. */
-void async_append_enable(void);
-
  /* Creates and returns a new asynchronous appender for file descriptor 'fd',
- * which the caller must have opened in append mode (O_APPEND).
- *
- * This function must always succeed.  If the system is for some reason unable
- * to support asynchronous I/O on 'fd' then the library must fall back to
- * syncrhonous I/O. */
+ * which the caller must have opened in append mode (O_APPEND).  If the system
+ * is for some reason unable to support asynchronous I/O on 'fd' this function
+ * may return NULL. */
  struct async_append *async_append_create(int fd);
  
  /* Destroys 'ap', without closing its underlying file descriptor. */
diff --git a/lib/automake.mk b/lib/automake.mk

index 22a9b65..cd50e91 100644 (file)
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -193,8 +193,6 @@ lib_libopenvswitch_a_SOURCES = \
         lib/svec.h \
         lib/table.c \
         lib/table.h \
-       lib/tag.c \
-       lib/tag.h \
         lib/timer.c \
         lib/timer.h \
         lib/timeval.c \
@@ -267,7 +265,7 @@ endif
  if HAVE_POSIX_AIO
  lib_libopenvswitch_a_SOURCES += lib/async-append-aio.c
  else
-lib_libopenvswitch_a_SOURCES += lib/async-append-sync.c
+lib_libopenvswitch_a_SOURCES += lib/async-append-null.c
  endif
  
  if ESX
diff --git a/lib/bfd.c b/lib/bfd.c

index 3ac257a..d1b8237 100644 (file)
--- a/lib/bfd.c
+++ b/lib/bfd.c
@@ -16,6 +16,8 @@
  #include "bfd.h"
  
  #include <arpa/inet.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
  
  #include "byte-order.h"
  #include "csum.h"
@@ -28,6 +30,7 @@
  #include "netlink.h"
  #include "odp-util.h"
  #include "ofpbuf.h"
+#include "ovs-thread.h"
  #include "openvswitch/types.h"
  #include "packets.h"
  #include "poll-loop.h"
@@ -55,9 +58,7 @@ VLOG_DEFINE_THIS_MODULE(bfd);
   *
   * - Unit tests.
   *
- * - BFD show into ovs-bugtool.
- *
- * - Set TOS/PCP on inner BFD frame, and outer tunnel header when encapped.
+ * - Set TOS/PCP on the outer tunnel header when encapped.
   *
   * - Sending BFD messages should be in its own thread/process.
   *
@@ -160,6 +161,9 @@ struct bfd {
  
      uint32_t rmt_disc;            /* bfd.RemoteDiscr. */
  
+    uint8_t eth_dst[ETH_ADDR_LEN];/* Ethernet destination address. */
+    bool eth_dst_set;             /* 'eth_dst' set through database. */
+
      uint16_t udp_src;             /* UDP source port. */
  
      /* All timers in milliseconds. */
@@ -177,54 +181,63 @@ struct bfd {
      long long int next_tx;        /* Next TX time. */
      long long int detect_time;    /* RFC 5880 6.8.4 Detection time. */
  
-    int ref_cnt;
      int forwarding_override;      /* Manual override of 'forwarding' status. */
-    bool check_tnl_key;           /* Verify tunnel key of inbound packets? */
+
+    atomic_bool check_tnl_key;    /* Verify tunnel key of inbound packets? */
+    atomic_int ref_cnt;
  };
  
-static bool bfd_in_poll(const struct bfd *);
-static void bfd_poll(struct bfd *bfd);
-static const char *bfd_diag_str(enum diag);
-static const char *bfd_state_str(enum state);
-static long long int bfd_min_tx(const struct bfd *);
-static long long int bfd_tx_interval(const struct bfd *);
-static long long int bfd_rx_interval(const struct bfd *);
-static void bfd_set_next_tx(struct bfd *);
-static void bfd_set_state(struct bfd *, enum state, enum diag);
-static uint32_t generate_discriminator(void);
-static void bfd_put_details(struct ds *, const struct bfd *);
+static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
+static struct hmap all_bfds__ = HMAP_INITIALIZER(&all_bfds__);
+static struct hmap *const all_bfds OVS_GUARDED_BY(mutex) = &all_bfds__;
+
+static bool bfd_forwarding__(const struct bfd *) OVS_REQ_WRLOCK(mutex);
+static bool bfd_in_poll(const struct bfd *) OVS_REQ_WRLOCK(&mutex);
+static void bfd_poll(struct bfd *bfd) OVS_REQ_WRLOCK(&mutex);
+static const char *bfd_diag_str(enum diag) OVS_REQ_WRLOCK(&mutex);
+static const char *bfd_state_str(enum state) OVS_REQ_WRLOCK(&mutex);
+static long long int bfd_min_tx(const struct bfd *) OVS_REQ_WRLOCK(&mutex);
+static long long int bfd_tx_interval(const struct bfd *)
+    OVS_REQ_WRLOCK(&mutex);
+static long long int bfd_rx_interval(const struct bfd *)
+    OVS_REQ_WRLOCK(&mutex);
+static void bfd_set_next_tx(struct bfd *) OVS_REQ_WRLOCK(&mutex);
+static void bfd_set_state(struct bfd *, enum state, enum diag)
+    OVS_REQ_WRLOCK(&mutex);
+static uint32_t generate_discriminator(void) OVS_REQ_WRLOCK(&mutex);
+static void bfd_put_details(struct ds *, const struct bfd *)
+    OVS_REQ_WRLOCK(&mutex);
  static void bfd_unixctl_show(struct unixctl_conn *, int argc,
                               const char *argv[], void *aux OVS_UNUSED);
  static void bfd_unixctl_set_forwarding_override(struct unixctl_conn *,
                                                  int argc, const char *argv[],
                                                  void *aux OVS_UNUSED);
  static void log_msg(enum vlog_level, const struct msg *, const char *message,
-                    const struct bfd *);
+                    const struct bfd *) OVS_REQ_WRLOCK(&mutex);
  
  static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(20, 20);
-static struct hmap all_bfds = HMAP_INITIALIZER(&all_bfds);
  
  /* Returns true if the interface on which 'bfd' is running may be used to
   * forward traffic according to the BFD session state. */
  bool
-bfd_forwarding(const struct bfd *bfd)
+bfd_forwarding(const struct bfd *bfd) OVS_EXCLUDED(mutex)
  {
-    if (bfd->forwarding_override != -1) {
-        return bfd->forwarding_override == 1;
-    }
+    bool ret;
  
-    return bfd->state == STATE_UP
-        && bfd->rmt_diag != DIAG_PATH_DOWN
-        && bfd->rmt_diag != DIAG_CPATH_DOWN
-        && bfd->rmt_diag != DIAG_RCPATH_DOWN;
+    ovs_mutex_lock(&mutex);
+    ret = bfd_forwarding__(bfd);
+    ovs_mutex_unlock(&mutex);
+    return ret;
  }
  
  /* Returns a 'smap' of key value pairs representing the status of 'bfd'
   * intended for the OVS database. */
  void
  bfd_get_status(const struct bfd *bfd, struct smap *smap)
+    OVS_EXCLUDED(mutex)
  {
-    smap_add(smap, "forwarding", bfd_forwarding(bfd) ? "true" : "false");
+    ovs_mutex_lock(&mutex);
+    smap_add(smap, "forwarding", bfd_forwarding__(bfd)? "true" : "false");
      smap_add(smap, "state", bfd_state_str(bfd->state));
      smap_add(smap, "diagnostic", bfd_diag_str(bfd->diag));
  
@@ -232,6 +245,7 @@ bfd_get_status(const struct bfd *bfd, struct smap *smap)
          smap_add(smap, "remote_state", bfd_state_str(bfd->rmt_state));
          smap_add(smap, "remote_diagnostic", bfd_diag_str(bfd->rmt_diag));
      }
+    ovs_mutex_unlock(&mutex);
  }
  
  /* Initializes, destroys, or reconfigures the BFD session 'bfd' (named 'name'),
@@ -240,22 +254,24 @@ bfd_get_status(const struct bfd *bfd, struct smap *smap)
   * handle for the session, or NULL if BFD is not enabled according to 'cfg'.
   * Also returns NULL if cfg is NULL. */
  struct bfd *
-bfd_configure(struct bfd *bfd, const char *name,
-              const struct smap *cfg)
+bfd_configure(struct bfd *bfd, const char *name, const struct smap *cfg)
+    OVS_EXCLUDED(mutex)
  {
-    static uint16_t udp_src = 0;
-    static bool init = false;
+    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
+    static atomic_uint16_t udp_src = ATOMIC_VAR_INIT(0);
  
      long long int min_tx, min_rx;
      bool cpath_down;
+    const char *hwaddr;
+    uint8_t ea[ETH_ADDR_LEN];
  
-    if (!init) {
+    if (ovsthread_once_start(&once)) {
          unixctl_command_register("bfd/show", "[interface]", 0, 1,
                                   bfd_unixctl_show, NULL);
          unixctl_command_register("bfd/set-forwarding",
                                   "[interface] normal|false|true", 1, 2,
                                   bfd_unixctl_set_forwarding_override, NULL);
-        init = true;
+        ovsthread_once_done(&once);
      }
  
      if (!cfg || !smap_get_bool(cfg, "enable", false)) {
@@ -263,29 +279,34 @@ bfd_configure(struct bfd *bfd, const char *name,
          return NULL;
      }
  
+    ovs_mutex_lock(&mutex);
      if (!bfd) {
          bfd = xzalloc(sizeof *bfd);
          bfd->name = xstrdup(name);
          bfd->forwarding_override = -1;
          bfd->disc = generate_discriminator();
-        hmap_insert(&all_bfds, &bfd->node, bfd->disc);
+        hmap_insert(all_bfds, &bfd->node, bfd->disc);
  
          bfd->diag = DIAG_NONE;
          bfd->min_tx = 1000;
          bfd->mult = 3;
-        bfd->ref_cnt = 1;
+        atomic_init(&bfd->ref_cnt, 1);
  
          /* RFC 5881 section 4
           * The source port MUST be in the range 49152 through 65535.  The same
           * UDP source port number MUST be used for all BFD Control packets
           * associated with a particular session.  The source port number SHOULD
           * be unique among all BFD sessions on the system. */
-        bfd->udp_src = (udp_src++ % 16384) + 49152;
+        atomic_add(&udp_src, 1, &bfd->udp_src);
+        bfd->udp_src = (bfd->udp_src % 16384) + 49152;
  
          bfd_set_state(bfd, STATE_DOWN, DIAG_NONE);
+
+        memcpy(bfd->eth_dst, eth_addr_bfd, ETH_ADDR_LEN);
      }
  
-    bfd->check_tnl_key = smap_get_bool(cfg, "check_tnl_key", false);
+    atomic_store(&bfd->check_tnl_key,
+                 smap_get_bool(cfg, "check_tnl_key", false));
      min_tx = smap_get_int(cfg, "min_tx", 100);
      min_tx = MAX(min_tx, 100);
      if (bfd->cfg_min_tx != min_tx) {
@@ -316,6 +337,17 @@ bfd_configure(struct bfd *bfd, const char *name,
          }
          bfd_poll(bfd);
      }
+
+    hwaddr = smap_get(cfg, "bfd_dst_mac");
+    if (hwaddr && eth_addr_from_string(hwaddr, ea) && !eth_addr_is_zero(ea)) {
+        memcpy(bfd->eth_dst, ea, ETH_ADDR_LEN);
+        bfd->eth_dst_set = true;
+    } else if (bfd->eth_dst_set) {
+        memcpy(bfd->eth_dst, eth_addr_bfd, ETH_ADDR_LEN);
+        bfd->eth_dst_set = false;
+    }
+
+    ovs_mutex_unlock(&mutex);
      return bfd;
  }
  
@@ -324,28 +356,35 @@ bfd_ref(const struct bfd *bfd_)
  {
      struct bfd *bfd = CONST_CAST(struct bfd *, bfd_);
      if (bfd) {
-        ovs_assert(bfd->ref_cnt > 0);
-        bfd->ref_cnt++;
+        int orig;
+        atomic_add(&bfd->ref_cnt, 1, &orig);
+        ovs_assert(orig > 0);
      }
      return bfd;
  }
  
  void
-bfd_unref(struct bfd *bfd)
+bfd_unref(struct bfd *bfd) OVS_EXCLUDED(mutex)
  {
      if (bfd) {
-        ovs_assert(bfd->ref_cnt > 0);
-        if (!--bfd->ref_cnt) {
-            hmap_remove(&all_bfds, &bfd->node);
+        int orig;
+
+        atomic_sub(&bfd->ref_cnt, 1, &orig);
+        ovs_assert(orig > 0);
+        if (orig == 1) {
+            ovs_mutex_lock(&mutex);
+            hmap_remove(all_bfds, &bfd->node);
              free(bfd->name);
              free(bfd);
+            ovs_mutex_unlock(&mutex);
          }
      }
  }
  
  void
-bfd_wait(const struct bfd *bfd)
+bfd_wait(const struct bfd *bfd) OVS_EXCLUDED(mutex)
  {
+    ovs_mutex_lock(&mutex);
      if (bfd->flags & FLAG_FINAL) {
          poll_immediate_wake();
      }
@@ -354,11 +393,13 @@ bfd_wait(const struct bfd *bfd)
      if (bfd->state > STATE_DOWN) {
          poll_timer_wait_until(bfd->detect_time);
      }
+    ovs_mutex_unlock(&mutex);
  }
  
  void
-bfd_run(struct bfd *bfd)
+bfd_run(struct bfd *bfd) OVS_EXCLUDED(mutex)
  {
+    ovs_mutex_lock(&mutex);
      if (bfd->state > STATE_DOWN && time_msec() >= bfd->detect_time) {
          bfd_set_state(bfd, STATE_DOWN, DIAG_EXPIRED);
      }
@@ -366,17 +407,22 @@ bfd_run(struct bfd *bfd)
      if (bfd->min_tx != bfd->cfg_min_tx || bfd->min_rx != bfd->cfg_min_rx) {
          bfd_poll(bfd);
      }
+    ovs_mutex_unlock(&mutex);
  }
  
  bool
-bfd_should_send_packet(const struct bfd *bfd)
+bfd_should_send_packet(const struct bfd *bfd) OVS_EXCLUDED(mutex)
  {
-    return bfd->flags & FLAG_FINAL || time_msec() >= bfd->next_tx;
+    bool ret;
+    ovs_mutex_lock(&mutex);
+    ret = bfd->flags & FLAG_FINAL || time_msec() >= bfd->next_tx;
+    ovs_mutex_unlock(&mutex);
+    return ret;
  }
  
  void
  bfd_put_packet(struct bfd *bfd, struct ofpbuf *p,
-               uint8_t eth_src[ETH_ADDR_LEN])
+               uint8_t eth_src[ETH_ADDR_LEN]) OVS_EXCLUDED(mutex)
  {
      long long int min_tx, min_rx;
      struct udp_header *udp;
@@ -384,11 +430,12 @@ bfd_put_packet(struct bfd *bfd, struct ofpbuf *p,
      struct ip_header *ip;
      struct msg *msg;
  
+    ovs_mutex_lock(&mutex);
      if (bfd->next_tx) {
          long long int delay = time_msec() - bfd->next_tx;
          long long int interval = bfd_tx_interval(bfd);
          if (delay > interval * 3 / 2) {
-            VLOG_WARN("%s: long delay of %lldms (expected %lldms) sending BFD"
+            VLOG_INFO("%s: long delay of %lldms (expected %lldms) sending BFD"
                        " control message", bfd->name, delay, interval);
          }
      }
@@ -400,14 +447,15 @@ bfd_put_packet(struct bfd *bfd, struct ofpbuf *p,
  
      ofpbuf_reserve(p, 2); /* Properly align after the ethernet header. */
      eth = ofpbuf_put_uninit(p, sizeof *eth);
-    memcpy(eth->eth_dst, eth_addr_broadcast, ETH_ADDR_LEN);
      memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN);
+    memcpy(eth->eth_dst, bfd->eth_dst, ETH_ADDR_LEN);
      eth->eth_type = htons(ETH_TYPE_IP);
  
      ip = ofpbuf_put_zeros(p, sizeof *ip);
      ip->ip_ihl_ver = IP_IHL_VER(5, 4);
      ip->ip_tot_len = htons(sizeof *ip + sizeof *udp + sizeof *msg);
-    ip->ip_ttl = 255;
+    ip->ip_ttl = MAXTTL;
+    ip->ip_tos = IPTOS_LOWDELAY | IPTOS_THROUGHPUT;
      ip->ip_proto = IPPROTO_UDP;
      ip->ip_src = htonl(0xA9FE0100); /* 169.254.1.0 Link Local. */
      ip->ip_dst = htonl(0xA9FE0101); /* 169.254.1.1 Link Local. */
@@ -445,26 +493,35 @@ bfd_put_packet(struct bfd *bfd, struct ofpbuf *p,
  
      bfd->last_tx = time_msec();
      bfd_set_next_tx(bfd);
+    ovs_mutex_unlock(&mutex);
  }
  
  bool
  bfd_should_process_flow(const struct bfd *bfd, const struct flow *flow,
                          struct flow_wildcards *wc)
  {
+    bool check_tnl_key;
+    memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
+    if (bfd->eth_dst_set && memcmp(bfd->eth_dst, flow->dl_dst, ETH_ADDR_LEN)) {
+        return false;
+    }
+
      memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
      memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
-    if (bfd->check_tnl_key) {
+
+    atomic_read(&bfd->check_tnl_key, &check_tnl_key);
+    if (check_tnl_key) {
          memset(&wc->masks.tunnel.tun_id, 0xff, sizeof wc->masks.tunnel.tun_id);
      }
      return (flow->dl_type == htons(ETH_TYPE_IP)
              && flow->nw_proto == IPPROTO_UDP
-            && flow->tp_dst == htons(3784)
-            && (!bfd->check_tnl_key || flow->tunnel.tun_id == htonll(0)));
+            && flow->tp_dst == htons(BFD_DEST_PORT)
+            && (check_tnl_key || flow->tunnel.tun_id == htonll(0)));
  }
  
  void
  bfd_process_packet(struct bfd *bfd, const struct flow *flow,
-                   const struct ofpbuf *p)
+                   const struct ofpbuf *p) OVS_EXCLUDED(mutex)
  {
      uint32_t rmt_min_rx, pkt_your_disc;
      enum state rmt_state;
@@ -474,16 +531,17 @@ bfd_process_packet(struct bfd *bfd, const struct flow *flow,
  
      /* This function is designed to follow section RFC 5880 6.8.6 closely. */
  
+    ovs_mutex_lock(&mutex);
      if (flow->nw_ttl != 255) {
          /* XXX Should drop in the kernel to prevent DOS. */
-        return;
+        goto out;
      }
  
      msg = ofpbuf_at(p, (uint8_t *)p->l7 - (uint8_t *)p->data, BFD_PACKET_LEN);
      if (!msg) {
          VLOG_INFO_RL(&rl, "%s: Received unparseable BFD control message.",
                       bfd->name);
-        return;
+        goto out;
      }
  
      /* RFC 5880 Section 6.8.6
@@ -503,7 +561,7 @@ bfd_process_packet(struct bfd *bfd, const struct flow *flow,
  
      if (version != BFD_VERSION) {
          log_msg(VLL_WARN, msg, "Incorrect version", bfd);
-        return;
+        goto out;
      }
  
      /* Technically this should happen after the length check. We don't support
@@ -511,29 +569,29 @@ bfd_process_packet(struct bfd *bfd, const struct flow *flow,
      if (flags & FLAG_AUTH) {
          log_msg(VLL_WARN, msg, "Authenticated control message with"
                     " authentication disabled", bfd);
-        return;
+        goto out;
      }
  
      if (msg->length != BFD_PACKET_LEN) {
          log_msg(VLL_WARN, msg, "Unexpected length", bfd);
          if (msg->length < BFD_PACKET_LEN) {
-            return;
+            goto out;
          }
      }
  
      if (!msg->mult) {
          log_msg(VLL_WARN, msg, "Zero multiplier", bfd);
-        return;
+        goto out;
      }
  
      if (flags & FLAG_MULTIPOINT) {
          log_msg(VLL_WARN, msg, "Unsupported multipoint flag", bfd);
-        return;
+        goto out;
      }
  
      if (!msg->my_disc) {
          log_msg(VLL_WARN, msg, "NULL my_disc", bfd);
-        return;
+        goto out;
      }
  
      pkt_your_disc = ntohl(msg->your_disc);
@@ -545,11 +603,11 @@ bfd_process_packet(struct bfd *bfd, const struct flow *flow,
           * well, so in this respect, we are not compliant. */
         if (pkt_your_disc != bfd->disc) {
             log_msg(VLL_WARN, msg, "Incorrect your_disc", bfd);
-           return;
+           goto out;
         }
      } else if (rmt_state > STATE_DOWN) {
          log_msg(VLL_WARN, msg, "Null your_disc", bfd);
-        return;
+        goto out;
      }
  
      bfd->rmt_disc = ntohl(msg->my_disc);
@@ -586,7 +644,7 @@ bfd_process_packet(struct bfd *bfd, const struct flow *flow,
  
      if (bfd->state == STATE_ADMIN_DOWN) {
          VLOG_DBG_RL(&rl, "Administratively down, dropping control message.");
-        return;
+        goto out;
      }
  
      if (rmt_state == STATE_ADMIN_DOWN) {
@@ -619,17 +677,33 @@ bfd_process_packet(struct bfd *bfd, const struct flow *flow,
          }
      }
      /* XXX: RFC 5880 Section 6.8.6 Demand mode related calculations here. */
+
+out:
+    ovs_mutex_unlock(&mutex);
  }
  \f
+static bool
+bfd_forwarding__(const struct bfd *bfd) OVS_REQ_WRLOCK(mutex)
+{
+    if (bfd->forwarding_override != -1) {
+        return bfd->forwarding_override == 1;
+    }
+
+    return bfd->state == STATE_UP
+        && bfd->rmt_diag != DIAG_PATH_DOWN
+        && bfd->rmt_diag != DIAG_CPATH_DOWN
+        && bfd->rmt_diag != DIAG_RCPATH_DOWN;
+}
+
  /* Helpers. */
  static bool
-bfd_in_poll(const struct bfd *bfd)
+bfd_in_poll(const struct bfd *bfd) OVS_REQ_WRLOCK(mutex)
  {
      return (bfd->flags & FLAG_POLL) != 0;
  }
  
  static void
-bfd_poll(struct bfd *bfd)
+bfd_poll(struct bfd *bfd) OVS_REQ_WRLOCK(mutex)
  {
      if (bfd->state > STATE_DOWN && !bfd_in_poll(bfd)
          && !(bfd->flags & FLAG_FINAL)) {
@@ -642,7 +716,7 @@ bfd_poll(struct bfd *bfd)
  }
  
  static long long int
-bfd_min_tx(const struct bfd *bfd)
+bfd_min_tx(const struct bfd *bfd) OVS_REQ_WRLOCK(mutex)
  {
      /* RFC 5880 Section 6.8.3
       * When bfd.SessionState is not Up, the system MUST set
@@ -654,20 +728,20 @@ bfd_min_tx(const struct bfd *bfd)
  }
  
  static long long int
-bfd_tx_interval(const struct bfd *bfd)
+bfd_tx_interval(const struct bfd *bfd) OVS_REQ_WRLOCK(mutex)
  {
      long long int interval = bfd_min_tx(bfd);
      return MAX(interval, bfd->rmt_min_rx);
  }
  
  static long long int
-bfd_rx_interval(const struct bfd *bfd)
+bfd_rx_interval(const struct bfd *bfd) OVS_REQ_WRLOCK(mutex)
  {
      return MAX(bfd->min_rx, bfd->rmt_min_tx);
  }
  
  static void
-bfd_set_next_tx(struct bfd *bfd)
+bfd_set_next_tx(struct bfd *bfd) OVS_REQ_WRLOCK(mutex)
  {
      long long int interval = bfd_tx_interval(bfd);
      interval -= interval * random_range(26) / 100;
@@ -743,7 +817,7 @@ bfd_diag_str(enum diag diag) {
  
  static void
  log_msg(enum vlog_level level, const struct msg *p, const char *message,
-        const struct bfd *bfd)
+        const struct bfd *bfd) OVS_REQ_WRLOCK(mutex)
  {
      struct ds ds = DS_EMPTY_INITIALIZER;
  
@@ -775,6 +849,7 @@ log_msg(enum vlog_level level, const struct msg *p, const char *message,
  
  static void
  bfd_set_state(struct bfd *bfd, enum state state, enum diag diag)
+    OVS_REQ_WRLOCK(mutex)
  {
      if (diag == DIAG_NONE && bfd->cpath_down) {
          diag = DIAG_CPATH_DOWN;
@@ -824,7 +899,7 @@ generate_discriminator(void)
          /* 'disc' is by definition random, so there's no reason to waste time
           * hashing it. */
          disc = random_uint32();
-        HMAP_FOR_EACH_IN_BUCKET (bfd, node, disc, &all_bfds) {
+        HMAP_FOR_EACH_IN_BUCKET (bfd, node, disc, all_bfds) {
              if (bfd->disc == disc) {
                  disc = 0;
                  break;
@@ -836,11 +911,11 @@ generate_discriminator(void)
  }
  
  static struct bfd *
-bfd_find_by_name(const char *name)
+bfd_find_by_name(const char *name) OVS_REQ_WRLOCK(mutex)
  {
      struct bfd *bfd;
  
-    HMAP_FOR_EACH (bfd, node, &all_bfds) {
+    HMAP_FOR_EACH (bfd, node, all_bfds) {
          if (!strcmp(bfd->name, name)) {
              return bfd;
          }
@@ -849,10 +924,10 @@ bfd_find_by_name(const char *name)
  }
  
  static void
-bfd_put_details(struct ds *ds, const struct bfd *bfd)
+bfd_put_details(struct ds *ds, const struct bfd *bfd) OVS_REQ_WRLOCK(mutex)
  {
      ds_put_format(ds, "\tForwarding: %s\n",
-                  bfd_forwarding(bfd) ? "true" : "false");
+                  bfd_forwarding__(bfd) ? "true" : "false");
      ds_put_format(ds, "\tDetect Multiplier: %d\n", bfd->mult);
      ds_put_format(ds, "\tConcatenated Path Down: %s\n",
                    bfd->cpath_down ? "true" : "false");
@@ -892,37 +967,43 @@ bfd_put_details(struct ds *ds, const struct bfd *bfd)
  
  static void
  bfd_unixctl_show(struct unixctl_conn *conn, int argc, const char *argv[],
-                 void *aux OVS_UNUSED)
+                 void *aux OVS_UNUSED) OVS_EXCLUDED(mutex)
  {
      struct ds ds = DS_EMPTY_INITIALIZER;
      struct bfd *bfd;
  
+    ovs_mutex_lock(&mutex);
      if (argc > 1) {
          bfd = bfd_find_by_name(argv[1]);
          if (!bfd) {
              unixctl_command_reply_error(conn, "no such bfd object");
-            return;
+            goto out;
          }
          bfd_put_details(&ds, bfd);
      } else {
-        HMAP_FOR_EACH (bfd, node, &all_bfds) {
+        HMAP_FOR_EACH (bfd, node, all_bfds) {
              ds_put_format(&ds, "---- %s ----\n", bfd->name);
              bfd_put_details(&ds, bfd);
          }
      }
      unixctl_command_reply(conn, ds_cstr(&ds));
      ds_destroy(&ds);
+
+out:
+    ovs_mutex_unlock(&mutex);
  }
  
  
  static void
  bfd_unixctl_set_forwarding_override(struct unixctl_conn *conn, int argc,
                                      const char *argv[], void *aux OVS_UNUSED)
+    OVS_EXCLUDED(mutex)
  {
      const char *forward_str = argv[argc - 1];
      int forwarding_override;
      struct bfd *bfd;
  
+    ovs_mutex_lock(&mutex);
      if (!strcasecmp("true", forward_str)) {
          forwarding_override = 1;
      } else if (!strcasecmp("false", forward_str)) {
@@ -931,21 +1012,24 @@ bfd_unixctl_set_forwarding_override(struct unixctl_conn *conn, int argc,
          forwarding_override = -1;
      } else {
          unixctl_command_reply_error(conn, "unknown fault string");
-        return;
+        goto out;
      }
  
      if (argc > 2) {
          bfd = bfd_find_by_name(argv[1]);
          if (!bfd) {
              unixctl_command_reply_error(conn, "no such BFD object");
-            return;
+            goto out;
          }
          bfd->forwarding_override = forwarding_override;
      } else {
-        HMAP_FOR_EACH (bfd, node, &all_bfds) {
+        HMAP_FOR_EACH (bfd, node, all_bfds) {
              bfd->forwarding_override = forwarding_override;
          }
      }
  
      unixctl_command_reply(conn, "OK");
+
+out:
+    ovs_mutex_unlock(&mutex);
  }
diff --git a/lib/bond.c b/lib/bond.c

index b3ae0c4..06dd362 100644 (file)
--- a/lib/bond.c
+++ b/lib/bond.c
@@ -35,7 +35,6 @@
  #include "packets.h"
  #include "poll-loop.h"
  #include "shash.h"
-#include "tag.h"
  #include "timeval.h"
  #include "unixctl.h"
  #include "vlog.h"
@@ -51,7 +50,6 @@ VLOG_DEFINE_THIS_MODULE(bond);
  struct bond_entry {
      struct bond_slave *slave;   /* Assigned slave, NULL if unassigned. */
      uint64_t tx_bytes;          /* Count of bytes recently transmitted. */
-    tag_type tag;               /* Tag for entry<->facet association. */
      struct list list_node;      /* In bond_slave's 'entries' list. */
  };
  
@@ -69,7 +67,6 @@ struct bond_slave {
      long long delay_expires;    /* Time after which 'enabled' may change. */
      bool enabled;               /* May be chosen for flows? */
      bool may_enable;            /* Client considers this slave bondable. */
-    tag_type tag;               /* Tag associated with this slave. */
  
      /* Rebalancing info.  Used only by bond_rebalance(). */
      struct list bal_node;       /* In bond_rebalance()'s 'bals' list. */
@@ -89,7 +86,6 @@ struct bond {
      /* Bonding info. */
      enum bond_mode balance;     /* Balancing mode, one of BM_*. */
      struct bond_slave *active_slave;
-    tag_type no_slaves_tag;     /* Tag for flows when all slaves disabled. */
      int updelay, downdelay;     /* Delay before slave goes up/down, in ms. */
      enum lacp_status lacp_status; /* Status of LACP negotiations. */
      bool bond_revalidate;       /* True if flows need revalidation. */
@@ -104,35 +100,37 @@ struct bond {
      /* Legacy compatibility. */
      long long int next_fake_iface_update; /* LLONG_MAX if disabled. */
  
-    /* Tag set saved for next bond_run().  This tag set is a kluge for cases
-     * where we can't otherwise provide revalidation feedback to the client.
-     * That's only unixctl commands now; I hope no other cases will arise. */
-    struct tag_set unixctl_tags;
-
-    int ref_cnt;
+    atomic_int ref_cnt;
  };
  
-static struct hmap all_bonds = HMAP_INITIALIZER(&all_bonds);
-
-static void bond_entry_reset(struct bond *);
-static struct bond_slave *bond_slave_lookup(struct bond *, const void *slave_);
-static void bond_enable_slave(struct bond_slave *, bool enable,
-                              struct tag_set *);
-static void bond_link_status_update(struct bond_slave *, struct tag_set *);
-static void bond_choose_active_slave(struct bond *, struct tag_set *);
+static struct ovs_rwlock rwlock = OVS_RWLOCK_INITIALIZER;
+static struct hmap all_bonds__ = HMAP_INITIALIZER(&all_bonds__);
+static struct hmap *const all_bonds OVS_GUARDED_BY(rwlock) = &all_bonds__;
+
+static void bond_entry_reset(struct bond *) OVS_REQ_WRLOCK(rwlock);
+static struct bond_slave *bond_slave_lookup(struct bond *, const void *slave_)
+    OVS_REQ_RDLOCK(rwlock);
+static void bond_enable_slave(struct bond_slave *, bool enable)
+    OVS_REQ_WRLOCK(rwlock);
+static void bond_link_status_update(struct bond_slave *)
+    OVS_REQ_WRLOCK(rwlock);
+static void bond_choose_active_slave(struct bond *)
+    OVS_REQ_WRLOCK(rwlock);
  static unsigned int bond_hash_src(const uint8_t mac[ETH_ADDR_LEN],
                                    uint16_t vlan, uint32_t basis);
  static unsigned int bond_hash_tcp(const struct flow *, uint16_t vlan,
                                    uint32_t basis);
  static struct bond_entry *lookup_bond_entry(const struct bond *,
                                              const struct flow *,
-                                            uint16_t vlan);
-static tag_type bond_get_active_slave_tag(const struct bond *);
+                                            uint16_t vlan)
+    OVS_REQ_RDLOCK(rwlock);
  static struct bond_slave *choose_output_slave(const struct bond *,
                                                const struct flow *,
                                                struct flow_wildcards *,
-                                              uint16_t vlan, tag_type *tags);
-static void bond_update_fake_slave_stats(struct bond *);
+                                              uint16_t vlan)
+    OVS_REQ_RDLOCK(rwlock);
+static void bond_update_fake_slave_stats(struct bond *)
+    OVS_REQ_RDLOCK(rwlock);
  
  /* Attempts to parse 's' as the name of a bond balancing mode.  If successful,
   * stores the mode in '*balance' and returns true.  Otherwise returns false
@@ -179,14 +177,10 @@ bond_create(const struct bond_settings *s)
  
      bond = xzalloc(sizeof *bond);
      hmap_init(&bond->slaves);
-    bond->no_slaves_tag = tag_create_random();
      bond->next_fake_iface_update = LLONG_MAX;
-    bond->ref_cnt = 1;
+    atomic_init(&bond->ref_cnt, 1);
  
      bond_reconfigure(bond, s);
-
-    tag_set_init(&bond->unixctl_tags);
-
      return bond;
  }
  
@@ -196,8 +190,9 @@ bond_ref(const struct bond *bond_)
      struct bond *bond = CONST_CAST(struct bond *, bond_);
  
      if (bond) {
-        ovs_assert(bond->ref_cnt > 0);
-        bond->ref_cnt++;
+        int orig;
+        atomic_add(&bond->ref_cnt, 1, &orig);
+        ovs_assert(orig > 0);
      }
      return bond;
  }
@@ -207,17 +202,21 @@ void
  bond_unref(struct bond *bond)
  {
      struct bond_slave *slave, *next_slave;
+    int orig;
  
      if (!bond) {
          return;
      }
  
-    ovs_assert(bond->ref_cnt > 0);
-    if (--bond->ref_cnt) {
+    atomic_sub(&bond->ref_cnt, 1, &orig);
+    ovs_assert(orig > 0);
+    if (orig != 1) {
          return;
      }
  
-    hmap_remove(&all_bonds, &bond->hmap_node);
+    ovs_rwlock_wrlock(&rwlock);
+    hmap_remove(all_bonds, &bond->hmap_node);
+    ovs_rwlock_unlock(&rwlock);
  
      HMAP_FOR_EACH_SAFE (slave, next_slave, hmap_node, &bond->slaves) {
          hmap_remove(&bond->slaves, &slave->hmap_node);
@@ -246,13 +245,14 @@ bond_reconfigure(struct bond *bond, const struct bond_settings *s)
  {
      bool revalidate = false;
  
+    ovs_rwlock_wrlock(&rwlock);
      if (!bond->name || strcmp(bond->name, s->name)) {
          if (bond->name) {
-            hmap_remove(&all_bonds, &bond->hmap_node);
+            hmap_remove(all_bonds, &bond->hmap_node);
              free(bond->name);
          }
          bond->name = xstrdup(s->name);
-        hmap_insert(&all_bonds, &bond->hmap_node, hash_string(bond->name, 0));
+        hmap_insert(all_bonds, &bond->hmap_node, hash_string(bond->name, 0));
      }
  
      bond->updelay = s->up_delay;
@@ -290,11 +290,13 @@ bond_reconfigure(struct bond *bond, const struct bond_settings *s)
          bond_entry_reset(bond);
      }
  
+    ovs_rwlock_unlock(&rwlock);
      return revalidate;
  }
  
  static void
  bond_slave_set_netdev__(struct bond_slave *slave, struct netdev *netdev)
+    OVS_REQ_WRLOCK(rwlock)
  {
      if (slave->netdev != netdev) {
          slave->netdev = netdev;
@@ -314,8 +316,10 @@ bond_slave_set_netdev__(struct bond_slave *slave, struct netdev *netdev)
  void
  bond_slave_register(struct bond *bond, void *slave_, struct netdev *netdev)
  {
-    struct bond_slave *slave = bond_slave_lookup(bond, slave_);
+    struct bond_slave *slave;
  
+    ovs_rwlock_wrlock(&rwlock);
+    slave = bond_slave_lookup(bond, slave_);
      if (!slave) {
          slave = xzalloc(sizeof *slave);
  
@@ -327,13 +331,14 @@ bond_slave_register(struct bond *bond, void *slave_, struct netdev *netdev)
          bond->bond_revalidate = true;
  
          slave->enabled = false;
-        bond_enable_slave(slave, netdev_get_carrier(netdev), NULL);
+        bond_enable_slave(slave, netdev_get_carrier(netdev));
      }
  
      bond_slave_set_netdev__(slave, netdev);
  
      free(slave->name);
      slave->name = xstrdup(netdev_get_name(netdev));
+    ovs_rwlock_unlock(&rwlock);
  }
  
  /* Updates the network device to be used with 'slave_' to 'netdev'.
@@ -344,10 +349,14 @@ bond_slave_register(struct bond *bond, void *slave_, struct netdev *netdev)
  void
  bond_slave_set_netdev(struct bond *bond, void *slave_, struct netdev *netdev)
  {
-    struct bond_slave *slave = bond_slave_lookup(bond, slave_);
+    struct bond_slave *slave;
+
+    ovs_rwlock_wrlock(&rwlock);
+    slave = bond_slave_lookup(bond, slave_);
      if (slave) {
          bond_slave_set_netdev__(slave, netdev);
      }
+    ovs_rwlock_unlock(&rwlock);
  }
  
  /* Unregisters 'slave_' from 'bond'.  If 'bond' does not contain such a slave
@@ -357,14 +366,17 @@ bond_slave_set_netdev(struct bond *bond, void *slave_, struct netdev *netdev)
  void
  bond_slave_unregister(struct bond *bond, const void *slave_)
  {
-    struct bond_slave *slave = bond_slave_lookup(bond, slave_);
+    struct bond_slave *slave;
      bool del_active;
  
+    ovs_rwlock_wrlock(&rwlock);
+    slave = bond_slave_lookup(bond, slave_);
      if (!slave) {
-        return;
+        goto out;
      }
  
-    bond_enable_slave(slave, false, NULL);
+    bond->bond_revalidate = true;
+    bond_enable_slave(slave, false);
  
      del_active = bond->active_slave == slave;
      if (bond->hash) {
@@ -383,12 +395,11 @@ bond_slave_unregister(struct bond *bond, const void *slave_)
      free(slave);
  
      if (del_active) {
-        struct tag_set tags;
-
-        tag_set_init(&tags);
-        bond_choose_active_slave(bond, &tags);
+        bond_choose_active_slave(bond);
          bond->send_learning_packets = true;
      }
+out:
+    ovs_rwlock_unlock(&rwlock);
  }
  
  /* Should be called on each slave in 'bond' before bond_run() to indicate
@@ -399,18 +410,23 @@ bond_slave_unregister(struct bond *bond, const void *slave_)
  void
  bond_slave_set_may_enable(struct bond *bond, void *slave_, bool may_enable)
  {
+    ovs_rwlock_wrlock(&rwlock);
      bond_slave_lookup(bond, slave_)->may_enable = may_enable;
+    ovs_rwlock_unlock(&rwlock);
  }
  
-/* Performs periodic maintenance on 'bond'.  The caller must provide 'tags' to
- * allow tagged flows to be invalidated.
+/* Performs periodic maintenance on 'bond'.
+ *
+ * Returns true if the caller should revalidate its flows.
   *
   * The caller should check bond_should_send_learning_packets() afterward. */
-void
-bond_run(struct bond *bond, struct tag_set *tags, enum lacp_status lacp_status)
+bool
+bond_run(struct bond *bond, enum lacp_status lacp_status)
  {
      struct bond_slave *slave;
+    bool revalidate;
  
+    ovs_rwlock_wrlock(&rwlock);
      if (bond->lacp_status != lacp_status) {
          bond->lacp_status = lacp_status;
          bond->bond_revalidate = true;
@@ -418,11 +434,11 @@ bond_run(struct bond *bond, struct tag_set *tags, enum lacp_status lacp_status)
  
      /* Enable slaves based on link status and LACP feedback. */
      HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
-        bond_link_status_update(slave, tags);
+        bond_link_status_update(slave);
          slave->change_seq = netdev_change_seq(slave->netdev);
      }
      if (!bond->active_slave || !bond->active_slave->enabled) {
-        bond_choose_active_slave(bond, tags);
+        bond_choose_active_slave(bond);
      }
  
      /* Update fake bond interface stats. */
@@ -431,20 +447,11 @@ bond_run(struct bond *bond, struct tag_set *tags, enum lacp_status lacp_status)
          bond->next_fake_iface_update = time_msec() + 1000;
      }
  
-    if (bond->bond_revalidate) {
-        struct bond_slave *slave;
-
-        bond->bond_revalidate = false;
-        bond_entry_reset(bond);
-        HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
-            tag_set_add(tags, slave->tag);
-        }
-        tag_set_add(tags, bond->no_slaves_tag);
-    }
+    revalidate = bond->bond_revalidate;
+    bond->bond_revalidate = false;
+    ovs_rwlock_unlock(&rwlock);
  
-    /* Invalidate any tags required by  */
-    tag_set_union(tags, &bond->unixctl_tags);
-    tag_set_init(&bond->unixctl_tags);
+    return revalidate;
  }
  
  /* Causes poll_block() to wake up when 'bond' needs something to be done. */
@@ -453,6 +460,7 @@ bond_wait(struct bond *bond)
  {
      struct bond_slave *slave;
  
+    ovs_rwlock_rdlock(&rwlock);
      HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
          if (slave->delay_expires != LLONG_MAX) {
              poll_timer_wait_until(slave->delay_expires);
@@ -467,10 +475,10 @@ bond_wait(struct bond *bond)
          poll_timer_wait_until(bond->next_fake_iface_update);
      }
  
-    /* Ensure that any saved tags get revalidated right away. */
-    if (!tag_set_is_empty(&bond->unixctl_tags)) {
+    if (bond->bond_revalidate) {
          poll_immediate_wake();
      }
+    ovs_rwlock_unlock(&rwlock);
  
      /* We don't wait for bond->next_rebalance because rebalancing can only run
       * at a flow account checkpoint.  ofproto does checkpointing on its own
@@ -502,8 +510,12 @@ may_send_learning_packets(const struct bond *bond)
  bool
  bond_should_send_learning_packets(struct bond *bond)
  {
-    bool send = bond->send_learning_packets && may_send_learning_packets(bond);
+    bool send;
+
+    ovs_rwlock_wrlock(&rwlock);
+    send = bond->send_learning_packets && may_send_learning_packets(bond);
      bond->send_learning_packets = false;
+    ovs_rwlock_unlock(&rwlock);
      return send;
  }
  
@@ -519,14 +531,13 @@ bond_compose_learning_packet(struct bond *bond,
  {
      struct bond_slave *slave;
      struct ofpbuf *packet;
-    tag_type tags = 0;
      struct flow flow;
  
+    ovs_rwlock_rdlock(&rwlock);
      ovs_assert(may_send_learning_packets(bond));
-
      memset(&flow, 0, sizeof flow);
      memcpy(flow.dl_src, eth_src, ETH_ADDR_LEN);
-    slave = choose_output_slave(bond, &flow, NULL, vlan, &tags);
+    slave = choose_output_slave(bond, &flow, NULL, vlan);
  
      packet = ofpbuf_new(0);
      compose_rarp(packet, eth_src);
@@ -535,6 +546,7 @@ bond_compose_learning_packet(struct bond *bond,
      }
  
      *port_aux = slave->aux;
+    ovs_rwlock_unlock(&rwlock);
      return packet;
  }
  \f
@@ -555,12 +567,15 @@ bond_compose_learning_packet(struct bond *bond,
   */
  enum bond_verdict
  bond_check_admissibility(struct bond *bond, const void *slave_,
-                         const uint8_t eth_dst[ETH_ADDR_LEN], tag_type *tags)
+                         const uint8_t eth_dst[ETH_ADDR_LEN])
  {
-    struct bond_slave *slave = bond_slave_lookup(bond, slave_);
+    enum bond_verdict verdict = BV_DROP;
+    struct bond_slave *slave;
  
+    ovs_rwlock_rdlock(&rwlock);
+    slave = bond_slave_lookup(bond, slave_);
      if (!slave) {
-        return BV_DROP;
+        goto out;
      }
  
      /* LACP bonds have very loose admissibility restrictions because we can
@@ -572,16 +587,19 @@ bond_check_admissibility(struct bond *bond, const void *slave_,
       * If LACP is configured, but LACP negotiations have been unsuccessful, we
       * drop all incoming traffic. */
      switch (bond->lacp_status) {
-    case LACP_NEGOTIATED: return slave->enabled ? BV_ACCEPT : BV_DROP;
-    case LACP_CONFIGURED: return BV_DROP;
-    case LACP_DISABLED: break;
+    case LACP_NEGOTIATED:
+        verdict = slave->enabled ? BV_ACCEPT : BV_DROP;
+        goto out;
+    case LACP_CONFIGURED:
+        goto out;
+    case LACP_DISABLED:
+        break;
      }
  
      /* Drop all multicast packets on inactive slaves. */
      if (eth_addr_is_multicast(eth_dst)) {
-        *tags |= bond_get_active_slave_tag(bond);
          if (bond->active_slave != slave) {
-            return BV_DROP;
+            goto out;
          }
      }
  
@@ -589,22 +607,22 @@ bond_check_admissibility(struct bond *bond, const void *slave_,
      case BM_AB:
          /* Drop all packets which arrive on backup slaves.  This is similar to
           * how Linux bonding handles active-backup bonds. */
-        *tags |= bond_get_active_slave_tag(bond);
          if (bond->active_slave != slave) {
              static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
  
              VLOG_DBG_RL(&rl, "active-backup bond received packet on backup"
                          " slave (%s) destined for " ETH_ADDR_FMT,
                          slave->name, ETH_ADDR_ARGS(eth_dst));
-            return BV_DROP;
+            goto out;
          }
-        return BV_ACCEPT;
+        verdict = BV_ACCEPT;
+        goto out;
  
      case BM_TCP:
          /* TCP balanced bonds require successful LACP negotiated. Based on the
           * above check, LACP is off on this bond.  Therfore, we drop all
           * incoming traffic. */
-        return BV_DROP;
+        goto out;
  
      case BM_SLB:
          /* Drop all packets for which we have learned a different input port,
@@ -613,10 +631,15 @@ bond_check_admissibility(struct bond *bond, const void *slave_,
           * the host has moved to another switch.  The exception to the
           * exception is if we locked the learning table to avoid reflections on
           * bond slaves. */
-        return BV_DROP_IF_MOVED;
+        verdict = BV_DROP_IF_MOVED;
+        goto out;
      }
  
      NOT_REACHED();
+out:
+    ovs_rwlock_unlock(&rwlock);
+    return verdict;
+
  }
  
  /* Returns the slave (registered on 'bond' by bond_slave_register()) to which
@@ -629,31 +652,26 @@ bond_check_admissibility(struct bond *bond, const void *slave_,
   * nonzero only for trunk ports), whereas 'vlan' is the logical VLAN that the
   * packet belongs to (so for an access port it will be the access port's VLAN).
   *
- * Adds a tag to '*tags' that associates the flow with the returned slave.
- *
   * If 'wc' is non-NULL, bitwise-OR's 'wc' with the set of bits that were
   * significant in the selection.  At some point earlier, 'wc' should
   * have been initialized (e.g., by flow_wildcards_init_catchall()).
   */
  void *
  bond_choose_output_slave(struct bond *bond, const struct flow *flow,
-                         struct flow_wildcards *wc, uint16_t vlan,
-                         tag_type *tags)
+                         struct flow_wildcards *wc, uint16_t vlan)
  {
-    struct bond_slave *slave = choose_output_slave(bond, flow, wc, vlan, tags);
-    if (slave) {
-        *tags |= slave->tag;
-        return slave->aux;
-    } else {
-        *tags |= bond->no_slaves_tag;
-        return NULL;
-    }
+    struct bond_slave *slave;
+
+    ovs_rwlock_rdlock(&rwlock);
+    slave = choose_output_slave(bond, flow, wc, vlan);
+    ovs_rwlock_unlock(&rwlock);
+    return slave;
  }
  \f
  /* Rebalancing. */
  
  static bool
-bond_is_balanced(const struct bond *bond)
+bond_is_balanced(const struct bond *bond) OVS_REQ_RDLOCK(rwlock)
  {
      return bond->rebalance_interval
          && (bond->balance == BM_SLB || bond->balance == BM_TCP);
@@ -664,13 +682,15 @@ void
  bond_account(struct bond *bond, const struct flow *flow, uint16_t vlan,
               uint64_t n_bytes)
  {
+    ovs_rwlock_wrlock(&rwlock);
      if (bond_is_balanced(bond)) {
          lookup_bond_entry(bond, flow, vlan)->tx_bytes += n_bytes;
      }
+    ovs_rwlock_unlock(&rwlock);
  }
  
  static struct bond_slave *
-bond_slave_from_bal_node(struct list *bal)
+bond_slave_from_bal_node(struct list *bal) OVS_REQ_RDLOCK(rwlock)
  {
      return CONTAINER_OF(bal, struct bond_slave, bal_node);
  }
@@ -713,8 +733,7 @@ log_bals(struct bond *bond, const struct list *bals)
  
  /* Shifts 'hash' from its current slave to 'to'. */
  static void
-bond_shift_load(struct bond_entry *hash, struct bond_slave *to,
-                struct tag_set *set)
+bond_shift_load(struct bond_entry *hash, struct bond_slave *to)
  {
      struct bond_slave *from = hash->slave;
      struct bond *bond = from->bond;
@@ -733,9 +752,7 @@ bond_shift_load(struct bond_entry *hash, struct bond_slave *to,
      to->tx_bytes += delta;
  
      /* Arrange for flows to be revalidated. */
-    tag_set_add(set, hash->tag);
-    hash->slave = to;
-    hash->tag = tag_create_random();
+    bond->bond_revalidate = true;
  }
  
  /* Picks and returns a bond_entry to migrate from 'from' (the most heavily
@@ -810,13 +827,15 @@ reinsert_bal(struct list *bals, struct bond_slave *slave)
   * The caller should have called bond_account() for each active flow, to ensure
   * that flow data is consistently accounted at this point. */
  void
-bond_rebalance(struct bond *bond, struct tag_set *tags)
+bond_rebalance(struct bond *bond)
  {
      struct bond_slave *slave;
      struct bond_entry *e;
      struct list bals;
  
+    ovs_rwlock_wrlock(&rwlock);
      if (!bond_is_balanced(bond) || time_msec() < bond->next_rebalance) {
+        ovs_rwlock_unlock(&rwlock);
          return;
      }
      bond->next_rebalance = time_msec() + bond->rebalance_interval;
@@ -864,7 +883,7 @@ bond_rebalance(struct bond *bond, struct tag_set *tags)
           * to move from 'from' to 'to'. */
          e = choose_entry_to_migrate(from, to->tx_bytes);
          if (e) {
-            bond_shift_load(e, to, tags);
+            bond_shift_load(e, to);
  
              /* Delete element from from->entries.
               *
@@ -892,17 +911,18 @@ bond_rebalance(struct bond *bond, struct tag_set *tags)
              e->slave = NULL;
          }
      }
+    ovs_rwlock_unlock(&rwlock);
  }
  \f
  /* Bonding unixctl user interface functions. */
  
  static struct bond *
-bond_find(const char *name)
+bond_find(const char *name) OVS_REQ_RDLOCK(rwlock)
  {
      struct bond *bond;
  
      HMAP_FOR_EACH_WITH_HASH (bond, hmap_node, hash_string(name, 0),
-                             &all_bonds) {
+                             all_bonds) {
          if (!strcmp(bond->name, name)) {
              return bond;
          }
@@ -933,7 +953,8 @@ bond_unixctl_list(struct unixctl_conn *conn,
  
      ds_put_cstr(&ds, "bond\ttype\tslaves\n");
  
-    HMAP_FOR_EACH (bond, hmap_node, &all_bonds) {
+    ovs_rwlock_rdlock(&rwlock);
+    HMAP_FOR_EACH (bond, hmap_node, all_bonds) {
          const struct bond_slave *slave;
          size_t i;
  
@@ -949,12 +970,14 @@ bond_unixctl_list(struct unixctl_conn *conn,
          }
          ds_put_char(&ds, '\n');
      }
+    ovs_rwlock_unlock(&rwlock);
      unixctl_command_reply(conn, ds_cstr(&ds));
      ds_destroy(&ds);
  }
  
  static void
  bond_print_details(struct ds *ds, const struct bond *bond)
+    OVS_REQ_RDLOCK(rwlock)
  {
      struct shash slave_shash = SHASH_INITIALIZER(&slave_shash);
      const struct shash_node **sorted_slaves = NULL;
@@ -1046,24 +1069,28 @@ bond_unixctl_show(struct unixctl_conn *conn,
  {
      struct ds ds = DS_EMPTY_INITIALIZER;
  
+    ovs_rwlock_rdlock(&rwlock);
      if (argc > 1) {
          const struct bond *bond = bond_find(argv[1]);
  
          if (!bond) {
              unixctl_command_reply_error(conn, "no such bond");
-            return;
+            goto out;
          }
          bond_print_details(&ds, bond);
      } else {
          const struct bond *bond;
  
-        HMAP_FOR_EACH (bond, hmap_node, &all_bonds) {
+        HMAP_FOR_EACH (bond, hmap_node, all_bonds) {
              bond_print_details(&ds, bond);
          }
      }
  
      unixctl_command_reply(conn, ds_cstr(&ds));
      ds_destroy(&ds);
+
+out:
+    ovs_rwlock_unlock(&rwlock);
  }
  
  static void
@@ -1079,40 +1106,43 @@ bond_unixctl_migrate(struct unixctl_conn *conn,
      struct bond_entry *entry;
      int hash;
  
+    ovs_rwlock_wrlock(&rwlock);
      bond = bond_find(bond_s);
      if (!bond) {
          unixctl_command_reply_error(conn, "no such bond");
-        return;
+        goto out;
      }
  
      if (bond->balance != BM_SLB) {
          unixctl_command_reply_error(conn, "not an SLB bond");
-        return;
+        goto out;
      }
  
      if (strspn(hash_s, "0123456789") == strlen(hash_s)) {
          hash = atoi(hash_s) & BOND_MASK;
      } else {
          unixctl_command_reply_error(conn, "bad hash");
-        return;
+        goto out;
      }
  
      slave = bond_lookup_slave(bond, slave_s);
      if (!slave) {
          unixctl_command_reply_error(conn, "no such slave");
-        return;
+        goto out;
      }
  
      if (!slave->enabled) {
          unixctl_command_reply_error(conn, "cannot migrate to disabled slave");
-        return;
+        goto out;
      }
  
      entry = &bond->hash[hash];
-    tag_set_add(&bond->unixctl_tags, entry->tag);
+    bond->bond_revalidate = true;
      entry->slave = slave;
-    entry->tag = tag_create_random();
      unixctl_command_reply(conn, "migrated");
+
+out:
+    ovs_rwlock_unlock(&rwlock);
  }
  
  static void
@@ -1125,27 +1155,27 @@ bond_unixctl_set_active_slave(struct unixctl_conn *conn,
      struct bond *bond;
      struct bond_slave *slave;
  
+    ovs_rwlock_wrlock(&rwlock);
      bond = bond_find(bond_s);
      if (!bond) {
          unixctl_command_reply_error(conn, "no such bond");
-        return;
+        goto out;
      }
  
      slave = bond_lookup_slave(bond, slave_s);
      if (!slave) {
          unixctl_command_reply_error(conn, "no such slave");
-        return;
+        goto out;
      }
  
      if (!slave->enabled) {
          unixctl_command_reply_error(conn, "cannot make disabled slave active");
-        return;
+        goto out;
      }
  
      if (bond->active_slave != slave) {
-        tag_set_add(&bond->unixctl_tags, bond_get_active_slave_tag(bond));
+        bond->bond_revalidate = true;
          bond->active_slave = slave;
-        bond->active_slave->tag = tag_create_random();
          VLOG_INFO("bond %s: active interface is now %s",
                    bond->name, slave->name);
          bond->send_learning_packets = true;
@@ -1153,6 +1183,8 @@ bond_unixctl_set_active_slave(struct unixctl_conn *conn,
      } else {
          unixctl_command_reply(conn, "no change");
      }
+out:
+    ovs_rwlock_unlock(&rwlock);
  }
  
  static void
@@ -1163,20 +1195,24 @@ enable_slave(struct unixctl_conn *conn, const char *argv[], bool enable)
      struct bond *bond;
      struct bond_slave *slave;
  
+    ovs_rwlock_wrlock(&rwlock);
      bond = bond_find(bond_s);
      if (!bond) {
          unixctl_command_reply_error(conn, "no such bond");
-        return;
+        goto out;
      }
  
      slave = bond_lookup_slave(bond, slave_s);
      if (!slave) {
          unixctl_command_reply_error(conn, "no such slave");
-        return;
+        goto out;
      }
  
-    bond_enable_slave(slave, enable, &bond->unixctl_tags);
+    bond_enable_slave(slave, enable);
      unixctl_command_reply(conn, enable ? "enabled" : "disabled");
+
+out:
+    ovs_rwlock_unlock(&rwlock);
  }
  
  static void
@@ -1290,25 +1326,19 @@ bond_slave_lookup(struct bond *bond, const void *slave_)
  }
  
  static void
-bond_enable_slave(struct bond_slave *slave, bool enable, struct tag_set *tags)
+bond_enable_slave(struct bond_slave *slave, bool enable)
  {
      slave->delay_expires = LLONG_MAX;
      if (enable != slave->enabled) {
+        slave->bond->bond_revalidate = true;
          slave->enabled = enable;
-        if (!slave->enabled) {
-            VLOG_INFO("interface %s: disabled", slave->name);
-            if (tags) {
-                tag_set_add(tags, slave->tag);
-            }
-        } else {
-            VLOG_INFO("interface %s: enabled", slave->name);
-            slave->tag = tag_create_random();
-        }
+        VLOG_INFO("interface %s: %s", slave->name,
+                  slave->enabled ? "enabled" : "disabled");
      }
  }
  
  static void
-bond_link_status_update(struct bond_slave *slave, struct tag_set *tags)
+bond_link_status_update(struct bond_slave *slave)
  {
      struct bond *bond = slave->bond;
      bool up;
@@ -1338,7 +1368,7 @@ bond_link_status_update(struct bond_slave *slave, struct tag_set *tags)
      }
  
      if (time_msec() >= slave->delay_expires) {
-        bond_enable_slave(slave, up, tags);
+        bond_enable_slave(slave, up);
      }
  }
  
@@ -1379,7 +1409,7 @@ lookup_bond_entry(const struct bond *bond, const struct flow *flow,
  
  static struct bond_slave *
  choose_output_slave(const struct bond *bond, const struct flow *flow,
-                    struct flow_wildcards *wc, uint16_t vlan, tag_type *tags)
+                    struct flow_wildcards *wc, uint16_t vlan)
  {
      struct bond_entry *e;
  
@@ -1413,9 +1443,7 @@ choose_output_slave(const struct bond *bond, const struct flow *flow,
              if (!e->slave->enabled) {
                  e->slave = bond->active_slave;
              }
-            e->tag = tag_create_random();
          }
-        *tags |= e->tag;
          return e->slave;
  
      default:
@@ -1449,7 +1477,7 @@ bond_choose_slave(const struct bond *bond)
  }
  
  static void
-bond_choose_active_slave(struct bond *bond, struct tag_set *tags)
+bond_choose_active_slave(struct bond *bond)
  {
      static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
      struct bond_slave *old_active_slave = bond->active_slave;
@@ -1464,11 +1492,7 @@ bond_choose_active_slave(struct bond *bond, struct tag_set *tags)
                           "remaining %lld ms updelay (since no interface was "
                           "enabled)", bond->name, bond->active_slave->name,
                           bond->active_slave->delay_expires - time_msec());
-            bond_enable_slave(bond->active_slave, true, tags);
-        }
-
-        if (!old_active_slave) {
-            tag_set_add(tags, bond->no_slaves_tag);
+            bond_enable_slave(bond->active_slave, true);
          }
  
          bond->send_learning_packets = true;
@@ -1477,16 +1501,6 @@ bond_choose_active_slave(struct bond *bond, struct tag_set *tags)
      }
  }
  
-/* Returns the tag for 'bond''s active slave, or 'bond''s no_slaves_tag if
- * there is no active slave. */
-static tag_type
-bond_get_active_slave_tag(const struct bond *bond)
-{
-    return (bond->active_slave
-            ? bond->active_slave->tag
-            : bond->no_slaves_tag);
-}
-
  /* Attempts to make the sum of the bond slaves' statistics appear on the fake
   * bond interface. */
  static void
diff --git a/lib/bond.h b/lib/bond.h

index 7190935..f80fead 100644 (file)
--- a/lib/bond.h
+++ b/lib/bond.h
@@ -21,7 +21,6 @@
  #include <stdint.h>
  
  #include "packets.h"
-#include "tag.h"
  
  struct flow;
  struct netdev;
@@ -69,7 +68,7 @@ void bond_slave_register(struct bond *, void *slave_, struct netdev *);
  void bond_slave_set_netdev(struct bond *, void *slave_, struct netdev *);
  void bond_slave_unregister(struct bond *, const void *slave);
  
-void bond_run(struct bond *, struct tag_set *, enum lacp_status);
+bool bond_run(struct bond *, enum lacp_status);
  void bond_wait(struct bond *);
  
  void bond_slave_set_may_enable(struct bond *, void *slave_, bool may_enable);
@@ -87,15 +86,13 @@ enum bond_verdict {
      BV_DROP_IF_MOVED            /* Drop if we've learned a different port. */
  };
  enum bond_verdict bond_check_admissibility(struct bond *, const void *slave_,
-                                           const uint8_t eth_dst[ETH_ADDR_LEN],
-                                           tag_type *);
+                                           const uint8_t dst[ETH_ADDR_LEN]);
  void *bond_choose_output_slave(struct bond *, const struct flow *,
-                               struct flow_wildcards *, uint16_t vlan,
-                               tag_type *);
+                               struct flow_wildcards *, uint16_t vlan);
  
  /* Rebalancing. */
  void bond_account(struct bond *, const struct flow *, uint16_t vlan,
                    uint64_t n_bytes);
-void bond_rebalance(struct bond *, struct tag_set *);
+void bond_rebalance(struct bond *);
  
  #endif /* bond.h */
diff --git a/lib/cfm.c b/lib/cfm.c

index a76a3ec..0bd41bf 100644 (file)
--- a/lib/cfm.c
+++ b/lib/cfm.c
@@ -91,8 +91,6 @@ struct cfm {
      uint64_t rx_packets;        /* Packets received by 'netdev'. */
  
      uint64_t mpid;
-    bool check_tnl_key;    /* Verify the tunnel key of inbound packets? */
-    bool extended;         /* Extended mode. */
      bool demand;           /* Demand mode. */
      bool booted;           /* A full fault interval has occurred. */
      enum cfm_fault_reason fault;  /* Connectivity fault status. */
@@ -128,7 +126,9 @@ struct cfm {
                                   recomputed. */
      long long int last_tx;    /* Last CCM transmission time. */
  
-    int ref_cnt;
+    atomic_bool check_tnl_key; /* Verify the tunnel key of inbound packets? */
+    atomic_bool extended;      /* Extended mode. */
+    atomic_int ref_cnt;
  };
  
  /* Remote MPs represent foreign network entities that are configured to have
@@ -147,13 +147,16 @@ struct remote_mp {
  };
  
  static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(20, 30);
-static struct hmap all_cfms = HMAP_INITIALIZER(&all_cfms);
+
+static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
+static struct hmap all_cfms__ = HMAP_INITIALIZER(&all_cfms__);
+static struct hmap *const all_cfms OVS_GUARDED_BY(mutex) = &all_cfms__;
  
  static unixctl_cb_func cfm_unixctl_show;
  static unixctl_cb_func cfm_unixctl_set_fault;
  
  static uint64_t
-cfm_rx_packets(const struct cfm *cfm)
+cfm_rx_packets(const struct cfm *cfm) OVS_REQ_WRLOCK(mutex)
  {
      struct netdev_stats stats;
  
@@ -167,12 +170,15 @@ cfm_rx_packets(const struct cfm *cfm)
  static const uint8_t *
  cfm_ccm_addr(const struct cfm *cfm)
  {
-    return cfm->extended ? eth_addr_ccm_x : eth_addr_ccm;
+    bool extended;
+    atomic_read(&cfm->extended, &extended);
+    return extended ? eth_addr_ccm_x : eth_addr_ccm;
  }
  
  /* Returns the string representation of the given cfm_fault_reason 'reason'. */
  const char *
-cfm_fault_reason_to_str(int reason) {
+cfm_fault_reason_to_str(int reason)
+{
      switch (reason) {
  #define CFM_FAULT_REASON(NAME, STR) case CFM_FAULT_##NAME: return #STR;
          CFM_FAULT_REASONS
@@ -198,7 +204,7 @@ ds_put_cfm_fault(struct ds *ds, int fault)
  }
  
  static void
-cfm_generate_maid(struct cfm *cfm)
+cfm_generate_maid(struct cfm *cfm) OVS_REQ_WRLOCK(mutex)
  {
      const char *ovs_md_name = "ovs";
      const char *ovs_ma_name = "ovs";
@@ -241,7 +247,7 @@ ccm_interval_to_ms(uint8_t interval)
  }
  
  static long long int
-cfm_fault_interval(struct cfm *cfm)
+cfm_fault_interval(struct cfm *cfm) OVS_REQ_WRLOCK(mutex)
  {
      /* According to the 802.1ag specification we should assume every other MP
       * with the same MAID has the same transmission interval that we have.  If
@@ -283,7 +289,7 @@ cfm_is_valid_mpid(bool extended, uint64_t mpid)
  }
  
  static struct remote_mp *
-lookup_remote_mp(const struct cfm *cfm, uint64_t mpid)
+lookup_remote_mp(const struct cfm *cfm, uint64_t mpid) OVS_REQ_WRLOCK(mutex)
  {
      struct remote_mp *rmp;
  
@@ -308,7 +314,7 @@ cfm_init(void)
  /* Allocates a 'cfm' object called 'name'.  'cfm' should be initialized by
   * cfm_configure() before use. */
  struct cfm *
-cfm_create(const struct netdev *netdev)
+cfm_create(const struct netdev *netdev) OVS_EXCLUDED(mutex)
  {
      struct cfm *cfm;
  
@@ -316,37 +322,47 @@ cfm_create(const struct netdev *netdev)
      cfm->netdev = netdev_ref(netdev);
      cfm->name = netdev_get_name(cfm->netdev);
      hmap_init(&cfm->remote_mps);
-    cfm_generate_maid(cfm);
-    hmap_insert(&all_cfms, &cfm->hmap_node, hash_string(cfm->name, 0));
      cfm->remote_opup = true;
      cfm->fault_override = -1;
      cfm->health = -1;
      cfm->last_tx = 0;
-    cfm->ref_cnt = 1;
+    atomic_init(&cfm->extended, false);
+    atomic_init(&cfm->check_tnl_key, false);
+    atomic_init(&cfm->ref_cnt, 1);
+
+    ovs_mutex_lock(&mutex);
+    cfm_generate_maid(cfm);
+    hmap_insert(all_cfms, &cfm->hmap_node, hash_string(cfm->name, 0));
+    ovs_mutex_unlock(&mutex);
      return cfm;
  }
  
  void
-cfm_unref(struct cfm *cfm)
+cfm_unref(struct cfm *cfm) OVS_EXCLUDED(mutex)
  {
      struct remote_mp *rmp, *rmp_next;
+    int orig;
  
      if (!cfm) {
          return;
      }
  
-    ovs_assert(cfm->ref_cnt);
-    if (--cfm->ref_cnt) {
+    atomic_sub(&cfm->ref_cnt, 1, &orig);
+    ovs_assert(orig > 0);
+    if (orig != 1) {
          return;
      }
  
+    ovs_mutex_lock(&mutex);
+    hmap_remove(all_cfms, &cfm->hmap_node);
+    ovs_mutex_unlock(&mutex);
+
      HMAP_FOR_EACH_SAFE (rmp, rmp_next, node, &cfm->remote_mps) {
          hmap_remove(&cfm->remote_mps, &rmp->node);
          free(rmp);
      }
  
      hmap_destroy(&cfm->remote_mps);
-    hmap_remove(&all_cfms, &cfm->hmap_node);
      netdev_close(cfm->netdev);
      free(cfm->rmps_array);
      free(cfm);
@@ -357,21 +373,25 @@ cfm_ref(const struct cfm *cfm_)
  {
      struct cfm *cfm = CONST_CAST(struct cfm *, cfm_);
      if (cfm) {
-        ovs_assert(cfm->ref_cnt > 0);
-        cfm->ref_cnt++;
+        int orig;
+        atomic_add(&cfm->ref_cnt, 1, &orig);
+        ovs_assert(orig > 0);
      }
      return cfm;
  }
  
  /* Should be run periodically to update fault statistics messages. */
  void
-cfm_run(struct cfm *cfm)
+cfm_run(struct cfm *cfm) OVS_EXCLUDED(mutex)
  {
+    ovs_mutex_lock(&mutex);
      if (timer_expired(&cfm->fault_timer)) {
          long long int interval = cfm_fault_interval(cfm);
          struct remote_mp *rmp, *rmp_next;
          bool old_cfm_fault = cfm->fault;
          bool demand_override;
+        bool rmp_set_opup = false;
+        bool rmp_set_opdown = false;
  
          cfm->fault = cfm->recv_fault;
          cfm->recv_fault = 0;
@@ -381,7 +401,6 @@ cfm_run(struct cfm *cfm)
          cfm->rmps_array = xmalloc(hmap_count(&cfm->remote_mps) *
                                    sizeof *cfm->rmps_array);
  
-        cfm->remote_opup = true;
          if (cfm->health_interval == CFM_HEALTH_INTERVAL) {
              /* Calculate the cfm health of the interface.  If the number of
               * remote_mpids of a cfm interface is > 1, the cfm health is
@@ -433,14 +452,23 @@ cfm_run(struct cfm *cfm)
              } else {
                  rmp->recv = false;
  
-                if (!rmp->opup) {
-                    cfm->remote_opup = rmp->opup;
+                if (rmp->opup) {
+                    rmp_set_opup = true;
+                } else {
+                    rmp_set_opdown = true;
                  }
  
                  cfm->rmps_array[cfm->rmps_array_len++] = rmp->mpid;
              }
          }
  
+        if (rmp_set_opdown) {
+            cfm->remote_opup = false;
+        }
+        else if (rmp_set_opup) {
+            cfm->remote_opup = true;
+        }
+
          if (hmap_is_empty(&cfm->remote_mps)) {
              cfm->fault |= CFM_FAULT_RECV;
          }
@@ -461,25 +489,33 @@ cfm_run(struct cfm *cfm)
          timer_set_duration(&cfm->fault_timer, interval);
          VLOG_DBG("%s: new fault interval", cfm->name);
      }
+    ovs_mutex_unlock(&mutex);
  }
  
  /* Should be run periodically to check if the CFM module has a CCM message it
   * wishes to send. */
  bool
-cfm_should_send_ccm(struct cfm *cfm)
+cfm_should_send_ccm(struct cfm *cfm) OVS_EXCLUDED(mutex)
  {
-    return timer_expired(&cfm->tx_timer);
+    bool ret;
+
+    ovs_mutex_lock(&mutex);
+    ret = timer_expired(&cfm->tx_timer);
+    ovs_mutex_unlock(&mutex);
+    return ret;
  }
  
  /* Composes a CCM message into 'packet'.  Messages generated with this function
   * should be sent whenever cfm_should_send_ccm() indicates. */
  void
  cfm_compose_ccm(struct cfm *cfm, struct ofpbuf *packet,
-                uint8_t eth_src[ETH_ADDR_LEN])
+                uint8_t eth_src[ETH_ADDR_LEN]) OVS_EXCLUDED(mutex)
  {
      uint16_t ccm_vlan;
      struct ccm *ccm;
+    bool extended;
  
+    ovs_mutex_lock(&mutex);
      timer_set_duration(&cfm->tx_timer, cfm->ccm_interval_ms);
      eth_compose(packet, cfm_ccm_addr(cfm), eth_src, ETH_TYPE_CFM, sizeof *ccm);
  
@@ -503,7 +539,8 @@ cfm_compose_ccm(struct cfm *cfm, struct ofpbuf *packet,
      memset(ccm->zero, 0, sizeof ccm->zero);
      ccm->end_tlv = 0;
  
-    if (cfm->extended) {
+    atomic_read(&cfm->extended, &extended);
+    if (extended) {
          ccm->mpid = htons(hash_mpid(cfm->mpid));
          ccm->mpid64 = htonll(cfm->mpid);
          ccm->opdown = !cfm->opup;
@@ -514,7 +551,7 @@ cfm_compose_ccm(struct cfm *cfm, struct ofpbuf *packet,
      }
  
      if (cfm->ccm_interval == 0) {
-        ovs_assert(cfm->extended);
+        ovs_assert(extended);
          ccm->interval_ms_x = htons(cfm->ccm_interval_ms);
      } else {
          ccm->interval_ms_x = htons(0);
@@ -533,18 +570,22 @@ cfm_compose_ccm(struct cfm *cfm, struct ofpbuf *packet,
          }
      }
      cfm->last_tx = time_msec();
+    ovs_mutex_unlock(&mutex);
  }
  
  void
-cfm_wait(struct cfm *cfm)
+cfm_wait(struct cfm *cfm) OVS_EXCLUDED(mutex)
  {
+    ovs_mutex_lock(&mutex);
      timer_wait(&cfm->tx_timer);
      timer_wait(&cfm->fault_timer);
+    ovs_mutex_unlock(&mutex);
  }
  
  /* Configures 'cfm' with settings from 's'. */
  bool
  cfm_configure(struct cfm *cfm, const struct cfm_settings *s)
+    OVS_EXCLUDED(mutex)
  {
      uint8_t interval;
      int interval_ms;
@@ -553,21 +594,23 @@ cfm_configure(struct cfm *cfm, const struct cfm_settings *s)
          return false;
      }
  
+    ovs_mutex_lock(&mutex);
      cfm->mpid = s->mpid;
-    cfm->check_tnl_key = s->check_tnl_key;
-    cfm->extended = s->extended;
      cfm->opup = s->opup;
      interval = ms_to_ccm_interval(s->interval);
      interval_ms = ccm_interval_to_ms(interval);
  
+    atomic_store(&cfm->check_tnl_key, s->check_tnl_key);
+    atomic_store(&cfm->extended, s->extended);
+
      cfm->ccm_vlan = s->ccm_vlan;
      cfm->ccm_pcp = s->ccm_pcp & (VLAN_PCP_MASK >> VLAN_PCP_SHIFT);
-    if (cfm->extended && interval_ms != s->interval) {
+    if (s->extended && interval_ms != s->interval) {
          interval = 0;
          interval_ms = MIN(s->interval, UINT16_MAX);
      }
  
-    if (cfm->extended && s->demand) {
+    if (s->extended && s->demand) {
          interval_ms = MAX(interval_ms, 500);
          if (!cfm->demand) {
              cfm->demand = true;
@@ -585,17 +628,21 @@ cfm_configure(struct cfm *cfm, const struct cfm_settings *s)
          timer_set_duration(&cfm->fault_timer, cfm_fault_interval(cfm));
      }
  
+    ovs_mutex_unlock(&mutex);
      return true;
  }
  
  /* Must be called when the netdev owned by 'cfm' should change. */
  void
  cfm_set_netdev(struct cfm *cfm, const struct netdev *netdev)
+    OVS_EXCLUDED(mutex)
  {
+    ovs_mutex_lock(&mutex);
      if (cfm->netdev != netdev) {
          netdev_close(cfm->netdev);
          cfm->netdev = netdev_ref(netdev);
      }
+    ovs_mutex_unlock(&mutex);
  }
  
  /* Returns true if 'cfm' should process packets from 'flow'.  Sets
@@ -604,13 +651,16 @@ bool
  cfm_should_process_flow(const struct cfm *cfm, const struct flow *flow,
                          struct flow_wildcards *wc)
  {
+    bool check_tnl_key;
+
+    atomic_read(&cfm->check_tnl_key, &check_tnl_key);
      memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
-    if (cfm->check_tnl_key) {
+    if (check_tnl_key) {
          memset(&wc->masks.tunnel.tun_id, 0xff, sizeof wc->masks.tunnel.tun_id);
      }
      return (ntohs(flow->dl_type) == ETH_TYPE_CFM
              && eth_addr_equals(flow->dl_dst, cfm_ccm_addr(cfm))
-            && (!cfm->check_tnl_key || flow->tunnel.tun_id == htonll(0)));
+            && (!check_tnl_key || flow->tunnel.tun_id == htonll(0)));
  }
  
  /* Updates internal statistics relevant to packet 'p'.  Should be called on
@@ -618,23 +668,26 @@ cfm_should_process_flow(const struct cfm *cfm, const struct flow *flow,
   * cfm_should_process_flow. */
  void
  cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p)
+    OVS_EXCLUDED(mutex)
  {
      struct ccm *ccm;
      struct eth_header *eth;
  
+    ovs_mutex_lock(&mutex);
+
      eth = p->l2;
      ccm = ofpbuf_at(p, (uint8_t *)p->l3 - (uint8_t *)p->data, CCM_ACCEPT_LEN);
  
      if (!ccm) {
          VLOG_INFO_RL(&rl, "%s: Received an unparseable 802.1ag CCM heartbeat.",
                       cfm->name);
-        return;
+        goto out;
      }
  
      if (ccm->opcode != CCM_OPCODE) {
          VLOG_INFO_RL(&rl, "%s: Received an unsupported 802.1ag message. "
                       "(opcode %u)", cfm->name, ccm->opcode);
-        return;
+        goto out;
      }
  
      /* According to the 802.1ag specification, reception of a CCM with an
@@ -659,9 +712,11 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p)
          uint64_t ccm_mpid;
          uint32_t ccm_seq;
          bool ccm_opdown;
+        bool extended;
          enum cfm_fault_reason cfm_fault = 0;
  
-        if (cfm->extended) {
+        atomic_read(&cfm->extended, &extended);
+        if (extended) {
              ccm_mpid = ntohll(ccm->mpid64);
              ccm_opdown = ccm->opdown;
          } else {
@@ -677,7 +732,7 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p)
                           ccm_interval, ccm_mpid);
          }
  
-        if (cfm->extended && ccm_interval == 0
+        if (extended && ccm_interval == 0
              && ccm_interval_ms_x != cfm->ccm_interval_ms) {
              cfm_fault |= CFM_FAULT_INTERVAL;
              VLOG_WARN_RL(&rl, "%s: received a CCM with an unexpected extended"
@@ -734,13 +789,13 @@ cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p)
              rmp->last_rx = time_msec();
          }
      }
+
+out:
+    ovs_mutex_unlock(&mutex);
  }
  
-/* Gets the fault status of 'cfm'.  Returns a bit mask of 'cfm_fault_reason's
- * indicating the cause of the connectivity fault, or zero if there is no
- * fault. */
-int
-cfm_get_fault(const struct cfm *cfm)
+static int
+cfm_get_fault__(const struct cfm *cfm) OVS_REQ_WRLOCK(mutex)
  {
      if (cfm->fault_override >= 0) {
          return cfm->fault_override ? CFM_FAULT_OVERRIDE : 0;
@@ -748,15 +803,34 @@ cfm_get_fault(const struct cfm *cfm)
      return cfm->fault;
  }
  
+/* Gets the fault status of 'cfm'.  Returns a bit mask of 'cfm_fault_reason's
+ * indicating the cause of the connectivity fault, or zero if there is no
+ * fault. */
+int
+cfm_get_fault(const struct cfm *cfm) OVS_EXCLUDED(mutex)
+{
+    int fault;
+
+    ovs_mutex_lock(&mutex);
+    fault = cfm_get_fault__(cfm);
+    ovs_mutex_unlock(&mutex);
+    return fault;
+}
+
  /* Gets the health of 'cfm'.  Returns an integer between 0 and 100 indicating
   * the health of the link as a percentage of ccm frames received in
   * CFM_HEALTH_INTERVAL * 'fault_interval' if there is only 1 remote_mpid,
   * returns 0 if there are no remote_mpids, and returns -1 if there are more
   * than 1 remote_mpids. */
  int
-cfm_get_health(const struct cfm *cfm)
+cfm_get_health(const struct cfm *cfm) OVS_EXCLUDED(mutex)
  {
-    return cfm->health;
+    int health;
+
+    ovs_mutex_lock(&mutex);
+    health = cfm->health;
+    ovs_mutex_unlock(&mutex);
+    return health;
  }
  
  /* Gets the operational state of 'cfm'.  'cfm' is considered operationally down
@@ -765,32 +839,38 @@ cfm_get_health(const struct cfm *cfm)
   * 'cfm' is operationally down, or -1 if 'cfm' has no operational state
   * (because it isn't in extended mode). */
  int
-cfm_get_opup(const struct cfm *cfm)
+cfm_get_opup(const struct cfm *cfm) OVS_EXCLUDED(mutex)
  {
-    if (cfm->extended) {
-        return cfm->remote_opup;
-    } else {
-        return -1;
-    }
+    bool extended;
+    int opup;
+
+    ovs_mutex_lock(&mutex);
+    atomic_read(&cfm->extended, &extended);
+    opup = extended ? cfm->remote_opup : -1;
+    ovs_mutex_unlock(&mutex);
+
+    return opup;
  }
  
  /* Populates 'rmps' with an array of remote maintenance points reachable by
   * 'cfm'. The number of remote maintenance points is written to 'n_rmps'.
   * 'cfm' retains ownership of the array written to 'rmps' */
  void
-cfm_get_remote_mpids(const struct cfm *cfm, const uint64_t **rmps,
-                     size_t *n_rmps)
+cfm_get_remote_mpids(const struct cfm *cfm, uint64_t **rmps, size_t *n_rmps)
+    OVS_EXCLUDED(mutex)
  {
-    *rmps = cfm->rmps_array;
+    ovs_mutex_lock(&mutex);
+    *rmps = xmemdup(cfm->rmps_array, cfm->rmps_array_len);
      *n_rmps = cfm->rmps_array_len;
+    ovs_mutex_unlock(&mutex);
  }
  
  static struct cfm *
-cfm_find(const char *name)
+cfm_find(const char *name) OVS_REQ_WRLOCK(&mutex)
  {
      struct cfm *cfm;
  
-    HMAP_FOR_EACH_WITH_HASH (cfm, hmap_node, hash_string(name, 0), &all_cfms) {
+    HMAP_FOR_EACH_WITH_HASH (cfm, hmap_node, hash_string(name, 0), all_cfms) {
          if (!strcmp(cfm->name, name)) {
              return cfm;
          }
@@ -799,17 +879,20 @@ cfm_find(const char *name)
  }
  
  static void
-cfm_print_details(struct ds *ds, const struct cfm *cfm)
+cfm_print_details(struct ds *ds, const struct cfm *cfm) OVS_REQ_WRLOCK(&mutex)
  {
      struct remote_mp *rmp;
+    bool extended;
      int fault;
  
+    atomic_read(&cfm->extended, &extended);
+
      ds_put_format(ds, "---- %s ----\n", cfm->name);
      ds_put_format(ds, "MPID %"PRIu64":%s%s\n", cfm->mpid,
-                  cfm->extended ? " extended" : "",
+                  extended ? " extended" : "",
                    cfm->fault_override >= 0 ? " fault_override" : "");
  
-    fault = cfm_get_fault(cfm);
+    fault = cfm_get_fault__(cfm);
      if (fault) {
          ds_put_cstr(ds, "\tfault: ");
          ds_put_cfm_fault(ds, fault);
@@ -840,36 +923,40 @@ cfm_print_details(struct ds *ds, const struct cfm *cfm)
  
  static void
  cfm_unixctl_show(struct unixctl_conn *conn, int argc, const char *argv[],
-                 void *aux OVS_UNUSED)
+                 void *aux OVS_UNUSED) OVS_EXCLUDED(mutex)
  {
      struct ds ds = DS_EMPTY_INITIALIZER;
      const struct cfm *cfm;
  
+    ovs_mutex_lock(&mutex);
      if (argc > 1) {
          cfm = cfm_find(argv[1]);
          if (!cfm) {
              unixctl_command_reply_error(conn, "no such CFM object");
-            return;
+            goto out;
          }
          cfm_print_details(&ds, cfm);
      } else {
-        HMAP_FOR_EACH (cfm, hmap_node, &all_cfms) {
+        HMAP_FOR_EACH (cfm, hmap_node, all_cfms) {
              cfm_print_details(&ds, cfm);
          }
      }
  
      unixctl_command_reply(conn, ds_cstr(&ds));
      ds_destroy(&ds);
+out:
+    ovs_mutex_unlock(&mutex);
  }
  
  static void
  cfm_unixctl_set_fault(struct unixctl_conn *conn, int argc, const char *argv[],
-                      void *aux OVS_UNUSED)
+                      void *aux OVS_UNUSED) OVS_EXCLUDED(mutex)
  {
      const char *fault_str = argv[argc - 1];
      int fault_override;
      struct cfm *cfm;
  
+    ovs_mutex_lock(&mutex);
      if (!strcasecmp("true", fault_str)) {
          fault_override = 1;
      } else if (!strcasecmp("false", fault_str)) {
@@ -878,21 +965,24 @@ cfm_unixctl_set_fault(struct unixctl_conn *conn, int argc, const char *argv[],
          fault_override = -1;
      } else {
          unixctl_command_reply_error(conn, "unknown fault string");
-        return;
+        goto out;
      }
  
      if (argc > 2) {
          cfm = cfm_find(argv[1]);
          if (!cfm) {
              unixctl_command_reply_error(conn, "no such CFM object");
-            return;
+            goto out;
          }
          cfm->fault_override = fault_override;
      } else {
-        HMAP_FOR_EACH (cfm, hmap_node, &all_cfms) {
+        HMAP_FOR_EACH (cfm, hmap_node, all_cfms) {
              cfm->fault_override = fault_override;
          }
      }
  
      unixctl_command_reply(conn, "OK");
+
+out:
+    ovs_mutex_unlock(&mutex);
  }
diff --git a/lib/cfm.h b/lib/cfm.h

index 8002f3e..0f3e97c 100644 (file)
--- a/lib/cfm.h
+++ b/lib/cfm.h
@@ -80,8 +80,7 @@ void cfm_process_heartbeat(struct cfm *, const struct ofpbuf *packet);
  int cfm_get_fault(const struct cfm *);
  int cfm_get_health(const struct cfm *);
  int cfm_get_opup(const struct cfm *);
-void cfm_get_remote_mpids(const struct cfm *, const uint64_t **rmps,
-                          size_t *n_rmps);
+void cfm_get_remote_mpids(const struct cfm *, uint64_t **rmps, size_t *n_rmps);
  const char *cfm_fault_reason_to_str(int fault);
  
  #endif /* cfm.h */
diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c

index 27c622a..1b97410 100644 (file)
--- a/lib/dpif-linux.c
+++ b/lib/dpif-linux.c
@@ -799,19 +799,21 @@ dpif_linux_port_poll(const struct dpif *dpif_, char **devnamep)
                      VLOG_DBG("port_changed: dpif:%s vport:%s cmd:%"PRIu8,
                               dpif->dpif.full_name, vport.name, vport.cmd);
                      *devnamep = xstrdup(vport.name);
+                    ofpbuf_uninit(&buf);
                      return 0;
-                } else {
-                    continue;
                  }
              }
-        } else if (error == EAGAIN) {
-            return EAGAIN;
+        } else if (error != EAGAIN) {
+            VLOG_WARN_RL(&rl, "error reading or parsing netlink (%s)",
+                         ovs_strerror(error));
+            nl_sock_drain(dpif->port_notifier);
+            error = ENOBUFS;
          }
  
-        VLOG_WARN_RL(&rl, "error reading or parsing netlink (%s)",
-                     ovs_strerror(error));
-        nl_sock_drain(dpif->port_notifier);
-        return ENOBUFS;
+        ofpbuf_uninit(&buf);
+        if (error) {
+            return error;
+        }
      }
  }
  
diff --git a/lib/dpif.c b/lib/dpif.c

index 9c5cf3d..1c1a524 100644 (file)
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -1352,7 +1352,7 @@ log_flow_message(const struct dpif *dpif, int error, const char *operation,
      if (error) {
          ds_put_format(&ds, "(%s) ", ovs_strerror(error));
      }
-    odp_flow_format(key, key_len, mask, mask_len, &ds);
+    odp_flow_format(key, key_len, mask, mask_len, &ds, true);
      if (stats) {
          ds_put_cstr(&ds, ", ");
          dpif_flow_stats_format(stats, &ds);
diff --git a/lib/learning-switch.c b/lib/learning-switch.c

index 872e58d..9c1aff7 100644 (file)
--- a/lib/learning-switch.c
+++ b/lib/learning-switch.c
@@ -251,7 +251,9 @@ lswitch_run(struct lswitch *sw)
      int i;
  
      if (sw->ml) {
-        mac_learning_run(sw->ml, NULL);
+        ovs_rwlock_wrlock(&sw->ml->rwlock);
+        mac_learning_run(sw->ml);
+        ovs_rwlock_unlock(&sw->ml->rwlock);
      }
  
      rconn_run(sw->rconn);
@@ -283,7 +285,9 @@ void
  lswitch_wait(struct lswitch *sw)
  {
      if (sw->ml) {
+        ovs_rwlock_rdlock(&sw->ml->rwlock);
          mac_learning_wait(sw->ml);
+        ovs_rwlock_unlock(&sw->ml->rwlock);
      }
      rconn_run_wait(sw->rconn);
      rconn_recv_wait(sw->rconn);
@@ -472,18 +476,23 @@ lswitch_choose_destination(struct lswitch *sw, const struct flow *flow)
      ofp_port_t out_port;
  
      /* Learn the source MAC. */
-    if (mac_learning_may_learn(sw->ml, flow->dl_src, 0)) {
-        struct mac_entry *mac = mac_learning_insert(sw->ml, flow->dl_src, 0);
-        if (mac_entry_is_new(mac)
-            || mac->port.ofp_port != flow->in_port.ofp_port) {
-            VLOG_DBG_RL(&rl, "%016llx: learned that "ETH_ADDR_FMT" is on "
-                        "port %"PRIu16, sw->datapath_id,
-                        ETH_ADDR_ARGS(flow->dl_src), flow->in_port.ofp_port);
-
-            mac->port.ofp_port = flow->in_port.ofp_port;
-            mac_learning_changed(sw->ml, mac);
-        }
-    }
+    if (sw->ml) {
+        ovs_rwlock_wrlock(&sw->ml->rwlock);
+        if (mac_learning_may_learn(sw->ml, flow->dl_src, 0)) {
+            struct mac_entry *mac = mac_learning_insert(sw->ml, flow->dl_src,
+                                                        0);
+            if (mac->port.ofp_port != flow->in_port.ofp_port) {
+                VLOG_DBG_RL(&rl, "%016llx: learned that "ETH_ADDR_FMT" is on "
+                            "port %"PRIu16, sw->datapath_id,
+                            ETH_ADDR_ARGS(flow->dl_src),
+                            flow->in_port.ofp_port);
+
+                mac->port.ofp_port = flow->in_port.ofp_port;
+                mac_learning_changed(sw->ml);
+            }
+        }
+        ovs_rwlock_unlock(&sw->ml->rwlock);
+    }
  
      /* Drop frames for reserved multicast addresses. */
      if (eth_addr_is_reserved(flow->dl_dst)) {
@@ -494,14 +503,17 @@ lswitch_choose_destination(struct lswitch *sw, const struct flow *flow)
      if (sw->ml) {
          struct mac_entry *mac;
  
-        mac = mac_learning_lookup(sw->ml, flow->dl_dst, 0, NULL);
+        ovs_rwlock_rdlock(&sw->ml->rwlock);
+        mac = mac_learning_lookup(sw->ml, flow->dl_dst, 0);
          if (mac) {
              out_port = mac->port.ofp_port;
              if (out_port == flow->in_port.ofp_port) {
                  /* Don't send a packet back out its input port. */
+                ovs_rwlock_unlock(&sw->ml->rwlock);
                  return OFPP_NONE;
              }
          }
+        ovs_rwlock_unlock(&sw->ml->rwlock);
      }
  
      /* Check if we need to use "NORMAL" action. */
  
      /* Check if we need to use "NORMAL" action. */
diff --git a/lib/mac-learning.c b/lib/mac-learning.c

index e2ca02b..80dac69 100644 (file)
--- a/lib/mac-learning.c
+++ b/lib/mac-learning.c
@@ -25,7 +25,6 @@
  #include "hash.h"
  #include "list.h"
  #include "poll-loop.h"
-#include "tag.h"
  #include "timeval.h"
  #include "unaligned.h"
  #include "util.h"
@@ -60,16 +59,6 @@ mac_entry_from_lru_node(struct list *list)
      return CONTAINER_OF(list, struct mac_entry, lru_node);
  }
  
-/* Returns a tag that represents that 'mac' is on an unknown port in 'vlan'.
- * (When we learn where 'mac' is in 'vlan', this allows flows that were
- * flooded to be revalidated.) */
-static tag_type
-make_unknown_mac_tag(const struct mac_learning *ml,
-                     const uint8_t mac[ETH_ADDR_LEN], uint16_t vlan)
-{
-    return tag_create_deterministic(mac_table_hash(ml, mac, vlan));
-}
-
  static struct mac_entry *
  mac_entry_lookup(const struct mac_learning *ml,
                   const uint8_t mac[ETH_ADDR_LEN], uint16_t vlan)
@@ -90,6 +79,7 @@ mac_entry_lookup(const struct mac_learning *ml,
   * and return false. */
  static bool
  get_lru(struct mac_learning *ml, struct mac_entry **e)
+    OVS_REQ_RDLOCK(ml->rwlock)
  {
      if (!list_is_empty(&ml->lrus)) {
          *e = mac_entry_from_lru_node(ml->lrus.next);
@@ -123,8 +113,9 @@ mac_learning_create(unsigned int idle_time)
      ml->flood_vlans = NULL;
      ml->idle_time = normalize_idle_time(idle_time);
      ml->max_entries = MAC_DEFAULT_MAX;
-    tag_set_init(&ml->tags);
-    ml->ref_cnt = 1;
+    ml->need_revalidate = false;
+    atomic_init(&ml->ref_cnt, 1);
+    ovs_rwlock_init(&ml->rwlock);
      return ml;
  }
  
@@ -133,8 +124,9 @@ mac_learning_ref(const struct mac_learning *ml_)
  {
      struct mac_learning *ml = CONST_CAST(struct mac_learning *, ml_);
      if (ml) {
-        ovs_assert(ml->ref_cnt > 0);
-        ml->ref_cnt++;
+        int orig;
+        atomic_add(&ml->ref_cnt, 1, &orig);
+        ovs_assert(orig > 0);
      }
      return ml;
  }
@@ -143,12 +135,15 @@ mac_learning_ref(const struct mac_learning *ml_)
  void
  mac_learning_unref(struct mac_learning *ml)
  {
+    int orig;
+
      if (!ml) {
          return;
      }
  
-    ovs_assert(ml->ref_cnt > 0);
-    if (!--ml->ref_cnt) {
+    atomic_sub(&ml->ref_cnt, 1, &orig);
+    ovs_assert(orig > 0);
+    if (orig == 1) {
          struct mac_entry *e, *next;
  
          HMAP_FOR_EACH_SAFE (e, next, hmap_node, &ml->table) {
@@ -158,6 +153,7 @@ mac_learning_unref(struct mac_learning *ml)
          hmap_destroy(&ml->table);
  
          bitmap_free(ml->flood_vlans);
+        ovs_rwlock_destroy(&ml->rwlock);
          free(ml);
      }
  }
@@ -250,8 +246,8 @@ mac_learning_insert(struct mac_learning *ml,
          hmap_insert(&ml->table, &e->hmap_node, hash);
          memcpy(e->mac, src_mac, ETH_ADDR_LEN);
          e->vlan = vlan;
-        e->tag = 0;
          e->grat_arp_lock = TIME_MIN;
+        e->port.p = NULL;
      } else {
          list_remove(&e->lru_node);
      }
@@ -272,14 +268,10 @@ mac_learning_insert(struct mac_learning *ml,
   * from mac_learning_insert(), if the entry is either new or if its learned
   * port has changed. */
  void
-mac_learning_changed(struct mac_learning *ml, struct mac_entry *e)
+mac_learning_changed(struct mac_learning *ml)
  {
-    tag_type tag = e->tag ? e->tag : make_unknown_mac_tag(ml, e->mac, e->vlan);
-
      COVERAGE_INC(mac_learning_learned);
-
-    e->tag = tag_create_random();
-    tag_set_add(&ml->tags, tag);
+    ml->need_revalidate = true;
  }
  
  /* Looks up MAC 'dst' for VLAN 'vlan' in 'ml' and returns the associated MAC
@@ -288,8 +280,7 @@ mac_learning_changed(struct mac_learning *ml, struct mac_entry *e)
   * '*tag'. */
  struct mac_entry *
  mac_learning_lookup(const struct mac_learning *ml,
-                    const uint8_t dst[ETH_ADDR_LEN], uint16_t vlan,
-                    tag_type *tag)
+                    const uint8_t dst[ETH_ADDR_LEN], uint16_t vlan)
  {
      if (eth_addr_is_multicast(dst)) {
          /* No tag because the treatment of multicast destinations never
@@ -302,11 +293,7 @@ mac_learning_lookup(const struct mac_learning *ml,
      } else {
          struct mac_entry *e = mac_entry_lookup(ml, dst, vlan);
  
-        ovs_assert(e == NULL || e->tag != 0);
-        if (tag) {
-            /* Tag either the learned port or the lack thereof. */
-            *tag |= e ? e->tag : make_unknown_mac_tag(ml, dst, vlan);
-        }
+        ovs_assert(e == NULL || e->port.p != NULL)
          return e;
      }
  }
@@ -325,44 +312,42 @@ mac_learning_expire(struct mac_learning *ml, struct mac_entry *e)
   * is responsible for revalidating any flows that depend on 'ml', if
   * necessary. */
  void
-mac_learning_flush(struct mac_learning *ml, struct tag_set *tags)
+mac_learning_flush(struct mac_learning *ml)
  {
      struct mac_entry *e;
      while (get_lru(ml, &e)){
-        if (tags) {
-            tag_set_add(tags, e->tag);
-        }
+        ml->need_revalidate = true;
          mac_learning_expire(ml, e);
      }
      hmap_shrink(&ml->table);
  }
  
-void
-mac_learning_run(struct mac_learning *ml, struct tag_set *set)
+/* Does periodic work required by 'ml'.  Returns true if something changed that
+ * may require flow revalidation. */
+bool
+mac_learning_run(struct mac_learning *ml)
  {
+    bool need_revalidate;
      struct mac_entry *e;
  
-    if (set) {
-        tag_set_union(set, &ml->tags);
-    }
-    tag_set_init(&ml->tags);
-
      while (get_lru(ml, &e)
             && (hmap_count(&ml->table) > ml->max_entries
                 || time_now() >= e->expires)) {
          COVERAGE_INC(mac_learning_expired);
-        if (set) {
-            tag_set_add(set, e->tag);
-        }
+        ml->need_revalidate = true;
          mac_learning_expire(ml, e);
      }
+
+    need_revalidate = ml->need_revalidate;
+    ml->need_revalidate = false;
+    return need_revalidate;
  }
  
  void
  mac_learning_wait(struct mac_learning *ml)
  {
      if (hmap_count(&ml->table) > ml->max_entries
-        || !tag_set_is_empty(&ml->tags)) {
+        || ml->need_revalidate) {
          poll_immediate_wake();
      } else if (!list_is_empty(&ml->lrus)) {
          struct mac_entry *e = mac_entry_from_lru_node(ml->lrus.next);
diff --git a/lib/mac-learning.h b/lib/mac-learning.h

index 06e99f3..ba7f734 100644 (file)
--- a/lib/mac-learning.h
+++ b/lib/mac-learning.h
@@ -20,8 +20,9 @@
  #include <time.h>
  #include "hmap.h"
  #include "list.h"
+#include "ovs-atomic.h"
+#include "ovs-thread.h"
  #include "packets.h"
-#include "tag.h"
  #include "timeval.h"
  
  struct mac_learning;
@@ -36,32 +37,26 @@ struct mac_learning;
   * relearning based on a reflection from a bond slave. */
  #define MAC_GRAT_ARP_LOCK_TIME 5
  
-/* A MAC learning table entry. */
+/* A MAC learning table entry.
+ * Guarded by owning 'mac_learning''s rwlock. */
  struct mac_entry {
      struct hmap_node hmap_node; /* Node in a mac_learning hmap. */
-    struct list lru_node;       /* Element in 'lrus' list. */
      time_t expires;             /* Expiration time. */
      time_t grat_arp_lock;       /* Gratuitous ARP lock expiration time. */
      uint8_t mac[ETH_ADDR_LEN];  /* Known MAC address. */
      uint16_t vlan;              /* VLAN tag. */
-    tag_type tag;               /* Tag for this learning entry. */
+
+    /* The following are marked guarded to prevent users from iterating over or
+     * accessing a mac_entry without holding the parent mac_learning rwlock. */
+    struct list lru_node OVS_GUARDED; /* Element in 'lrus' list. */
  
      /* Learned port. */
      union {
          void *p;
          ofp_port_t ofp_port;
-    } port;
+    } port OVS_GUARDED;
  };
  
-int mac_entry_age(const struct mac_learning *, const struct mac_entry *);
-
-/* Returns true if mac_learning_insert() just created 'mac' and the caller has
- * not yet properly initialized it. */
-static inline bool mac_entry_is_new(const struct mac_entry *mac)
-{
-    return !mac->tag;
-}
-
  /* Sets a gratuitous ARP lock on 'mac' that will expire in
   * MAC_GRAT_ARP_LOCK_TIME seconds. */
  static inline void mac_entry_set_grat_arp_lock(struct mac_entry *mac)
@@ -79,46 +74,59 @@ static inline bool mac_entry_is_grat_arp_locked(const struct mac_entry *mac)
  /* MAC learning table. */
  struct mac_learning {
      struct hmap table;          /* Learning table. */
-    struct list lrus;           /* In-use entries, least recently used at the
-                                   front, most recently used at the back. */
+    struct list lrus OVS_GUARDED; /* In-use entries, least recently used at the
+                                     front, most recently used at the back. */
      uint32_t secret;            /* Secret for randomizing hash table. */
      unsigned long *flood_vlans; /* Bitmap of learning disabled VLANs. */
      unsigned int idle_time;     /* Max age before deleting an entry. */
      size_t max_entries;         /* Max number of learned MACs. */
-    struct tag_set tags;        /* Tags which have changed. */
-    int ref_cnt;
+    atomic_int ref_cnt;
+    struct ovs_rwlock rwlock;
+    bool need_revalidate;
  };
  
+int mac_entry_age(const struct mac_learning *ml, const struct mac_entry *e)
+    OVS_REQ_RDLOCK(ml->rwlock);
+
  /* Basics. */
  struct mac_learning *mac_learning_create(unsigned int idle_time);
  struct mac_learning *mac_learning_ref(const struct mac_learning *);
  void mac_learning_unref(struct mac_learning *);
  
-void mac_learning_run(struct mac_learning *, struct tag_set *);
-void mac_learning_wait(struct mac_learning *);
+bool mac_learning_run(struct mac_learning *ml) OVS_REQ_WRLOCK(ml->rwlock);
+void mac_learning_wait(struct mac_learning *ml)
+    OVS_REQ_RDLOCK(ml->rwlock);
  
  /* Configuration. */
-bool mac_learning_set_flood_vlans(struct mac_learning *,
-                                  const unsigned long *bitmap);
-void mac_learning_set_idle_time(struct mac_learning *, unsigned int idle_time);
-void mac_learning_set_max_entries(struct mac_learning *, size_t max_entries);
+bool mac_learning_set_flood_vlans(struct mac_learning *ml,
+                                  const unsigned long *bitmap)
+    OVS_REQ_WRLOCK(ml->rwlock);
+void mac_learning_set_idle_time(struct mac_learning *ml,
+                                unsigned int idle_time)
+    OVS_REQ_WRLOCK(ml->rwlock);
+void mac_learning_set_max_entries(struct mac_learning *ml, size_t max_entries)
+    OVS_REQ_WRLOCK(ml->rwlock);
  
  /* Learning. */
-bool mac_learning_may_learn(const struct mac_learning *,
+bool mac_learning_may_learn(const struct mac_learning *ml,
                              const uint8_t src_mac[ETH_ADDR_LEN],
-                            uint16_t vlan);
-struct mac_entry *mac_learning_insert(struct mac_learning *,
+                            uint16_t vlan)
+    OVS_REQ_RDLOCK(ml->rwlock);
+struct mac_entry *mac_learning_insert(struct mac_learning *ml,
                                        const uint8_t src[ETH_ADDR_LEN],
-                                      uint16_t vlan);
-void mac_learning_changed(struct mac_learning *, struct mac_entry *);
+                                      uint16_t vlan)
+    OVS_REQ_WRLOCK(ml->rwlock);
+void mac_learning_changed(struct mac_learning *ml) OVS_REQ_WRLOCK(ml->rwlock);
  
  /* Lookup. */
-struct mac_entry *mac_learning_lookup(const struct mac_learning *,
+struct mac_entry *mac_learning_lookup(const struct mac_learning *ml,
                                        const uint8_t dst[ETH_ADDR_LEN],
-                                      uint16_t vlan, tag_type *);
+                                      uint16_t vlan)
+    OVS_REQ_RDLOCK(ml->rwlock);
  
  /* Flushing. */
-void mac_learning_expire(struct mac_learning *, struct mac_entry *);
-void mac_learning_flush(struct mac_learning *, struct tag_set *);
+void mac_learning_expire(struct mac_learning *ml, struct mac_entry *e)
+    OVS_REQ_WRLOCK(ml->rwlock);
+void mac_learning_flush(struct mac_learning *ml) OVS_REQ_WRLOCK(ml->rwlock);
  
  #endif /* mac-learning.h */
diff --git a/lib/multipath.c b/lib/multipath.c

index 6c0560d..4b9e4af 100644 (file)
--- a/lib/multipath.c
+++ b/lib/multipath.c
@@ -114,7 +114,7 @@ multipath_execute(const struct ofpact_multipath *mp, struct flow *flow,
                                          mp->max_link + 1, mp->arg);
  
      flow_mask_hash_fields(flow, wc, mp->fields);
-    nxm_reg_load(&mp->dst, link, flow);
+    nxm_reg_load(&mp->dst, link, flow, wc);
  }
  
  static uint16_t
diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c

index 401d03a..5c23d38 100644 (file)
--- a/lib/netdev-bsd.c
+++ b/lib/netdev-bsd.c
@@ -83,6 +83,7 @@ struct netdev_bsd {
      int ifindex;
      uint8_t etheraddr[ETH_ADDR_LEN];
      struct in_addr in4;
+    struct in_addr netmask;
      struct in6_addr in6;
      int mtu;
      int carrier;
@@ -254,6 +255,7 @@ netdev_bsd_cache_cb(const struct rtbsd_change *change,
                  dev->cache_valid = 0;
                  netdev_bsd_changed(dev);
              }
+            netdev_close(base_dev);
          }
      } else {
          /*
@@ -266,9 +268,11 @@ netdev_bsd_cache_cb(const struct rtbsd_change *change,
          shash_init(&device_shash);
          netdev_get_devices(&netdev_bsd_class, &device_shash);
          SHASH_FOR_EACH (node, &device_shash) {
-            dev = node->data;
+            struct netdev *netdev = node->data;
+            dev = netdev_bsd_cast(netdev);
              dev->cache_valid = 0;
              netdev_bsd_changed(dev);
+            netdev_close(netdev);
          }
          shash_destroy(&device_shash);
      }
@@ -323,6 +327,7 @@ netdev_bsd_create_system(const struct netdev_class *class, const char *name,
      /* Verify that the netdev really exists by attempting to read its flags */
      error = netdev_get_flags(&netdev->up, &flags);
      if (error == ENXIO) {
+        free(netdev->kernel_name);
          netdev_uninit(&netdev->up, false);
          free(netdev);
          cache_notifier_unref();
@@ -362,14 +367,15 @@ netdev_bsd_create_tap(const struct netdev_class *class, const char *name,
      if (netdev->tap_fd < 0) {
          error = errno;
          VLOG_WARN("opening \"/dev/tap\" failed: %s", ovs_strerror(error));
-        goto error_undef_notifier;
+        goto error_unref_notifier;
      }
  
      /* Retrieve tap name (e.g. tap0) */
      if (ioctl(netdev->tap_fd, TAPGIFNAME, &ifr) == -1) {
          /* XXX Need to destroy the device? */
          error = errno;
-        goto error_undef_notifier;
+        close(netdev->tap_fd);
+        goto error_unref_notifier;
      }
  
      /* Change the name of the tap device */
@@ -378,7 +384,7 @@ netdev_bsd_create_tap(const struct netdev_class *class, const char *name,
      if (ioctl(af_inet_sock, SIOCSIFNAME, &ifr) == -1) {
          error = errno;
          destroy_tap(netdev->tap_fd, ifr.ifr_name);
-        goto error_undef_notifier;
+        goto error_unref_notifier;
      }
      kernel_name = xstrdup(name);
  #else
@@ -393,7 +399,7 @@ netdev_bsd_create_tap(const struct netdev_class *class, const char *name,
      error = set_nonblocking(netdev->tap_fd);
      if (error) {
          destroy_tap(netdev->tap_fd, kernel_name);
-        goto error_undef_notifier;
+        goto error_unref_notifier;
      }
  
      /* Turn device UP */
@@ -402,7 +408,7 @@ netdev_bsd_create_tap(const struct netdev_class *class, const char *name,
      if (ioctl(af_inet_sock, SIOCSIFFLAGS, &ifr) == -1) {
          error = errno;
          destroy_tap(netdev->tap_fd, kernel_name);
-        goto error_undef_notifier;
+        goto error_unref_notifier;
      }
  
      /* initialize the device structure and
@@ -413,7 +419,7 @@ netdev_bsd_create_tap(const struct netdev_class *class, const char *name,
  
      return 0;
  
-error_undef_notifier:
+error_unref_notifier:
      cache_notifier_unref();
  error:
      free(netdev);
@@ -1096,8 +1102,8 @@ cleanup:
  }
  
  /*
- * If 'netdev' has an assigned IPv4 address, sets '*in4' to that address (if
- * 'in4' is non-null) and returns true.  Otherwise, returns false.
+ * If 'netdev' has an assigned IPv4 address, sets '*in4' to that address and
+ * '*netmask' to its netmask and returns true.  Otherwise, returns false.
   */
  static int
  netdev_bsd_get_in4(const struct netdev *netdev_, struct in_addr *in4,
@@ -1119,15 +1125,16 @@ netdev_bsd_get_in4(const struct netdev *netdev_, struct in_addr *in4,
  
          sin = (struct sockaddr_in *) &ifr.ifr_addr;
          netdev->in4 = sin->sin_addr;
-        netdev->cache_valid |= VALID_IN4;
          error = netdev_bsd_do_ioctl(netdev_get_kernel_name(netdev_), &ifr,
                                      SIOCGIFNETMASK, "SIOCGIFNETMASK");
          if (error) {
              return error;
          }
-        *netmask = ((struct sockaddr_in*)&ifr.ifr_addr)->sin_addr;
+        netdev->netmask = sin->sin_addr;
+        netdev->cache_valid |= VALID_IN4;
      }
      *in4 = netdev->in4;
+    *netmask = netdev->netmask;
  
      return in4->s_addr == INADDR_ANY ? EADDRNOTAVAIL : 0;
  }
@@ -1146,11 +1153,14 @@ netdev_bsd_set_in4(struct netdev *netdev_, struct in_addr addr,
  
      error = do_set_addr(netdev_, SIOCSIFADDR, "SIOCSIFADDR", addr);
      if (!error) {
-        netdev->cache_valid |= VALID_IN4;
-        netdev->in4 = addr;
          if (addr.s_addr != INADDR_ANY) {
              error = do_set_addr(netdev_, SIOCSIFNETMASK,
                                  "SIOCSIFNETMASK", mask);
+            if (!error) {
+                netdev->cache_valid |= VALID_IN4;
+                netdev->in4 = addr;
+                netdev->netmask = mask;
+            }
          }
          netdev_bsd_changed(netdev);
      }
@@ -1192,36 +1202,28 @@ netdev_bsd_get_in6(const struct netdev *netdev_, struct in6_addr *in6)
  }
  
  #if defined(__NetBSD__)
-static struct netdev *
-find_netdev_by_kernel_name(const char *kernel_name)
+static char *
+netdev_bsd_kernel_name_to_ovs_name(const char *kernel_name)
  {
+    char *ovs_name = NULL;
      struct shash device_shash;
      struct shash_node *node;
  
      shash_init(&device_shash);
      netdev_get_devices(&netdev_tap_class, &device_shash);
      SHASH_FOR_EACH(node, &device_shash) {
-        struct netdev_bsd * const dev = node->data;
+        struct netdev *netdev = node->data;
+        struct netdev_bsd * const dev = netdev_bsd_cast(netdev);
  
          if (!strcmp(dev->kernel_name, kernel_name)) {
-            shash_destroy(&device_shash);
-            return &dev->up;
+            free(ovs_name);
+            ovs_name = xstrdup(netdev_get_name(&dev->up));
          }
+        netdev_close(netdev);
      }
      shash_destroy(&device_shash);
-    return NULL;
-}
  
-static const char *
-netdev_bsd_convert_kernel_name_to_ovs_name(const char *kernel_name)
-{
-    const struct netdev * const netdev =
-      find_netdev_by_kernel_name(kernel_name);
-
-    if (netdev == NULL) {
-        return NULL;
-    }
-    return netdev_get_name(netdev);
+    return ovs_name ? ovs_name : xstrdup(kernel_name);
  }
  #endif
  
@@ -1303,18 +1305,10 @@ netdev_bsd_get_next_hop(const struct in_addr *host OVS_UNUSED,
              if ((i == RTA_IFP) && sa->sa_family == AF_LINK) {
                  const struct sockaddr_dl * const sdl =
                    (const struct sockaddr_dl *)sa;
-                const size_t nlen = sdl->sdl_nlen;
-                char * const kernel_name = xmalloc(nlen + 1);
-                const char *name;
-
-                memcpy(kernel_name, sdl->sdl_data, nlen);
-                kernel_name[nlen] = 0;
-                name = netdev_bsd_convert_kernel_name_to_ovs_name(kernel_name);
-                if (name == NULL) {
-                    ifname = xstrdup(kernel_name);
-                } else {
-                    ifname = xstrdup(name);
-                }
+                char *kernel_name;
+
+                kernel_name = xmemdup0(sdl->sdl_data, sdl->sdl_nlen);
+                ifname = netdev_bsd_kernel_name_to_ovs_name(kernel_name);
                  free(kernel_name);
              }
              RT_ADVANCE(cp, sa);
diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c

index c4f58b7..e7dfe9f 100644 (file)
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -351,7 +351,7 @@ netdev_rx_dummy_recv(struct netdev_rx *rx_, void *buffer, size_t size)
  {
      struct netdev_rx_dummy *rx = netdev_rx_dummy_cast(rx_);
      struct ofpbuf *packet;
-    size_t packet_size;
+    int retval;
  
      if (list_is_empty(&rx->recv_queue)) {
          return -EAGAIN;
@@ -359,15 +359,15 @@ netdev_rx_dummy_recv(struct netdev_rx *rx_, void *buffer, size_t size)
  
      packet = ofpbuf_from_list(list_pop_front(&rx->recv_queue));
      rx->recv_queue_len--;
-    if (packet->size > size) {
-        return -EMSGSIZE;
+    if (packet->size <= size) {
+        memcpy(buffer, packet->data, packet->size);
+        retval = packet->size;
+    } else {
+        retval = -EMSGSIZE;
      }
-    packet_size = packet->size;
-
-    memcpy(buffer, packet->data, packet->size);
      ofpbuf_delete(packet);
  
-    return packet_size;
+    return retval;
  }
  
  static void
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c

index 301a754..c59f590 100644 (file)
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -120,10 +120,6 @@ enum {
      VALID_DRVINFO           = 1 << 7,
      VALID_FEATURES          = 1 << 8,
  };
-
-struct tap_state {
-    int fd;
-};
  \f
  /* Traffic control. */
  
@@ -359,7 +355,6 @@ static int tc_calc_buffer(unsigned int Bps, int mtu, uint64_t burst_bytes);
  struct netdev_linux {
      struct netdev up;
  
-    struct shash_node *shash_node;
      unsigned int cache_valid;
      unsigned int change_seq;
  
@@ -389,14 +384,12 @@ struct netdev_linux {
      enum netdev_features current;    /* Cached from ETHTOOL_GSET. */
      enum netdev_features advertised; /* Cached from ETHTOOL_GSET. */
      enum netdev_features supported;  /* Cached from ETHTOOL_GSET. */
-    enum netdev_features peer;       /* Cached from ETHTOOL_GSET. */
  
      struct ethtool_drvinfo drvinfo;  /* Cached from ETHTOOL_GDRVINFO. */
      struct tc *tc;
  
-    union {
-        struct tap_state tap;
-    } state;
+    /* For devices of class netdev_tap_class only. */
+    int tap_fd;
  };
  
  struct netdev_rx_linux {
@@ -544,11 +537,11 @@ static void
  netdev_linux_cache_cb(const struct rtnetlink_link_change *change,
                        void *aux OVS_UNUSED)
  {
-    struct netdev_linux *dev;
      if (change) {
          struct netdev *base_dev = netdev_from_name(change->ifname);
          if (base_dev && is_netdev_linux_class(netdev_get_class(base_dev))) {
              netdev_linux_update(netdev_linux_cast(base_dev), change);
+            netdev_close(base_dev);
          }
      } else {
          struct shash device_shash;
@@ -557,12 +550,13 @@ netdev_linux_cache_cb(const struct rtnetlink_link_change *change,
          shash_init(&device_shash);
          netdev_get_devices(&netdev_linux_class, &device_shash);
          SHASH_FOR_EACH (node, &device_shash) {
+            struct netdev *netdev = node->data;
+            struct netdev_linux *dev = netdev_linux_cast(netdev);
              unsigned int flags;
  
-            dev = node->data;
-
              get_flags(&dev->up, &flags);
              netdev_linux_changed(dev, flags, 0);
+            netdev_close(netdev);
          }
          shash_destroy(&device_shash);
      }
@@ -644,13 +638,12 @@ netdev_linux_create_tap(const struct netdev_class *class OVS_UNUSED,
                          const char *name, struct netdev **netdevp)
  {
      struct netdev_linux *netdev;
-    struct tap_state *state;
      static const char tap_dev[] = "/dev/net/tun";
      struct ifreq ifr;
      int error;
  
      netdev = xzalloc(sizeof *netdev);
-    state = &netdev->state.tap;
+    netdev->change_seq = 1;
  
      error = cache_notifier_ref();
      if (error) {
@@ -658,8 +651,8 @@ netdev_linux_create_tap(const struct netdev_class *class OVS_UNUSED,
      }
  
      /* Open tap device. */
-    state->fd = open(tap_dev, O_RDWR);
-    if (state->fd < 0) {
+    netdev->tap_fd = open(tap_dev, O_RDWR);
+    if (netdev->tap_fd < 0) {
          error = errno;
          VLOG_WARN("opening \"%s\" failed: %s", tap_dev, ovs_strerror(error));
          goto error_unref_notifier;
@@ -668,23 +661,25 @@ netdev_linux_create_tap(const struct netdev_class *class OVS_UNUSED,
      /* Create tap device. */
      ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
      ovs_strzcpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
-    if (ioctl(state->fd, TUNSETIFF, &ifr) == -1) {
+    if (ioctl(netdev->tap_fd, TUNSETIFF, &ifr) == -1) {
          VLOG_WARN("%s: creating tap device failed: %s", name,
                    ovs_strerror(errno));
          error = errno;
-        goto error_unref_notifier;
+        goto error_close;
      }
  
      /* Make non-blocking. */
-    error = set_nonblocking(state->fd);
+    error = set_nonblocking(netdev->tap_fd);
      if (error) {
-        goto error_unref_notifier;
+        goto error_close;
      }
  
      netdev_init(&netdev->up, name, &netdev_tap_class);
      *netdevp = &netdev->up;
      return 0;
  
+error_close:
+    close(netdev->tap_fd);
  error_unref_notifier:
      cache_notifier_unref();
  error:
@@ -692,17 +687,6 @@ error:
      return error;
  }
  
-static void
-destroy_tap(struct netdev_linux *netdev)
-{
-    struct tap_state *state = &netdev->state.tap;
-
-    if (state->fd >= 0) {
-        close(state->fd);
-    }
-}
-
-/* Destroys the netdev device 'netdev_'. */
  static void
  netdev_linux_destroy(struct netdev *netdev_)
  {
@@ -712,8 +696,10 @@ netdev_linux_destroy(struct netdev *netdev_)
          netdev->tc->ops->tc_destroy(netdev->tc);
      }
  
-    if (netdev_get_class(netdev_) == &netdev_tap_class) {
-        destroy_tap(netdev);
+    if (netdev_get_class(netdev_) == &netdev_tap_class
+        && netdev->tap_fd >= 0)
+    {
+        close(netdev->tap_fd);
      }
      free(netdev);
  
@@ -730,7 +716,7 @@ netdev_linux_rx_open(struct netdev *netdev_, struct netdev_rx **rxp)
      int fd;
  
      if (is_tap) {
-        fd = netdev->state.tap.fd;
+        fd = netdev->tap_fd;
      } else {
          struct sockaddr_ll sll;
          int ifindex;
@@ -920,7 +906,7 @@ netdev_linux_send(struct netdev *netdev_, const void *data, size_t size)
               * because we attach a socket filter to the rx socket. */
              struct netdev_linux *netdev = netdev_linux_cast(netdev_);
  
-            retval = write(netdev->state.tap.fd, data, size);
+            retval = write(netdev->tap_fd, data, size);
          }
  
          if (retval < 0) {
@@ -1191,10 +1177,12 @@ netdev_linux_miimon_run(void)
      shash_init(&device_shash);
      netdev_get_devices(&netdev_linux_class, &device_shash);
      SHASH_FOR_EACH (node, &device_shash) {
-        struct netdev_linux *dev = node->data;
+        struct netdev *netdev = node->data;
+        struct netdev_linux *dev = netdev_linux_cast(netdev);
          bool miimon;
  
          if (dev->miimon_interval <= 0 || !timer_expired(&dev->miimon_timer)) {
+            netdev_close(netdev);
              continue;
          }
  
@@ -1205,6 +1193,7 @@ netdev_linux_miimon_run(void)
          }
  
          timer_set_duration(&dev->miimon_timer, dev->miimon_interval);
+        netdev_close(netdev);
      }
  
      shash_destroy(&device_shash);
@@ -1219,11 +1208,13 @@ netdev_linux_miimon_wait(void)
      shash_init(&device_shash);
      netdev_get_devices(&netdev_linux_class, &device_shash);
      SHASH_FOR_EACH (node, &device_shash) {
-        struct netdev_linux *dev = node->data;
+        struct netdev *netdev = node->data;
+        struct netdev_linux *dev = netdev_linux_cast(netdev);
  
          if (dev->miimon_interval > 0) {
              timer_wait(&dev->miimon_timer);
          }
+        netdev_close(netdev);
      }
      shash_destroy(&device_shash);
  }
@@ -1418,8 +1409,7 @@ netdev_linux_get_stats(const struct netdev *netdev_,
  /* Retrieves current device stats for 'netdev-tap' netdev or
   * netdev-internal. */
  static int
-netdev_tap_get_stats(const struct netdev *netdev_,
-                        struct netdev_stats *stats)
+netdev_tap_get_stats(const struct netdev *netdev_, struct netdev_stats *stats)
  {
      struct netdev_linux *netdev = netdev_linux_cast(netdev_);
      struct netdev_stats dev_stats;
@@ -1644,18 +1634,14 @@ netdev_linux_read_features(struct netdev_linux *netdev)
          netdev->current |= NETDEV_F_AUTONEG;
      }
  
-    /* Peer advertisements. */
-    netdev->peer = 0;                  /* XXX */
-
  out:
      netdev->cache_valid |= VALID_FEATURES;
      netdev->get_features_error = error;
  }
  
-/* Stores the features supported by 'netdev' into each of '*current',
- * '*advertised', '*supported', and '*peer' that are non-null.  Each value is a
- * bitmap of NETDEV_* bits.  Returns 0 if successful, otherwise a positive
- * errno value. */
+/* Stores the features supported by 'netdev' into '*current', '*advertised',
+ * '*supported', and '*peer'.  Each value is a bitmap of NETDEV_* bits.
+ * Returns 0 if successful, otherwise a positive errno value. */
  static int
  netdev_linux_get_features(const struct netdev *netdev_,
                            enum netdev_features *current,
@@ -1671,7 +1657,7 @@ netdev_linux_get_features(const struct netdev *netdev_,
          *current = netdev->current;
          *advertised = netdev->advertised;
          *supported = netdev->supported;
-        *peer = netdev->peer;
+        *peer = 0;              /* XXX */
      }
      return netdev->get_features_error;
  }
diff --git a/lib/netdev-linux.h b/lib/netdev-linux.h

index e404e46..7874dd6 100644 (file)
--- a/lib/netdev-linux.h
+++ b/lib/netdev-linux.h
@@ -24,8 +24,6 @@
   * Linux-specific code. */
  
  struct netdev;
-struct netdev_stats;
-struct rtnl_link_stats;
  
  int netdev_linux_ethtool_set_flag(struct netdev *netdev, uint32_t flag,
                                    const char *flag_name, bool enable);
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h

index 9af0398..b705f89 100644 (file)
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -50,12 +50,6 @@ struct netdev *netdev_from_name(const char *name);
  void netdev_get_devices(const struct netdev_class *,
                          struct shash *device_list);
  
-static inline void netdev_assert_class(const struct netdev *netdev,
-                                           const struct netdev_class *class_)
-{
-    ovs_assert(netdev->netdev_class == class_);
-}
-
  /* Network device class structure, to be defined by each implementation of a
   * network device.
   *
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c

index 4214b38..14b3347 100644 (file)
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -413,17 +413,17 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args)
      }
  
      if (tnl_cfg.ipsec) {
-        static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+        static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
          static pid_t pid = 0;
  
-        pthread_mutex_lock(&mutex);
+        ovs_mutex_lock(&mutex);
          if (pid <= 0) {
              char *file_name = xasprintf("%s/%s", ovs_rundir(),
                                          "ovs-monitor-ipsec.pid");
              pid = read_pidfile(file_name);
              free(file_name);
          }
-        pthread_mutex_unlock(&mutex);
+        ovs_mutex_unlock(&mutex);
  
          if (pid < 0) {
              VLOG_ERR("%s: IPsec requires the ovs-monitor-ipsec daemon",
diff --git a/lib/netdev.c b/lib/netdev.c

index a8bbedd..d5a51fa 100644 (file)
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -928,8 +928,7 @@ netdev_arp_lookup(const struct netdev *netdev,
                    ovs_be32 ip, uint8_t mac[ETH_ADDR_LEN])
  {
      int error = (netdev->netdev_class->arp_lookup
-                 ? netdev->netdev_class->arp_lookup(netdev,
-                        ip, mac)
+                 ? netdev->netdev_class->arp_lookup(netdev, ip, mac)
                   : EOPNOTSUPP);
      if (error) {
          memset(mac, 0, ETH_ADDR_LEN);
@@ -954,8 +953,7 @@ netdev_get_carrier(const struct netdev *netdev)
          return true;
      }
  
-    error = netdev->netdev_class->get_carrier(netdev,
-                                                              &carrier);
+    error = netdev->netdev_class->get_carrier(netdev, &carrier);
      if (error) {
          VLOG_DBG("%s: failed to get network device carrier status, assuming "
                   "down: %s", netdev_get_name(netdev), ovs_strerror(error));
@@ -1390,17 +1388,24 @@ netdev_get_class(const struct netdev *netdev)
  
  /* Returns the netdev with 'name' or NULL if there is none.
   *
- * The caller must not free the returned value. */
+ * The caller must free the returned netdev with netdev_close(). */
  struct netdev *
  netdev_from_name(const char *name)
  {
-    return shash_find_data(&netdev_shash, name);
+    struct netdev *netdev;
+
+    netdev = shash_find_data(&netdev_shash, name);
+    if (netdev) {
+        netdev_ref(netdev);
+    }
+
+    return netdev;
  }
  
  /* Fills 'device_list' with devices that match 'netdev_class'.
   *
- * The caller is responsible for initializing and destroying 'device_list'
- * but the contained netdevs must not be freed. */
+ * The caller is responsible for initializing and destroying 'device_list' and
+ * must close each device on the list. */
  void
  netdev_get_devices(const struct netdev_class *netdev_class,
                     struct shash *device_list)
@@ -1410,6 +1415,7 @@ netdev_get_devices(const struct netdev_class *netdev_class,
          struct netdev *dev = node->data;
  
          if (dev->netdev_class == netdev_class) {
+            dev->ref_cnt++;
              shash_add(device_list, node->name, node->data);
          }
      }
@@ -1418,8 +1424,10 @@ netdev_get_devices(const struct netdev_class *netdev_class,
  const char *
  netdev_get_type_from_name(const char *name)
  {
-    const struct netdev *dev = netdev_from_name(name);
-    return dev ? netdev_get_type(dev) : NULL;
+    struct netdev *dev = netdev_from_name(name);
+    const char *type = dev ? netdev_get_type(dev) : NULL;
+    netdev_close(dev);
+    return type;
  }
  \f
  void
diff --git a/lib/nx-match.c b/lib/nx-match.c

index bdb3a2b..940dd9a 100644 (file)
--- a/lib/nx-match.c
+++ b/lib/nx-match.c
@@ -1304,6 +1304,7 @@ nxm_execute_reg_move(const struct ofpact_reg_move *move,
      union mf_value dst_value;
  
      memset(&mask_value, 0xff, sizeof mask_value);
+    mf_write_subfield_flow(&move->dst, &mask_value, &wc->masks);
      mf_write_subfield_flow(&move->src, &mask_value, &wc->masks);
  
      mf_get_value(move->dst.field, flow, &dst_value);
@@ -1322,11 +1323,15 @@ nxm_execute_reg_load(const struct ofpact_reg_load *load, struct flow *flow)
  
  void
  nxm_reg_load(const struct mf_subfield *dst, uint64_t src_data,
-             struct flow *flow)
+             struct flow *flow, struct flow_wildcards *wc)
  {
      union mf_subvalue src_subvalue;
+    union mf_subvalue mask_value;
      ovs_be64 src_data_be = htonll(src_data);
  
+    memset(&mask_value, 0xff, sizeof mask_value);
+    mf_write_subfield_flow(dst, &mask_value, &wc->masks);
+
      bitwise_copy(&src_data_be, sizeof src_data_be, 0,
                   &src_subvalue, sizeof src_subvalue, 0,
                   sizeof src_data_be * 8);
@@ -1479,7 +1484,8 @@ nxm_execute_stack_push(const struct ofpact_stack *push,
  
  void
  nxm_execute_stack_pop(const struct ofpact_stack *pop,
-                      struct flow *flow, struct ofpbuf *stack)
+                      struct flow *flow, struct flow_wildcards *wc,
+                      struct ofpbuf *stack)
  {
      union mf_subvalue *src_value;
  
@@ -1487,6 +1493,10 @@ nxm_execute_stack_pop(const struct ofpact_stack *pop,
  
      /* Only pop if stack is not empty. Otherwise, give warning. */
      if (src_value) {
+        union mf_subvalue mask_value;
+
+        memset(&mask_value, 0xff, sizeof mask_value);
+        mf_write_subfield_flow(&pop->subfield, &mask_value, &wc->masks);
          mf_write_subfield_flow(&pop->subfield, src_value, flow);
      } else {
          if (!VLOG_DROP_WARN(&rl)) {
diff --git a/lib/nx-match.h b/lib/nx-match.h

index a6b7c52..9dcc19a 100644 (file)
--- a/lib/nx-match.h
+++ b/lib/nx-match.h
@@ -87,7 +87,7 @@ void nxm_execute_reg_move(const struct ofpact_reg_move *, struct flow *,
                            struct flow_wildcards *);
  void nxm_execute_reg_load(const struct ofpact_reg_load *, struct flow *);
  void nxm_reg_load(const struct mf_subfield *, uint64_t src_data,
-                  struct flow *);
+                  struct flow *, struct flow_wildcards *);
  
  char *nxm_parse_stack_action(struct ofpact_stack *, const char *)
      WARN_UNUSED_RESULT;
@@ -113,7 +113,8 @@ void nxm_execute_stack_push(const struct ofpact_stack *,
                              const struct flow *, struct flow_wildcards *,
                              struct ofpbuf *);
  void nxm_execute_stack_pop(const struct ofpact_stack *,
-                            struct flow *, struct ofpbuf *);
+                            struct flow *, struct flow_wildcards *,
+                            struct ofpbuf *);
  
  int nxm_field_bytes(uint32_t header);
  int nxm_field_bits(uint32_t header);
diff --git a/lib/odp-util.c b/lib/odp-util.c

index 3c3063d..78d5a1b 100644 (file)
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -51,7 +51,8 @@ static const char *delimiters = ", \t\r\n";
  static int parse_odp_key_mask_attr(const char *, const struct simap *port_names,
                                struct ofpbuf *, struct ofpbuf *);
  static void format_odp_key_attr(const struct nlattr *a,
-                                const struct nlattr *ma, struct ds *ds);
+                                const struct nlattr *ma, struct ds *ds,
+                                bool verbose);
  
  /* Returns one the following for the action with the given OVS_ACTION_ATTR_*
   * 'type':
@@ -399,7 +400,7 @@ format_odp_action(struct ds *ds, const struct nlattr *a)
          break;
      case OVS_ACTION_ATTR_SET:
          ds_put_cstr(ds, "set(");
-        format_odp_key_attr(nl_attr_get(a), NULL, ds);
+        format_odp_key_attr(nl_attr_get(a), NULL, ds, true);
          ds_put_cstr(ds, ")");
          break;
      case OVS_ACTION_ATTR_PUSH_VLAN:
@@ -897,6 +898,12 @@ tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key)
      nl_msg_end_nested(a, tun_key_ofs);
  }
  
+static bool
+odp_mask_attr_is_wildcard(const struct nlattr *ma)
+{
+    return is_all_zeros(nl_attr_get(ma), nl_attr_get_size(ma));
+}
+
  static bool
  odp_mask_attr_is_exact(const struct nlattr *ma)
  {
@@ -929,7 +936,7 @@ odp_mask_attr_is_exact(const struct nlattr *ma)
  
  static void
  format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma,
-                    struct ds *ds)
+                    struct ds *ds, bool verbose)
  {
      struct flow_tnl tun_key;
      enum ovs_key_attr attr = nl_attr_type(a);
@@ -972,9 +979,10 @@ format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma,
      case OVS_KEY_ATTR_ENCAP:
          if (ma && nl_attr_get_size(ma) && nl_attr_get_size(a)) {
              odp_flow_format(nl_attr_get(a), nl_attr_get_size(a),
-                            nl_attr_get(ma), nl_attr_get_size(ma), ds);
+                            nl_attr_get(ma), nl_attr_get_size(ma), ds, verbose);
          } else if (nl_attr_get_size(a)) {
-            odp_flow_format(nl_attr_get(a), nl_attr_get_size(a), NULL, 0, ds);
+            odp_flow_format(nl_attr_get(a), nl_attr_get_size(a), NULL, 0, ds,
+                            verbose);
          }
          break;
  
@@ -1337,7 +1345,7 @@ generate_all_wildcard_mask(struct ofpbuf *ofp, const struct nlattr *key)
  void
  odp_flow_format(const struct nlattr *key, size_t key_len,
                  const struct nlattr *mask, size_t mask_len,
-                struct ds *ds)
+                struct ds *ds, bool verbose)
  {
      if (key_len) {
          const struct nlattr *a;
@@ -1345,22 +1353,35 @@ odp_flow_format(const struct nlattr *key, size_t key_len,
          bool has_ethtype_key = false;
          const struct nlattr *ma = NULL;
          struct ofpbuf ofp;
+        bool first_field = true;
  
          ofpbuf_init(&ofp, 100);
          NL_ATTR_FOR_EACH (a, left, key, key_len) {
-            if (a != key) {
-                ds_put_char(ds, ',');
-            }
-            if (nl_attr_type(a) == OVS_KEY_ATTR_ETHERTYPE) {
+            bool is_nested_attr;
+            bool is_wildcard = false;
+            int attr_type = nl_attr_type(a);
+
+            if (attr_type == OVS_KEY_ATTR_ETHERTYPE) {
                  has_ethtype_key = true;
              }
+
+            is_nested_attr = (odp_flow_key_attr_len(attr_type) == -2);
+
              if (mask && mask_len) {
                  ma = nl_attr_find__(mask, mask_len, nl_attr_type(a));
-                if (!ma) {
+                is_wildcard = ma ? odp_mask_attr_is_wildcard(ma) : true;
+            }
+
+            if (verbose || !is_wildcard  || is_nested_attr) {
+                if (is_wildcard && !ma) {
                      ma = generate_all_wildcard_mask(&ofp, a);
                  }
+                if (!first_field) {
+                    ds_put_char(ds, ',');
+                }
+                format_odp_key_attr(a, ma, ds, verbose);
+                first_field = false;
              }
-            format_odp_key_attr(a, ma, ds);
              ofpbuf_clear(&ofp);
          }
          ofpbuf_uninit(&ofp);
@@ -1395,7 +1416,7 @@ void
  odp_flow_key_format(const struct nlattr *key,
                      size_t key_len, struct ds *ds)
  {
-    odp_flow_format(key, key_len, NULL, 0, ds);
+    odp_flow_format(key, key_len, NULL, 0, ds, true);
  }
  
  static void
@@ -2311,6 +2332,17 @@ ovs_to_odp_frag(uint8_t nw_frag)
            : OVS_FRAG_TYPE_LATER);
  }
  
+static uint8_t
+ovs_to_odp_frag_mask(uint8_t nw_frag_mask)
+{
+    uint8_t frag_mask = ~(OVS_FRAG_TYPE_FIRST | OVS_FRAG_TYPE_LATER);
+
+    frag_mask |= (nw_frag_mask & FLOW_NW_FRAG_ANY) ? OVS_FRAG_TYPE_FIRST : 0;
+    frag_mask |= (nw_frag_mask & FLOW_NW_FRAG_LATER) ? OVS_FRAG_TYPE_LATER : 0;
+
+    return frag_mask;
+}
+
  static void
  odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *data,
                           const struct flow *flow, odp_port_t odp_in_port)
@@ -2323,17 +2355,13 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *data,
       * treat 'data' as a mask. */
      is_mask = (data != flow);
  
-    if (flow->skb_priority) {
-        nl_msg_put_u32(buf, OVS_KEY_ATTR_PRIORITY, data->skb_priority);
-    }
+    nl_msg_put_u32(buf, OVS_KEY_ATTR_PRIORITY, data->skb_priority);
  
-    if (flow->tunnel.ip_dst) {
+    if (flow->tunnel.ip_dst || is_mask) {
          tun_key_to_attr(buf, &data->tunnel);
      }
  
-    if (flow->skb_mark) {
-        nl_msg_put_u32(buf, OVS_KEY_ATTR_SKB_MARK, data->skb_mark);
-    }
+    nl_msg_put_u32(buf, OVS_KEY_ATTR_SKB_MARK, data->skb_mark);
  
      /* Add an ingress port attribute if this is a mask or 'odp_in_port'
       * is not the magical value "ODPP_NONE". */
@@ -2391,7 +2419,8 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *data,
          ipv4_key->ipv4_proto = data->nw_proto;
          ipv4_key->ipv4_tos = data->nw_tos;
          ipv4_key->ipv4_ttl = data->nw_ttl;
-        ipv4_key->ipv4_frag = ovs_to_odp_frag(data->nw_frag);
+        ipv4_key->ipv4_frag = is_mask ? ovs_to_odp_frag_mask(data->nw_frag)
+                                      : ovs_to_odp_frag(data->nw_frag);
      } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
          struct ovs_key_ipv6 *ipv6_key;
  
@@ -2403,7 +2432,8 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *data,
          ipv6_key->ipv6_proto = data->nw_proto;
          ipv6_key->ipv6_tclass = data->nw_tos;
          ipv6_key->ipv6_hlimit = data->nw_ttl;
-        ipv6_key->ipv6_frag = ovs_to_odp_frag(flow->nw_frag);
+        ipv6_key->ipv6_frag = is_mask ? ovs_to_odp_frag_mask(data->nw_frag)
+                                      : ovs_to_odp_frag(data->nw_frag);
      } else if (flow->dl_type == htons(ETH_TYPE_ARP) ||
                 flow->dl_type == htons(ETH_TYPE_RARP)) {
          struct ovs_key_arp *arp_key;
diff --git a/lib/odp-util.h b/lib/odp-util.h

index 0c10cfa..7e27888 100644 (file)
--- a/lib/odp-util.h
+++ b/lib/odp-util.h
@@ -94,7 +94,7 @@ enum odp_key_fitness odp_tun_key_from_attr(const struct nlattr *,
  
  void odp_flow_format(const struct nlattr *key, size_t key_len,
                       const struct nlattr *mask, size_t mask_len,
-                     struct ds *);
+                     struct ds *, bool verbose);
  void odp_flow_key_format(const struct nlattr *, size_t, struct ds *);
  int odp_flow_from_string(const char *s,
                           const struct simap *port_names,
diff --git a/lib/ofp-util.h b/lib/ofp-util.h

index f94982d..21311f7 100644 (file)
--- a/lib/ofp-util.h
+++ b/lib/ofp-util.h
@@ -212,6 +212,8 @@ struct ofpbuf *ofputil_make_flow_mod_table_id(bool flow_mod_table_id);
   * The handling of cookies across multiple versions of OpenFlow is a bit
   * confusing.  See DESIGN for the details. */
  struct ofputil_flow_mod {
+    struct list list_node;    /* For queuing flow_mods. */
+
      struct match match;
      unsigned int priority;
  
diff --git a/lib/ovs-atomic-pthreads.c b/lib/ovs-atomic-pthreads.c

index 7e7ef05..a501b82 100644 (file)
--- a/lib/ovs-atomic-pthreads.c
+++ b/lib/ovs-atomic-pthreads.c
@@ -26,10 +26,10 @@ atomic_flag_test_and_set(volatile atomic_flag *flag_)
      atomic_flag *flag = CONST_CAST(atomic_flag *, flag_);
      bool old_value;
  
-    ovs_mutex_lock(&flag->mutex);
+    xpthread_mutex_lock(&flag->mutex);
      old_value = flag->b;
      flag->b = true;
-    ovs_mutex_unlock(&flag->mutex);
+    xpthread_mutex_unlock(&flag->mutex);
  
      return old_value;
  }
@@ -46,9 +46,9 @@ atomic_flag_clear(volatile atomic_flag *flag_)
  {
      atomic_flag *flag = CONST_CAST(atomic_flag *, flag_);
  
-    ovs_mutex_lock(&flag->mutex);
+    xpthread_mutex_lock(&flag->mutex);
      flag->b = false;
-    ovs_mutex_unlock(&flag->mutex);
+    xpthread_mutex_unlock(&flag->mutex);
  }
  
  void
diff --git a/lib/ovs-atomic-pthreads.h b/lib/ovs-atomic-pthreads.h

index 61a9771..2f47a9c 100644 (file)
--- a/lib/ovs-atomic-pthreads.h
+++ b/lib/ovs-atomic-pthreads.h
@@ -144,7 +144,7 @@ atomic_signal_fence(memory_order order OVS_UNUSED)
  
  typedef struct {
      bool b;
-    struct ovs_mutex mutex;
+    pthread_mutex_t mutex;
  } atomic_flag;
  #define ATOMIC_FLAG_INIT { false, PTHREAD_MUTEX_INITIALIZER }
  
diff --git a/lib/ovs-thread.c b/lib/ovs-thread.c

index c8b2c15..4d64b92 100644 (file)
--- a/lib/ovs-thread.c
+++ b/lib/ovs-thread.c
@@ -114,6 +114,8 @@ UNLOCK_FUNCTION(rwlock, destroy);
          }                                               \
      }
  
+XPTHREAD_FUNC1(pthread_mutex_lock, pthread_mutex_t *);
+XPTHREAD_FUNC1(pthread_mutex_unlock, pthread_mutex_t *);
  XPTHREAD_FUNC1(pthread_mutexattr_init, pthread_mutexattr_t *);
  XPTHREAD_FUNC1(pthread_mutexattr_destroy, pthread_mutexattr_t *);
  XPTHREAD_FUNC2(pthread_mutexattr_settype, pthread_mutexattr_t *, int);
diff --git a/lib/ovs-thread.h b/lib/ovs-thread.h

index f5e171a..9b8eeef 100644 (file)
--- a/lib/ovs-thread.h
+++ b/lib/ovs-thread.h
@@ -84,6 +84,11 @@ int ovs_mutex_trylock_at(const struct ovs_mutex *mutex, const char *where)
  
  void ovs_mutex_cond_wait(pthread_cond_t *, const struct ovs_mutex *);
  
+/* Wrappers for pthread_mutex_*() that abort the process on any error.
+ * These are still needed when ovs-atomic-pthreads.h is used. */
+void xpthread_mutex_lock(pthread_mutex_t *mutex);
+void xpthread_mutex_unlock(pthread_mutex_t *mutex);
+
  /* Wrappers for pthread_mutexattr_*() that abort the process on any error. */
  void xpthread_mutexattr_init(pthread_mutexattr_t *);
  void xpthread_mutexattr_destroy(pthread_mutexattr_t *);
diff --git a/lib/packets.h b/lib/packets.h

index e852761..33be891 100644 (file)
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -44,6 +44,9 @@ static const uint8_t eth_addr_stp[ETH_ADDR_LEN] OVS_UNUSED
  static const uint8_t eth_addr_lacp[ETH_ADDR_LEN] OVS_UNUSED
      = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x02 };
  
+static const uint8_t eth_addr_bfd[ETH_ADDR_LEN] OVS_UNUSED
+    = { 0x00, 0x23, 0x20, 0x00, 0x00, 0x01 };
+
  static inline bool eth_addr_is_broadcast(const uint8_t ea[6])
  {
      return (ea[0] & ea[1] & ea[2] & ea[3] & ea[4] & ea[5]) == 0xff;
diff --git a/lib/stp.c b/lib/stp.c

index 0b32cb5..2ff9df7 100644 (file)
--- a/lib/stp.c
+++ b/lib/stp.c
@@ -140,9 +140,13 @@ struct stp {
      struct stp_port *first_changed_port;
      void (*send_bpdu)(struct ofpbuf *bpdu, int port_no, void *aux);
      void *aux;
+
+    atomic_int ref_cnt;
  };
  
-static struct list all_stps = LIST_INITIALIZER(&all_stps);
+static struct ovs_mutex mutex;
+static struct list all_stps__ = LIST_INITIALIZER(&all_stps__);
+static struct list *const all_stps OVS_GUARDED_BY(&mutex) = &all_stps__;
  
  #define FOR_EACH_ENABLED_PORT(PORT, STP)                        \
      for ((PORT) = stp_next_enabled_port((STP), (STP)->ports);   \
@@ -150,6 +154,7 @@ static struct list all_stps = LIST_INITIALIZER(&all_stps);
           (PORT) = stp_next_enabled_port((STP), (PORT) + 1))
  static struct stp_port *
  stp_next_enabled_port(const struct stp *stp, const struct stp_port *port)
+    OVS_REQ_WRLOCK(mutex)
  {
      for (; port < &stp->ports[ARRAY_SIZE(stp->ports)]; port++) {
          if (port->state != STP_DISABLED) {
@@ -161,42 +166,57 @@ stp_next_enabled_port(const struct stp *stp, const struct stp_port *port)
  
  #define MESSAGE_AGE_INCREMENT 1
  
-static void stp_transmit_config(struct stp_port *);
+static void stp_transmit_config(struct stp_port *) OVS_REQ_WRLOCK(mutex);
  static bool stp_supersedes_port_info(const struct stp_port *,
-                                     const struct stp_config_bpdu *);
+                                     const struct stp_config_bpdu *)
+    OVS_REQ_WRLOCK(mutex);
  static void stp_record_config_information(struct stp_port *,
-                                          const struct stp_config_bpdu *);
+                                          const struct stp_config_bpdu *)
+    OVS_REQ_WRLOCK(mutex);
  static void stp_record_config_timeout_values(struct stp *,
-                                             const struct stp_config_bpdu  *);
-static bool stp_is_designated_port(const struct stp_port *);
-static void stp_config_bpdu_generation(struct stp *);
-static void stp_transmit_tcn(struct stp *);
-static void stp_configuration_update(struct stp *);
+                                             const struct stp_config_bpdu  *)
+    OVS_REQ_WRLOCK(mutex);
+static bool stp_is_designated_port(const struct stp_port *)
+    OVS_REQ_WRLOCK(mutex);
+static void stp_config_bpdu_generation(struct stp *) OVS_REQ_WRLOCK(mutex);
+static void stp_transmit_tcn(struct stp *) OVS_REQ_WRLOCK(mutex);
+static void stp_configuration_update(struct stp *) OVS_REQ_WRLOCK(mutex);
  static bool stp_supersedes_root(const struct stp_port *root,
-                                const struct stp_port *);
-static void stp_root_selection(struct stp *);
-static void stp_designated_port_selection(struct stp *);
-static void stp_become_designated_port(struct stp_port *);
-static void stp_port_state_selection(struct stp *);
-static void stp_make_forwarding(struct stp_port *);
-static void stp_make_blocking(struct stp_port *);
-static void stp_set_port_state(struct stp_port *, enum stp_state);
-static void stp_topology_change_detection(struct stp *);
-static void stp_topology_change_acknowledged(struct stp *);
-static void stp_acknowledge_topology_change(struct stp_port *);
+                                const struct stp_port *) OVS_REQ_WRLOCK(mutex);
+static void stp_root_selection(struct stp *) OVS_REQ_WRLOCK(mutex);
+static void stp_designated_port_selection(struct stp *) OVS_REQ_WRLOCK(mutex);
+static void stp_become_designated_port(struct stp_port *)
+    OVS_REQ_WRLOCK(mutex);
+static void stp_port_state_selection(struct stp *) OVS_REQ_WRLOCK(mutex);
+static void stp_make_forwarding(struct stp_port *) OVS_REQ_WRLOCK(mutex);
+static void stp_make_blocking(struct stp_port *) OVS_REQ_WRLOCK(mutex);
+static void stp_set_port_state(struct stp_port *, enum stp_state)
+    OVS_REQ_WRLOCK(mutex);
+static void stp_topology_change_detection(struct stp *) OVS_REQ_WRLOCK(mutex);
+static void stp_topology_change_acknowledged(struct stp *)
+    OVS_REQ_WRLOCK(mutex);
+static void stp_acknowledge_topology_change(struct stp_port *)
+    OVS_REQ_WRLOCK(mutex);
  static void stp_received_config_bpdu(struct stp *, struct stp_port *,
-                                     const struct stp_config_bpdu *);
-static void stp_received_tcn_bpdu(struct stp *, struct stp_port *);
-static void stp_hello_timer_expiry(struct stp *);
-static void stp_message_age_timer_expiry(struct stp_port *);
-static bool stp_is_designated_for_some_port(const struct stp *);
-static void stp_forward_delay_timer_expiry(struct stp_port *);
-static void stp_tcn_timer_expiry(struct stp *);
-static void stp_topology_change_timer_expiry(struct stp *);
-static void stp_hold_timer_expiry(struct stp_port *);
-static void stp_initialize_port(struct stp_port *, enum stp_state);
-static void stp_become_root_bridge(struct stp *);
-static void stp_update_bridge_timers(struct stp *);
+                                     const struct stp_config_bpdu *)
+    OVS_REQ_WRLOCK(mutex);
+static void stp_received_tcn_bpdu(struct stp *, struct stp_port *)
+    OVS_REQ_WRLOCK(mutex);
+static void stp_hello_timer_expiry(struct stp *) OVS_REQ_WRLOCK(mutex);
+static void stp_message_age_timer_expiry(struct stp_port *)
+    OVS_REQ_WRLOCK(mutex);
+static bool stp_is_designated_for_some_port(const struct stp *)
+    OVS_REQ_WRLOCK(mutex);
+static void stp_forward_delay_timer_expiry(struct stp_port *)
+    OVS_REQ_WRLOCK(mutex);
+static void stp_tcn_timer_expiry(struct stp *) OVS_REQ_WRLOCK(mutex);
+static void stp_topology_change_timer_expiry(struct stp *)
+    OVS_REQ_WRLOCK(mutex);
+static void stp_hold_timer_expiry(struct stp_port *) OVS_REQ_WRLOCK(mutex);
+static void stp_initialize_port(struct stp_port *, enum stp_state)
+    OVS_REQ_WRLOCK(mutex);
+static void stp_become_root_bridge(struct stp *) OVS_REQ_WRLOCK(mutex);
+static void stp_update_bridge_timers(struct stp *) OVS_REQ_WRLOCK(mutex);
  
  static int clamp(int x, int min, int max);
  static int ms_to_timer(int ms);
@@ -205,7 +225,8 @@ static void stp_start_timer(struct stp_timer *, int value);
  static void stp_stop_timer(struct stp_timer *);
  static bool stp_timer_expired(struct stp_timer *, int elapsed, int timeout);
  
-static void stp_send_bpdu(struct stp_port *, const void *, size_t);
+static void stp_send_bpdu(struct stp_port *, const void *, size_t)
+    OVS_REQ_WRLOCK(mutex);
  static void stp_unixctl_tcn(struct unixctl_conn *, int argc,
                              const char *argv[], void *aux);
  
@@ -234,9 +255,20 @@ stp_create(const char *name, stp_identifier bridge_id,
             void (*send_bpdu)(struct ofpbuf *bpdu, int port_no, void *aux),
             void *aux)
  {
+    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
      struct stp *stp;
      struct stp_port *p;
  
+    if (ovsthread_once_start(&once)) {
+        /* We need a recursive mutex because stp_send_bpdu() could loop back
+         * into the stp module through a patch port.  This happens
+         * intentionally as part of the unit tests.  Ideally we'd ditch
+         * the call back function, but for now this is what we have. */
+        ovs_mutex_init(&mutex,  PTHREAD_MUTEX_RECURSIVE);
+        ovsthread_once_done(&once);
+    }
+
+    ovs_mutex_lock(&mutex);
      stp = xzalloc(sizeof *stp);
      stp->name = xstrdup(name);
      stp->bridge_id = bridge_id;
@@ -272,16 +304,41 @@ stp_create(const char *name, stp_identifier bridge_id,
          p->path_cost = 19;      /* Recommended default for 100 Mb/s link. */
          stp_initialize_port(p, STP_DISABLED);
      }
-    list_push_back(&all_stps, &stp->node);
+    atomic_init(&stp->ref_cnt, 1);
+
+    list_push_back(all_stps, &stp->node);
+    ovs_mutex_unlock(&mutex);
+    return stp;
+}
+
+struct stp *
+stp_ref(const struct stp *stp_)
+{
+    struct stp *stp = CONST_CAST(struct stp *, stp_);
+    if (stp) {
+        int orig;
+        atomic_add(&stp->ref_cnt, 1, &orig);
+        ovs_assert(orig > 0);
+    }
      return stp;
  }
  
  /* Destroys 'stp'. */
  void
-stp_destroy(struct stp *stp)
+stp_unref(struct stp *stp)
  {
-    if (stp) {
+    int orig;
+
+    if (!stp) {
+        return;
+    }
+
+    atomic_sub(&stp->ref_cnt, 1, &orig);
+    ovs_assert(orig > 0);
+    if (orig == 1) {
+        ovs_mutex_lock(&mutex);
          list_remove(&stp->node);
+        ovs_mutex_unlock(&mutex);
          free(stp->name);
          free(stp);
      }
@@ -294,6 +351,7 @@ stp_tick(struct stp *stp, int ms)
      struct stp_port *p;
      int elapsed;
  
+    ovs_mutex_lock(&mutex);
      /* Convert 'ms' to STP timer ticks.  Preserve any leftover milliseconds
       * from previous stp_tick() calls so that we don't lose STP ticks when we
       * are called too frequently. */
@@ -301,7 +359,7 @@ stp_tick(struct stp *stp, int ms)
      elapsed = ms_to_timer(ms);
      stp->elapsed_remainder = ms - timer_to_ms(elapsed);
      if (!elapsed) {
-        return;
+        goto out;
      }
  
      if (stp_timer_expired(&stp->hello_timer, elapsed, stp->hello_time)) {
@@ -328,10 +386,14 @@ stp_tick(struct stp *stp, int ms)
              stp_hold_timer_expiry(p);
          }
      }
+
+out:
+    ovs_mutex_unlock(&mutex);
  }
  
  static void
  set_bridge_id(struct stp *stp, stp_identifier new_bridge_id)
+    OVS_REQ_WRLOCK(mutex)
  {
      if (new_bridge_id != stp->bridge_id) {
          bool root;
@@ -357,15 +419,19 @@ stp_set_bridge_id(struct stp *stp, stp_identifier bridge_id)
  {
      const uint64_t mac_bits = (UINT64_C(1) << 48) - 1;
      const uint64_t pri_bits = ~mac_bits;
+    ovs_mutex_lock(&mutex);
      set_bridge_id(stp, (stp->bridge_id & pri_bits) | (bridge_id & mac_bits));
+    ovs_mutex_unlock(&mutex);
  }
  
  void
  stp_set_bridge_priority(struct stp *stp, uint16_t new_priority)
  {
      const uint64_t mac_bits = (UINT64_C(1) << 48) - 1;
+    ovs_mutex_lock(&mutex);
      set_bridge_id(stp, ((stp->bridge_id & mac_bits)
                          | ((uint64_t) new_priority << 48)));
+    ovs_mutex_unlock(&mutex);
  }
  
  /* Sets the desired hello time for 'stp' to 'ms', in milliseconds.  The actual
@@ -375,8 +441,10 @@ stp_set_bridge_priority(struct stp *stp, uint16_t new_priority)
  void
  stp_set_hello_time(struct stp *stp, int ms)
  {
+    ovs_mutex_lock(&mutex);
      stp->rq_hello_time = ms;
      stp_update_bridge_timers(stp);
+    ovs_mutex_unlock(&mutex);
  }
  
  /* Sets the desired max age for 'stp' to 'ms', in milliseconds.  The actual max
@@ -387,8 +455,10 @@ stp_set_hello_time(struct stp *stp, int ms)
  void
  stp_set_max_age(struct stp *stp, int ms)
  {
+    ovs_mutex_lock(&mutex);
      stp->rq_max_age = ms;
      stp_update_bridge_timers(stp);
+    ovs_mutex_unlock(&mutex);
  }
  
  /* Sets the desired forward delay for 'stp' to 'ms', in milliseconds.  The
@@ -398,29 +468,46 @@ stp_set_max_age(struct stp *stp, int ms)
  void
  stp_set_forward_delay(struct stp *stp, int ms)
  {
+    ovs_mutex_lock(&mutex);
      stp->rq_forward_delay = ms;
      stp_update_bridge_timers(stp);
+    ovs_mutex_unlock(&mutex);
  }
  
  /* Returns the name given to 'stp' in the call to stp_create(). */
  const char *
  stp_get_name(const struct stp *stp)
  {
-    return stp->name;
+    char *name;
+
+    ovs_mutex_lock(&mutex);
+    name = stp->name;
+    ovs_mutex_unlock(&mutex);
+    return name;
  }
  
  /* Returns the bridge ID for 'stp'. */
  stp_identifier
  stp_get_bridge_id(const struct stp *stp)
  {
-    return stp->bridge_id;
+    stp_identifier bridge_id;
+
+    ovs_mutex_lock(&mutex);
+    bridge_id = stp->bridge_id;
+    ovs_mutex_unlock(&mutex);
+    return bridge_id;
  }
  
  /* Returns the bridge ID of the bridge currently believed to be the root. */
  stp_identifier
  stp_get_designated_root(const struct stp *stp)
  {
-    return stp->designated_root;
+    stp_identifier designated_root;
+
+    ovs_mutex_lock(&mutex);
+    designated_root = stp->designated_root;
+    ovs_mutex_unlock(&mutex);
+    return designated_root;
  }
  
  /* Returns true if 'stp' believes itself to the be root of the spanning tree,
@@ -428,14 +515,24 @@ stp_get_designated_root(const struct stp *stp)
  bool
  stp_is_root_bridge(const struct stp *stp)
  {
-    return stp->bridge_id == stp->designated_root;
+    bool is_root;
+
+    ovs_mutex_lock(&mutex);
+    is_root = stp->bridge_id == stp->designated_root;
+    ovs_mutex_unlock(&mutex);
+    return is_root;
  }
  
  /* Returns the cost of the path from 'stp' to the root of the spanning tree. */
  int
  stp_get_root_path_cost(const struct stp *stp)
  {
-    return stp->root_path_cost;
+    int cost;
+
+    ovs_mutex_lock(&mutex);
+    cost = stp->root_path_cost;
+    ovs_mutex_unlock(&mutex);
+    return cost;
  }
  
  /* Returns the bridge hello time, in ms.  The returned value is not necessarily
@@ -444,7 +541,12 @@ stp_get_root_path_cost(const struct stp *stp)
  int
  stp_get_hello_time(const struct stp *stp)
  {
-    return timer_to_ms(stp->bridge_hello_time);
+    int time;
+
+    ovs_mutex_lock(&mutex);
+    time = timer_to_ms(stp->bridge_hello_time);
+    ovs_mutex_unlock(&mutex);
+    return time;
  }
  
  /* Returns the bridge max age, in ms.  The returned value is not necessarily
@@ -454,7 +556,12 @@ stp_get_hello_time(const struct stp *stp)
  int
  stp_get_max_age(const struct stp *stp)
  {
-    return timer_to_ms(stp->bridge_max_age);
+    int time;
+
+    ovs_mutex_lock(&mutex);
+    time = timer_to_ms(stp->bridge_max_age);
+    ovs_mutex_unlock(&mutex);
+    return time;
  }
  
  /* Returns the bridge forward delay, in ms.  The returned value is not
@@ -464,7 +571,12 @@ stp_get_max_age(const struct stp *stp)
  int
  stp_get_forward_delay(const struct stp *stp)
  {
-    return timer_to_ms(stp->bridge_forward_delay);
+    int time;
+
+    ovs_mutex_lock(&mutex);
+    time = timer_to_ms(stp->bridge_forward_delay);
+    ovs_mutex_unlock(&mutex);
+    return time;
  }
  
  /* Returns true if something has happened to 'stp' which necessitates flushing
@@ -473,8 +585,12 @@ stp_get_forward_delay(const struct stp *stp)
  bool
  stp_check_and_reset_fdb_flush(struct stp *stp)
  {
-    bool needs_flush = stp->fdb_needs_flush;
+    bool needs_flush;
+
+    ovs_mutex_lock(&mutex);
+    needs_flush = stp->fdb_needs_flush;
      stp->fdb_needs_flush = false;
+    ovs_mutex_unlock(&mutex);
      return needs_flush;
  }
  
@@ -483,8 +599,13 @@ stp_check_and_reset_fdb_flush(struct stp *stp)
  struct stp_port *
  stp_get_port(struct stp *stp, int port_no)
  {
+    struct stp_port *port;
+
+    ovs_mutex_lock(&mutex);
      ovs_assert(port_no >= 0 && port_no < ARRAY_SIZE(stp->ports));
-    return &stp->ports[port_no];
+    port = &stp->ports[port_no];
+    ovs_mutex_unlock(&mutex);
+    return port;
  }
  
  /* Returns the port connecting 'stp' to the root bridge, or a null pointer if
@@ -492,7 +613,12 @@ stp_get_port(struct stp *stp, int port_no)
  struct stp_port *
  stp_get_root_port(struct stp *stp)
  {
-    return stp->root_port;
+    struct stp_port *port;
+
+    ovs_mutex_lock(&mutex);
+    port = stp->root_port;
+    ovs_mutex_unlock(&mutex);
+    return port;
  }
  
  /* Finds a port whose state has changed.  If successful, stores the port whose
@@ -501,20 +627,26 @@ stp_get_root_port(struct stp *stp)
  bool
  stp_get_changed_port(struct stp *stp, struct stp_port **portp)
  {
-    struct stp_port *end = &stp->ports[ARRAY_SIZE(stp->ports)];
-    struct stp_port *p;
+    struct stp_port *end, *p;
+    bool changed = false;
  
+    ovs_mutex_lock(&mutex);
+    end = &stp->ports[ARRAY_SIZE(stp->ports)];
      for (p = stp->first_changed_port; p < end; p++) {
          if (p->state_changed) {
              p->state_changed = false;
              stp->first_changed_port = p + 1;
              *portp = p;
-            return true;
+            changed = true;
+            goto out;
          }
      }
      stp->first_changed_port = end;
      *portp = NULL;
-    return false;
+
+out:
+    ovs_mutex_unlock(&mutex);
+    return changed;
  }
  
  /* Returns the name for the given 'state' (for use in debugging and log
@@ -589,14 +721,15 @@ stp_received_bpdu(struct stp_port *p, const void *bpdu, size_t bpdu_size)
      struct stp *stp = p->stp;
      const struct stp_bpdu_header *header;
  
+    ovs_mutex_lock(&mutex);
      if (p->state == STP_DISABLED) {
-        return;
+        goto out;
      }
  
      if (bpdu_size < sizeof(struct stp_bpdu_header)) {
          VLOG_WARN("%s: received runt %zu-byte BPDU", stp->name, bpdu_size);
          p->error_count++;
-        return;
+        goto out;
      }
  
      header = bpdu;
@@ -604,7 +737,7 @@ stp_received_bpdu(struct stp_port *p, const void *bpdu, size_t bpdu_size)
          VLOG_WARN("%s: received BPDU with unexpected protocol ID %"PRIu16,
                    stp->name, ntohs(header->protocol_id));
          p->error_count++;
-        return;
+        goto out;
      }
      if (header->protocol_version != STP_PROTOCOL_VERSION) {
          VLOG_DBG("%s: received BPDU with unexpected protocol version %"PRIu8,
@@ -617,7 +750,7 @@ stp_received_bpdu(struct stp_port *p, const void *bpdu, size_t bpdu_size)
              VLOG_WARN("%s: received config BPDU with invalid size %zu",
                        stp->name, bpdu_size);
              p->error_count++;
-            return;
+            goto out;
          }
          stp_received_config_bpdu(stp, p, bpdu);
          break;
@@ -627,7 +760,7 @@ stp_received_bpdu(struct stp_port *p, const void *bpdu, size_t bpdu_size)
              VLOG_WARN("%s: received TCN BPDU with invalid size %zu",
                        stp->name, bpdu_size);
              p->error_count++;
-            return;
+            goto out;
          }
          stp_received_tcn_bpdu(stp, p);
          break;
@@ -636,16 +769,24 @@ stp_received_bpdu(struct stp_port *p, const void *bpdu, size_t bpdu_size)
          VLOG_WARN("%s: received BPDU of unexpected type %"PRIu8,
                    stp->name, header->bpdu_type);
          p->error_count++;
-        return;
+        goto out;
      }
      p->rx_count++;
+
+out:
+    ovs_mutex_unlock(&mutex);
  }
  
  /* Returns the STP entity in which 'p' is nested. */
  struct stp *
  stp_port_get_stp(struct stp_port *p)
  {
-    return p->stp;
+    struct stp *stp;
+
+    ovs_mutex_lock(&mutex);
+    stp = p->stp;
+    ovs_mutex_unlock(&mutex);
+    return stp;
  }
  
  /* Sets the 'aux' member of 'p'.
@@ -656,70 +797,104 @@ stp_port_get_stp(struct stp_port *p)
  void
  stp_port_set_aux(struct stp_port *p, void *aux)
  {
+    ovs_mutex_lock(&mutex);
      p->aux = aux;
+    ovs_mutex_unlock(&mutex);
  }
  
  /* Returns the 'aux' member of 'p'. */
  void *
  stp_port_get_aux(struct stp_port *p)
  {
-    return p->aux;
+    void *aux;
+
+    ovs_mutex_lock(&mutex);
+    aux = p->aux;
+    ovs_mutex_unlock(&mutex);
+    return aux;
  }
  
  /* Returns the index of port 'p' within its bridge. */
  int
  stp_port_no(const struct stp_port *p)
  {
-    struct stp *stp = p->stp;
+    struct stp *stp;
+    int index;
+
+    ovs_mutex_lock(&mutex);
+    stp = p->stp;
      ovs_assert(p >= stp->ports && p < &stp->ports[ARRAY_SIZE(stp->ports)]);
-    return p - stp->ports;
+    index = p - p->stp->ports;
+    ovs_mutex_unlock(&mutex);
+    return index;
  }
  
  /* Returns the port ID for 'p'. */
  int
  stp_port_get_id(const struct stp_port *p)
  {
-    return p->port_id;
+    int port_id;
+
+    ovs_mutex_lock(&mutex);
+    port_id = p->port_id;
+    ovs_mutex_unlock(&mutex);
+    return port_id;
  }
  
  /* Returns the state of port 'p'. */
  enum stp_state
  stp_port_get_state(const struct stp_port *p)
  {
-    return p->state;
+    enum stp_state state;
+
+    ovs_mutex_lock(&mutex);
+    state = p->state;
+    ovs_mutex_unlock(&mutex);
+    return state;
  }
  
  /* Returns the role of port 'p'. */
  enum stp_role
  stp_port_get_role(const struct stp_port *p)
  {
-    struct stp_port *root_port = stp_get_root_port(p->stp);
+    struct stp_port *root_port;
+    enum stp_role role;
  
+    ovs_mutex_lock(&mutex);
+    root_port = p->stp->root_port;
      if (root_port && root_port->port_id == p->port_id) {
-        return STP_ROLE_ROOT;
+        role = STP_ROLE_ROOT;
      } else if (stp_is_designated_port(p)) {
-        return STP_ROLE_DESIGNATED;
+        role = STP_ROLE_DESIGNATED;
      } else if (p->state == STP_DISABLED) {
-        return STP_ROLE_DISABLED;
+        role = STP_ROLE_DISABLED;
      } else {
-        return STP_ROLE_ALTERNATE;
+        role = STP_ROLE_ALTERNATE;
      }
+    ovs_mutex_unlock(&mutex);
+    return role;
  }
  
  /* Retrieves BPDU transmit and receive counts for 'p'. */
-void stp_port_get_counts(const struct stp_port *p,
-                         int *tx_count, int *rx_count, int *error_count)
+void
+stp_port_get_counts(const struct stp_port *p,
+                    int *tx_count, int *rx_count, int *error_count)
  {
+    ovs_mutex_lock(&mutex);
      *tx_count = p->tx_count;
      *rx_count = p->rx_count;
      *error_count = p->error_count;
+    ovs_mutex_unlock(&mutex);
  }
  
  /* Disables STP on port 'p'. */
  void
  stp_port_disable(struct stp_port *p)
  {
-    struct stp *stp = p->stp;
+    struct stp *stp;
+
+    ovs_mutex_lock(&mutex);
+    stp = p->stp;
      if (p->state != STP_DISABLED) {
          bool root = stp_is_root_bridge(stp);
          stp_become_designated_port(p);
@@ -735,16 +910,19 @@ stp_port_disable(struct stp_port *p)
          }
          p->aux = NULL;
      }
+    ovs_mutex_unlock(&mutex);
  }
  
  /* Enables STP on port 'p'.  The port will initially be in "blocking" state. */
  void
  stp_port_enable(struct stp_port *p)
  {
+    ovs_mutex_lock(&mutex);
      if (p->state == STP_DISABLED) {
          stp_initialize_port(p, STP_BLOCKING);
          stp_port_state_selection(p->stp);
      }
+    ovs_mutex_unlock(&mutex);
  }
  
  /* Sets the priority of port 'p' to 'new_priority'.  Lower numerical values
@@ -752,7 +930,10 @@ stp_port_enable(struct stp_port *p)
  void
  stp_port_set_priority(struct stp_port *p, uint8_t new_priority)
  {
-    uint16_t new_port_id = (p->port_id & 0xff) | (new_priority << 8);
+    uint16_t new_port_id;
+
+    ovs_mutex_lock(&mutex);
+    new_port_id  = (p->port_id & 0xff) | (new_priority << 8);
      if (p->port_id != new_port_id) {
          struct stp *stp = p->stp;
          if (stp_is_designated_port(p)) {
@@ -765,19 +946,25 @@ stp_port_set_priority(struct stp_port *p, uint8_t new_priority)
              stp_port_state_selection(stp);
          }
      }
+    ovs_mutex_unlock(&mutex);
  }
  
  /* Convert 'speed' (measured in Mb/s) into the path cost. */
  uint16_t
  stp_convert_speed_to_cost(unsigned int speed)
  {
-    return speed >= 10000 ? 2  /* 10 Gb/s. */
-           : speed >= 1000 ? 4 /* 1 Gb/s. */
-           : speed >= 100 ? 19 /* 100 Mb/s. */
-           : speed >= 16 ? 62  /* 16 Mb/s. */
-           : speed >= 10 ? 100 /* 10 Mb/s. */
-           : speed >= 4 ? 250  /* 4 Mb/s. */
-           : 19;             /* 100 Mb/s (guess). */
+    uint16_t ret;
+
+    ovs_mutex_lock(&mutex);
+    ret = speed >= 10000 ? 2  /* 10 Gb/s. */
+        : speed >= 1000 ? 4 /* 1 Gb/s. */
+        : speed >= 100 ? 19 /* 100 Mb/s. */
+        : speed >= 16 ? 62  /* 16 Mb/s. */
+        : speed >= 10 ? 100 /* 10 Mb/s. */
+        : speed >= 4 ? 250  /* 4 Mb/s. */
+        : 19;             /* 100 Mb/s (guess). */
+    ovs_mutex_unlock(&mutex);
+    return ret;
  }
  
  /* Sets the path cost of port 'p' to 'path_cost'.  Lower values are generally
@@ -786,12 +973,14 @@ stp_convert_speed_to_cost(unsigned int speed)
  void
  stp_port_set_path_cost(struct stp_port *p, uint16_t path_cost)
  {
+    ovs_mutex_lock(&mutex);
      if (p->path_cost != path_cost) {
          struct stp *stp = p->stp;
          p->path_cost = path_cost;
          stp_configuration_update(stp);
          stp_port_state_selection(stp);
      }
+    ovs_mutex_unlock(&mutex);
  }
  
  /* Sets the path cost of port 'p' based on 'speed' (measured in Mb/s). */
@@ -816,7 +1005,7 @@ stp_port_disable_change_detection(struct stp_port *p)
  }
  \f
  static void
-stp_transmit_config(struct stp_port *p)
+stp_transmit_config(struct stp_port *p) OVS_REQ_WRLOCK(mutex)
  {
      struct stp *stp = p->stp;
      bool root = stp_is_root_bridge(stp);
@@ -863,6 +1052,7 @@ stp_transmit_config(struct stp_port *p)
  static bool
  stp_supersedes_port_info(const struct stp_port *p,
                           const struct stp_config_bpdu *config)
+     OVS_REQ_WRLOCK(mutex)
  {
      if (ntohll(config->root_id) != p->designated_root) {
          return ntohll(config->root_id) < p->designated_root;
@@ -879,6 +1069,7 @@ stp_supersedes_port_info(const struct stp_port *p,
  static void
  stp_record_config_information(struct stp_port *p,
                                const struct stp_config_bpdu *config)
+     OVS_REQ_WRLOCK(mutex)
  {
      p->designated_root = ntohll(config->root_id);
      p->designated_cost = ntohl(config->root_path_cost);
@@ -890,6 +1081,7 @@ stp_record_config_information(struct stp_port *p,
  static void
  stp_record_config_timeout_values(struct stp *stp,
                                   const struct stp_config_bpdu  *config)
+     OVS_REQ_WRLOCK(mutex)
  {
      stp->max_age = ntohs(config->max_age);
      stp->hello_time = ntohs(config->hello_time);
@@ -898,14 +1090,14 @@ stp_record_config_timeout_values(struct stp *stp,
  }
  
  static bool
-stp_is_designated_port(const struct stp_port *p)
+stp_is_designated_port(const struct stp_port *p) OVS_REQ_WRLOCK(mutex)
  {
      return (p->designated_bridge == p->stp->bridge_id
              && p->designated_port == p->port_id);
  }
  
  static void
-stp_config_bpdu_generation(struct stp *stp)
+stp_config_bpdu_generation(struct stp *stp) OVS_REQ_WRLOCK(mutex)
  {
      struct stp_port *p;
  
@@ -917,7 +1109,7 @@ stp_config_bpdu_generation(struct stp *stp)
  }
  
  static void
-stp_transmit_tcn(struct stp *stp)
+stp_transmit_tcn(struct stp *stp) OVS_REQ_WRLOCK(mutex)
  {
      struct stp_port *p = stp->root_port;
      struct stp_tcn_bpdu tcn_bpdu;
@@ -931,7 +1123,7 @@ stp_transmit_tcn(struct stp *stp)
  }
  
  static void
-stp_configuration_update(struct stp *stp)
+stp_configuration_update(struct stp *stp) OVS_REQ_WRLOCK(mutex)
  {
      stp_root_selection(stp);
      stp_designated_port_selection(stp);
@@ -939,6 +1131,7 @@ stp_configuration_update(struct stp *stp)
  
  static bool
  stp_supersedes_root(const struct stp_port *root, const struct stp_port *p)
+    OVS_REQ_WRLOCK(mutex)
  {
      int p_cost = p->designated_cost + p->path_cost;
      int root_cost = root->designated_cost + root->path_cost;
@@ -957,7 +1150,7 @@ stp_supersedes_root(const struct stp_port *root, const struct stp_port *p)
  }
  
  static void
-stp_root_selection(struct stp *stp)
+stp_root_selection(struct stp *stp) OVS_REQ_WRLOCK(mutex)
  {
      struct stp_port *p, *root;
  
@@ -983,7 +1176,7 @@ stp_root_selection(struct stp *stp)
  }
  
  static void
-stp_designated_port_selection(struct stp *stp)
+stp_designated_port_selection(struct stp *stp) OVS_REQ_WRLOCK(mutex)
  {
      struct stp_port *p;
  
@@ -1002,7 +1195,7 @@ stp_designated_port_selection(struct stp *stp)
  }
  
  static void
-stp_become_designated_port(struct stp_port *p)
+stp_become_designated_port(struct stp_port *p) OVS_REQ_WRLOCK(mutex)
  {
      struct stp *stp = p->stp;
      p->designated_root = stp->designated_root;
@@ -1012,7 +1205,7 @@ stp_become_designated_port(struct stp_port *p)
  }
  
  static void
-stp_port_state_selection(struct stp *stp)
+stp_port_state_selection(struct stp *stp) OVS_REQ_WRLOCK(mutex)
  {
      struct stp_port *p;
  
@@ -1033,7 +1226,7 @@ stp_port_state_selection(struct stp *stp)
  }
  
  static void
-stp_make_forwarding(struct stp_port *p)
+stp_make_forwarding(struct stp_port *p) OVS_REQ_WRLOCK(mutex)
  {
      if (p->state == STP_BLOCKING) {
          stp_set_port_state(p, STP_LISTENING);
@@ -1042,7 +1235,7 @@ stp_make_forwarding(struct stp_port *p)
  }
  
  static void
-stp_make_blocking(struct stp_port *p)
+stp_make_blocking(struct stp_port *p) OVS_REQ_WRLOCK(mutex)
  {
      if (!(p->state & (STP_DISABLED | STP_BLOCKING))) {
          if (p->state & (STP_FORWARDING | STP_LEARNING)) {
@@ -1057,6 +1250,7 @@ stp_make_blocking(struct stp_port *p)
  
  static void
  stp_set_port_state(struct stp_port *p, enum stp_state state)
+    OVS_REQ_WRLOCK(mutex)
  {
      if (state != p->state && !p->state_changed) {
          p->state_changed = true;
@@ -1068,7 +1262,7 @@ stp_set_port_state(struct stp_port *p, enum stp_state state)
  }
  
  static void
-stp_topology_change_detection(struct stp *stp)
+stp_topology_change_detection(struct stp *stp) OVS_REQ_WRLOCK(mutex)
  {
      static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
  
@@ -1085,14 +1279,14 @@ stp_topology_change_detection(struct stp *stp)
  }
  
  static void
-stp_topology_change_acknowledged(struct stp *stp)
+stp_topology_change_acknowledged(struct stp *stp) OVS_REQ_WRLOCK(mutex)
  {
      stp->topology_change_detected = false;
      stp_stop_timer(&stp->tcn_timer);
  }
  
  static void
-stp_acknowledge_topology_change(struct stp_port *p)
+stp_acknowledge_topology_change(struct stp_port *p) OVS_REQ_WRLOCK(mutex)
  {
      p->topology_change_ack = true;
      stp_transmit_config(p);
@@ -1101,6 +1295,7 @@ stp_acknowledge_topology_change(struct stp_port *p)
  static void
  stp_received_config_bpdu(struct stp *stp, struct stp_port *p,
                           const struct stp_config_bpdu *config)
+    OVS_REQ_WRLOCK(mutex)
  {
      if (ntohs(config->message_age) >= ntohs(config->max_age)) {
          VLOG_WARN("%s: received config BPDU with message age (%u) greater "
@@ -1141,6 +1336,7 @@ stp_received_config_bpdu(struct stp *stp, struct stp_port *p,
  
  static void
  stp_received_tcn_bpdu(struct stp *stp, struct stp_port *p)
+    OVS_REQ_WRLOCK(mutex)
  {
      if (p->state != STP_DISABLED) {
          if (stp_is_designated_port(p)) {
@@ -1151,14 +1347,14 @@ stp_received_tcn_bpdu(struct stp *stp, struct stp_port *p)
  }
  
  static void
-stp_hello_timer_expiry(struct stp *stp)
+stp_hello_timer_expiry(struct stp *stp) OVS_REQ_WRLOCK(mutex)
  {
      stp_config_bpdu_generation(stp);
      stp_start_timer(&stp->hello_timer, 0);
  }
  
  static void
-stp_message_age_timer_expiry(struct stp_port *p)
+stp_message_age_timer_expiry(struct stp_port *p) OVS_REQ_WRLOCK(mutex)
  {
      struct stp *stp = p->stp;
      bool root = stp_is_root_bridge(stp);
@@ -1177,7 +1373,7 @@ stp_message_age_timer_expiry(struct stp_port *p)
  }
  
  static bool
-stp_is_designated_for_some_port(const struct stp *stp)
+stp_is_designated_for_some_port(const struct stp *stp) OVS_REQ_WRLOCK(mutex)
  {
      const struct stp_port *p;
  
@@ -1190,7 +1386,7 @@ stp_is_designated_for_some_port(const struct stp *stp)
  }
  
  static void
-stp_forward_delay_timer_expiry(struct stp_port *p)
+stp_forward_delay_timer_expiry(struct stp_port *p) OVS_REQ_WRLOCK(mutex)
  {
      if (p->state == STP_LISTENING) {
          stp_set_port_state(p, STP_LEARNING);
@@ -1206,21 +1402,21 @@ stp_forward_delay_timer_expiry(struct stp_port *p)
  }
  
  static void
-stp_tcn_timer_expiry(struct stp *stp)
+stp_tcn_timer_expiry(struct stp *stp) OVS_REQ_WRLOCK(mutex)
  {
      stp_transmit_tcn(stp);
      stp_start_timer(&stp->tcn_timer, 0);
  }
  
  static void
-stp_topology_change_timer_expiry(struct stp *stp)
+stp_topology_change_timer_expiry(struct stp *stp) OVS_REQ_WRLOCK(mutex)
  {
      stp->topology_change_detected = false;
      stp->topology_change = false;
  }
  
  static void
-stp_hold_timer_expiry(struct stp_port *p)
+stp_hold_timer_expiry(struct stp_port *p) OVS_REQ_WRLOCK(mutex)
  {
      if (p->config_pending) {
          stp_transmit_config(p);
@@ -1229,6 +1425,7 @@ stp_hold_timer_expiry(struct stp_port *p)
  
  static void
  stp_initialize_port(struct stp_port *p, enum stp_state state)
+    OVS_REQ_WRLOCK(mutex)
  {
      ovs_assert(state & (STP_DISABLED | STP_BLOCKING));
      stp_become_designated_port(p);
@@ -1244,7 +1441,7 @@ stp_initialize_port(struct stp_port *p, enum stp_state state)
  }
  
  static void
-stp_become_root_bridge(struct stp *stp)
+stp_become_root_bridge(struct stp *stp) OVS_REQ_WRLOCK(mutex)
  {
      stp->max_age = stp->bridge_max_age;
      stp->hello_time = stp->bridge_hello_time;
@@ -1256,20 +1453,21 @@ stp_become_root_bridge(struct stp *stp)
  }
  
  static void
-stp_start_timer(struct stp_timer *timer, int value)
+stp_start_timer(struct stp_timer *timer, int value) OVS_REQ_WRLOCK(mutex)
  {
      timer->value = value;
      timer->active = true;
  }
  
  static void
-stp_stop_timer(struct stp_timer *timer)
+stp_stop_timer(struct stp_timer *timer) OVS_REQ_WRLOCK(mutex)
  {
      timer->active = false;
  }
  
  static bool
  stp_timer_expired(struct stp_timer *timer, int elapsed, int timeout)
+    OVS_REQ_WRLOCK(mutex)
  {
      if (timer->active) {
          timer->value += elapsed;
@@ -1304,7 +1502,7 @@ clamp(int x, int min, int max)
  }
  
  static void
-stp_update_bridge_timers(struct stp *stp)
+stp_update_bridge_timers(struct stp *stp) OVS_REQ_WRLOCK(mutex)
  {
      int ht, ma, fd;
  
@@ -1325,6 +1523,7 @@ stp_update_bridge_timers(struct stp *stp)
  
  static void
  stp_send_bpdu(struct stp_port *p, const void *bpdu, size_t bpdu_size)
+    OVS_REQ_WRLOCK(mutex)
  {
      struct eth_header *eth;
      struct llc_header *llc;
@@ -1353,11 +1552,11 @@ stp_send_bpdu(struct stp_port *p, const void *bpdu, size_t bpdu_size)
  /* Unixctl. */
  
  static struct stp *
-stp_find(const char *name)
+stp_find(const char *name) OVS_REQ_WRLOCK(mutex)
  {
      struct stp *stp;
  
-    LIST_FOR_EACH (stp, node, &all_stps) {
+    LIST_FOR_EACH (stp, node, all_stps) {
          if (!strcmp(stp->name, name)) {
              return stp;
          }
@@ -1369,21 +1568,25 @@ static void
  stp_unixctl_tcn(struct unixctl_conn *conn, int argc,
                  const char *argv[], void *aux OVS_UNUSED)
  {
+    ovs_mutex_lock(&mutex);
      if (argc > 1) {
          struct stp *stp = stp_find(argv[1]);
  
          if (!stp) {
              unixctl_command_reply_error(conn, "no such stp object");
-            return;
+            goto out;
          }
          stp_topology_change_detection(stp);
      } else {
          struct stp *stp;
  
-        LIST_FOR_EACH (stp, node, &all_stps) {
+        LIST_FOR_EACH (stp, node, all_stps) {
              stp_topology_change_detection(stp);
          }
      }
  
      unixctl_command_reply(conn, "OK");
+
+out:
+    ovs_mutex_unlock(&mutex);
  }
diff --git a/lib/stp.h b/lib/stp.h

index 524b9dc..affde18 100644 (file)
--- a/lib/stp.h
+++ b/lib/stp.h
@@ -60,7 +60,8 @@ struct stp *stp_create(const char *name, stp_identifier bridge_id,
                         void (*send_bpdu)(struct ofpbuf *bpdu, int port_no,
                                           void *aux),
                         void *aux);
-void stp_destroy(struct stp *);
+struct stp *stp_ref(const struct stp *);
+void stp_unref(struct stp *);
  void stp_tick(struct stp *, int ms);
  void stp_set_bridge_id(struct stp *, stp_identifier bridge_id);
  void stp_set_bridge_priority(struct stp *, uint16_t new_priority);
diff --git a/lib/tag.c b/lib/tag.c

deleted file mode 100644 (file)

index f064d17..0000000
--- a/lib/tag.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2008, 2009, 2010, 2011 Nicira, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <config.h>
-#include "tag.h"
-#include <limits.h>
-#include "random.h"
-#include "type-props.h"
-#include "util.h"
-
-#define N_TAG_BITS (CHAR_BIT * sizeof(tag_type))
-BUILD_ASSERT_DECL(IS_POW2(N_TAG_BITS));
-
-#define LOG2_N_TAG_BITS (N_TAG_BITS == 32 ? 5 : N_TAG_BITS == 64 ? 6 : 0)
-BUILD_ASSERT_DECL(LOG2_N_TAG_BITS > 0);
-
-/* Returns a randomly selected tag. */
-tag_type
-tag_create_random(void)
-{
-    int x, y;
-    do {
-        uint16_t r = random_uint16();
-        x = r & (N_TAG_BITS - 1);
-        y = r >> (16 - LOG2_N_TAG_BITS);
-    } while (x == y);
-    return (1u << x) | (1u << y);
-}
-
-/* Returns a tag deterministically generated from 'seed'.
- *
- * 'seed' should have data in all of its bits; if it has data only in its
- * low-order bits then the resulting tags will be poorly distributed.  Use a
- * hash function such as hash_bytes() to generate 'seed' if necessary. */
-tag_type
-tag_create_deterministic(uint32_t seed)
-{
-    int x = seed & (N_TAG_BITS - 1);
-    int y = (seed >> LOG2_N_TAG_BITS) % (N_TAG_BITS - 1);
-    y += y >= x;
-    return (1u << x) | (1u << y);
-}
-
-/* Initializes 'set' as an empty tag set. */
-void
-tag_set_init(struct tag_set *set)
-{
-    memset(set, 0, sizeof *set);
-}
-
-static bool
-tag_is_worth_adding(const struct tag_set *set, tag_type tag)
-{
-    if (!tag) {
-        /* Nothing to add. */
-        return false;
-    } else if ((set->total & tag) != tag) {
-        /* 'set' doesn't have all the bits in 'tag', so we need to add it. */
-        return true;
-    } else {
-        /* We can drop it if some member of 'set' already includes all of the
-         * 1-bits in 'tag'.  (tag_set_intersects() does a different test:
-         * whether some member of 'set' has at least two 1-bit in common with
-         * 'tag'.) */
-        int i;
-
-        for (i = 0; i < TAG_SET_SIZE; i++) {
-            if ((set->tags[i] & tag) == tag) {
-                return false;
-            }
-        }
-        return true;
-    }
-}
-
-/* Adds 'tag' to 'set'. */
-void
-tag_set_add(struct tag_set *set, tag_type tag)
-{
-    if (tag_is_worth_adding(set, tag)) {
-        /* XXX We could do better by finding the set member to which we would
-         * add the fewest number of 1-bits.  This would reduce the amount of
-         * ambiguity, since e.g. three 1-bits match 3 * 2 / 2 = 3 unique tags
-         * whereas four 1-bits match 4 * 3 / 2 = 6 unique tags. */
-        tag_type *t = &set->tags[set->n++ % TAG_SET_SIZE];
-        *t |= tag;
-        if (*t == TYPE_MAXIMUM(tag_type)) {
-            set->tags[0] = *t;
-        }
-
-        set->total |= tag;
-    }
-}
-
-/* Adds all the tags in 'other' to 'set'. */
-void
-tag_set_union(struct tag_set *set, const struct tag_set *other)
-{
-    size_t i;
-
-    for (i = 0; i < TAG_SET_SIZE; i++) {
-        tag_set_add(set, other->tags[i]);
-    }
-}
diff --git a/lib/tag.h b/lib/tag.h

deleted file mode 100644 (file)

index 9d6b4aa..0000000
--- a/lib/tag.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (c) 2008, 2011, 2012 Nicira, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef TAG_H
-#define TAG_H 1
-
-#include <stdbool.h>
-#include <stdint.h>
-#include "util.h"
-
-/*
- * Tagging support.
- *
- * A 'tag' represents an arbitrary category.  Currently, tags are used to
- * represent categories of flows and in particular the dependencies for a flow
- * switching decision.  For example, if a flow's output port is based on
- * knowledge that source MAC 00:02:e3:0f:80:a4 is on eth0, then a tag that
- * represents that dependency is attached to that flow in the flowtracking hash
- * table.
- *
- * As this example shows, the universe of possible categories is very large,
- * and even the number of categories that are in use at a given time can be
- * very large.  This means that keeping track of category membership via
- * conventional means (lists, bitmaps, etc.) is likely to be expensive.
- *
- * Tags are actually implemented via a "superimposed coding", as discussed in
- * Knuth TAOCP v.3 section 6.5 "Retrieval on Secondary Keys".  A tag is an
- * unsigned integer in which exactly 2 bits are set to 1 and the rest set to 0.
- * For 32-bit integers (as currently used) there are 32 * 31 / 2 = 496 unique
- * tags; for 64-bit integers there are 64 * 63 / 2 = 2,016.
- *
- * Because there is a small finite number of unique tags, tags must collide
- * after some number of them have been created.  In practice we generally
- * create tags by choosing bits randomly.
- *
- * The key property of tags is that we can combine them without increasing the
- * amount of data required using bitwise-OR, since the result has the 1-bits
- * from both tags set.  The necessary tradeoff is that the result is even more
- * ambiguous: if combining two tags yields a value with 4 bits set to 1, then
- * the result value will test as having 4 * 3 / 2 = 6 unique tags, not just the
- * two tags that we combined.
- *
- * The upshot is this: a value that is the bitwise-OR combination of a number
- * of tags will always include the tags that were combined, but it may contain
- * any number of additional tags as well.  This is acceptable for flowtracking,
- * since we want to be sure that we catch every flow that needs to be
- * revalidated, but it is OK if we revalidate a few extra flows as well.
- *
- * If we combine too many tags, then the result will have every bit set, so
- * that it will test as including every tag.  Fortunately, this is not a big
- * problem for us: although there are many flows overall, each individual flow
- * belongs only to a small number of categories.
- */
-
-/* Represents a tag, or the combination of 0 or more tags. */
-typedef uint32_t tag_type;
-
-tag_type tag_create_random(void);
-tag_type tag_create_deterministic(uint32_t seed);
-static inline bool tag_intersects(tag_type, tag_type);
-static inline bool tag_is_valid(tag_type);
-
-/* Returns true if 'a' and 'b' have at least one tag in common,
- * false if their set of tags is disjoint. */
-static inline bool
-tag_intersects(tag_type a, tag_type b)
-{
-    tag_type x = a & b;
-    return (x & (x - 1)) != 0;
-}
-
-/* Returns true if 'tag' is a valid tag, that is, if exactly two bits are set
- * to 1 and the rest to 0.   Otherwise, returns false. */
-static inline bool
-tag_is_valid(tag_type tag)
-{
-    tag_type x = tag & (tag - 1);
-    tag_type y = x & (x - 1);
-    return x && !y;
-}
-\f
-/*
- * A tag set accumulates tags with reduced ambiguity compared to a single tag.
- * The flow tracking uses tag sets to keep track of tags that need to
- * revalidated after a number of packets have been processed.
- */
-#define TAG_SET_SIZE 4
-struct tag_set {
-    tag_type total;
-    tag_type tags[TAG_SET_SIZE];
-    unsigned int n;
-};
-
-void tag_set_init(struct tag_set *);
-void tag_set_add(struct tag_set *, tag_type);
-void tag_set_union(struct tag_set *, const struct tag_set *);
-static inline bool tag_set_is_empty(const struct tag_set *);
-static inline bool tag_set_intersects(const struct tag_set *, tag_type);
-
-/* Returns true if 'set' will match no tags at all,
- * false if it will match at least one tag. */
-static inline bool
-tag_set_is_empty(const struct tag_set *set)
-{
-    return !set->n;
-}
-
-/* Returns true if any of the tags in 'tags' are also in 'set',
- * false if the intersection is empty. */
-static inline bool
-tag_set_intersects(const struct tag_set *set, tag_type tags)
-{
-    BUILD_ASSERT_DECL(TAG_SET_SIZE == 4);
-    return (tag_intersects(set->total, tags)
-            && (tag_intersects(set->tags[0], tags)
-                || tag_intersects(set->tags[1], tags)
-                || tag_intersects(set->tags[2], tags)
-                || tag_intersects(set->tags[3], tags)));
-}
-
-#endif /* tag.h */
diff --git a/lib/util.c b/lib/util.c

index 6a72107..1751c6f 100644 (file)
--- a/lib/util.c
+++ b/lib/util.c
@@ -401,8 +401,10 @@ void
  set_subprogram_name(const char *name)
  {
      free(subprogram_name_set(xstrdup(name)));
-#if HAVE_PTHREAD_SETNAME_NP
+#if HAVE_GLIBC_PTHREAD_SETNAME_NP
      pthread_setname_np(pthread_self(), name);
+#elif HAVE_NETBSD_PTHREAD_SETNAME_NP
+    pthread_setname_np(pthread_self(), "%s", name);
  #elif HAVE_PTHREAD_SET_NAME_NP
      pthread_set_name_np(pthread_self(), name);
  #endif
diff --git a/lib/vlog.c b/lib/vlog.c

index 6f0a256..26d0e6c 100644 (file)
--- a/lib/vlog.c
+++ b/lib/vlog.c
@@ -110,6 +110,7 @@ static struct ovs_mutex log_file_mutex = OVS_ADAPTIVE_MUTEX_INITIALIZER;
  static char *log_file_name OVS_GUARDED_BY(log_file_mutex);
  static int log_fd OVS_GUARDED_BY(log_file_mutex) = -1;
  static struct async_append *log_writer OVS_GUARDED_BY(log_file_mutex);
+static bool log_async OVS_GUARDED_BY(log_file_mutex);
  
  static void format_log_message(const struct vlog_module *, enum vlog_level,
                                 enum vlog_facility,
@@ -344,7 +345,9 @@ vlog_set_log_file(const char *file_name)
  
      log_file_name = xstrdup(new_log_file_name);
      log_fd = new_log_fd;
-    log_writer = async_append_create(new_log_fd);
+    if (log_async) {
+        log_writer = async_append_create(new_log_fd);
+    }
  
      for (mp = vlog_modules; mp < &vlog_modules[n_vlog_modules]; mp++) {
          update_min_level(*mp);
@@ -617,6 +620,22 @@ vlog_init(void)
      pthread_once(&once, vlog_init__);
  }
  
+/* Enables VLF_FILE log output to be written asynchronously to disk.
+ * Asynchronous file writes avoid blocking the process in the case of a busy
+ * disk, but on the other hand they are less robust: there is a chance that the
+ * write will not make it to the log file if the process crashes soon after the
+ * log call. */
+void
+vlog_enable_async(void)
+{
+    ovs_mutex_lock(&log_file_mutex);
+    log_async = true;
+    if (log_fd >= 0 && !log_writer) {
+        log_writer = async_append_create(log_fd);
+    }
+    ovs_mutex_unlock(&log_file_mutex);
+}
+
  /* Print the current logging level for each module. */
  char *
  vlog_get_levels(void)
@@ -836,9 +855,13 @@ vlog_valist(const struct vlog_module *module, enum vlog_level level,
  
              ovs_mutex_lock(&log_file_mutex);
              if (log_fd >= 0) {
-                async_append_write(log_writer, s.string, s.length);
-                if (level == VLL_EMER) {
-                    async_append_flush(log_writer);
+                if (log_writer) {
+                    async_append_write(log_writer, s.string, s.length);
+                    if (level == VLL_EMER) {
+                        async_append_flush(log_writer);
+                    }
+                } else {
+                    ignore(write(log_fd, s.string, s.length));
                  }
              }
              ovs_mutex_unlock(&log_file_mutex);
diff --git a/lib/vlog.h b/lib/vlog.h

index 901b3d3..87a9654 100644 (file)
--- a/lib/vlog.h
+++ b/lib/vlog.h
@@ -141,6 +141,7 @@ int vlog_reopen_log_file(void);
  
  /* Initialization. */
  void vlog_init(void);
+void vlog_enable_async(void);
  
  /* Functions for actual logging. */
  void vlog(const struct vlog_module *, enum vlog_level, const char *format, ...)
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c

index e555603..fb4d0b4 100644 (file)
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -16,6 +16,8 @@
  
  #include "ofproto/ofproto-dpif-xlate.h"
  
+#include <errno.h>
+
  #include "bfd.h"
  #include "bitmap.h"
  #include "bond.h"
@@ -61,13 +63,14 @@ struct xbridge {
      struct hmap xports;           /* Indexed by ofp_port. */
  
      char *name;                   /* Name used in log messages. */
+    struct dpif *dpif;            /* Datapath interface. */
      struct mac_learning *ml;      /* Mac learning handle. */
      struct mbridge *mbridge;      /* Mirroring. */
      struct dpif_sflow *sflow;     /* SFlow handle, or null. */
      struct dpif_ipfix *ipfix;     /* Ipfix handle, or null. */
+    struct stp *stp;              /* STP or null if disabled. */
  
      enum ofp_config_flags frag;   /* Fragmentation handling. */
-    bool has_stp;                 /* Bridge runs stp? */
      bool has_netflow;             /* Bridge runs netflow? */
      bool has_in_band;             /* Bridge has in band control? */
      bool forward_bpdu;            /* Bridge forwards STP BPDUs? */
@@ -112,7 +115,9 @@ struct xport {
      struct xport *peer;              /* Patch port peer or null. */
  
      enum ofputil_port_config config; /* OpenFlow port configuration. */
-    enum stp_state stp_state;        /* STP_DISABLED if STP not in use. */
+    int stp_port_no;                 /* STP port number or 0 if not in use. */
+
+    struct hmap skb_priorities;      /* Map of 'skb_priority_to_dscp's. */
  
      bool may_enable;                 /* May be enabled in bonds. */
      bool is_tunnel;                  /* Is a tunnel port. */
@@ -147,7 +152,6 @@ struct xlate_ctx {
      struct rule_dpif *rule;
  
      int recurse;                /* Recursion level, via xlate_table_action. */
-    bool max_resubmit_trigger;  /* Recursed too deeply during translation. */
      uint32_t orig_skb_priority; /* Priority when packet arrived. */
      uint8_t table_id;           /* OpenFlow table ID where flow was found. */
      uint32_t sflow_n_outputs;   /* Number of output ports. */
@@ -164,6 +168,16 @@ struct xlate_ctx {
   * The bundle's name and vlan mode are initialized in lookup_input_bundle() */
  static struct xbundle ofpp_none_bundle;
  
+/* Node in 'xport''s 'skb_priorities' map.  Used to maintain a map from
+ * 'priority' (the datapath's term for QoS queue) to the dscp bits which all
+ * traffic egressing the 'ofport' with that priority should be marked with. */
+struct skb_priority_to_dscp {
+    struct hmap_node hmap_node; /* Node in 'xport''s 'skb_priorities'. */
+    uint32_t skb_priority;      /* Priority of this queue (see struct flow). */
+
+    uint8_t dscp;               /* DSCP bits to mark outgoing traffic with. */
+};
+
  static struct hmap xbridges = HMAP_INITIALIZER(&xbridges);
  static struct hmap xbundles = HMAP_INITIALIZER(&xbundles);
  static struct hmap xports = HMAP_INITIALIZER(&xports);
@@ -185,16 +199,21 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
  
  static struct xbridge *xbridge_lookup(const struct ofproto_dpif *);
  static struct xbundle *xbundle_lookup(const struct ofbundle *);
-static struct xport *xport_lookup(struct ofport_dpif *);
+static struct xport *xport_lookup(const struct ofport_dpif *);
  static struct xport *get_ofp_port(const struct xbridge *, ofp_port_t ofp_port);
+static struct skb_priority_to_dscp *get_skb_priority(const struct xport *,
+                                                     uint32_t skb_priority);
+static void clear_skb_priorities(struct xport *);
+static bool dscp_from_skb_priority(const struct xport *, uint32_t skb_priority,
+                                   uint8_t *dscp);
  
  void
  xlate_ofproto_set(struct ofproto_dpif *ofproto, const char *name,
-                  const struct mac_learning *ml, const struct mbridge *mbridge,
+                  struct dpif *dpif, const struct mac_learning *ml,
+                  struct stp *stp, const struct mbridge *mbridge,
                    const struct dpif_sflow *sflow,
                    const struct dpif_ipfix *ipfix, enum ofp_config_flags frag,
-                  bool forward_bpdu, bool has_in_band, bool has_netflow,
-                  bool has_stp)
+                  bool forward_bpdu, bool has_in_band, bool has_netflow)
  {
      struct xbridge *xbridge = xbridge_lookup(ofproto);
  
@@ -227,13 +246,18 @@ xlate_ofproto_set(struct ofproto_dpif *ofproto, const char *name,
          xbridge->ipfix = dpif_ipfix_ref(ipfix);
      }
  
+    if (xbridge->stp != stp) {
+        stp_unref(xbridge->stp);
+        xbridge->stp = stp_ref(stp);
+    }
+
      free(xbridge->name);
      xbridge->name = xstrdup(name);
  
+    xbridge->dpif = dpif;
      xbridge->forward_bpdu = forward_bpdu;
      xbridge->has_in_band = has_in_band;
      xbridge->has_netflow = has_netflow;
-    xbridge->has_stp = has_stp;
      xbridge->frag = frag;
  }
  
@@ -330,10 +354,13 @@ xlate_ofport_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle,
                   struct ofport_dpif *ofport, ofp_port_t ofp_port,
                   odp_port_t odp_port, const struct netdev *netdev,
                   const struct cfm *cfm, const struct bfd *bfd,
-                 struct ofport_dpif *peer, enum ofputil_port_config config,
-                 enum stp_state stp_state, bool is_tunnel, bool may_enable)
+                 struct ofport_dpif *peer, int stp_port_no,
+                 const struct ofproto_port_queue *qdscp_list, size_t n_qdscp,
+                 enum ofputil_port_config config, bool is_tunnel,
+                 bool may_enable)
  {
      struct xport *xport = xport_lookup(ofport);
+    size_t i;
  
      if (!xport) {
          xport = xzalloc(sizeof *xport);
@@ -341,6 +368,7 @@ xlate_ofport_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle,
          xport->xbridge = xbridge_lookup(ofproto);
          xport->ofp_port = ofp_port;
  
+        hmap_init(&xport->skb_priorities);
          hmap_insert(&xports, &xport->hmap_node, hash_pointer(ofport, 0));
          hmap_insert(&xport->xbridge->xports, &xport->ofp_node,
                      hash_ofp_port(xport->ofp_port));
@@ -349,7 +377,7 @@ xlate_ofport_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle,
      ovs_assert(xport->ofp_port == ofp_port);
  
      xport->config = config;
-    xport->stp_state = stp_state;
+    xport->stp_port_no = stp_port_no;
      xport->is_tunnel = is_tunnel;
      xport->may_enable = may_enable;
      xport->odp_port = odp_port;
@@ -372,7 +400,7 @@ xlate_ofport_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle,
      if (xport->peer) {
          xport->peer->peer = NULL;
      }
-    xport->peer = peer ? xport_lookup(peer) : NULL;
+    xport->peer = xport_lookup(peer);
      if (xport->peer) {
          xport->peer->peer = xport;
      }
@@ -380,10 +408,27 @@ xlate_ofport_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle,
      if (xport->xbundle) {
          list_remove(&xport->bundle_node);
      }
-    xport->xbundle = ofbundle ? xbundle_lookup(ofbundle) : NULL;
+    xport->xbundle = xbundle_lookup(ofbundle);
      if (xport->xbundle) {
          list_insert(&xport->xbundle->xports, &xport->bundle_node);
      }
+
+    clear_skb_priorities(xport);
+    for (i = 0; i < n_qdscp; i++) {
+        struct skb_priority_to_dscp *pdscp;
+        uint32_t skb_priority;
+
+        if (dpif_queue_to_priority(xport->xbridge->dpif, qdscp_list[i].queue,
+                                   &skb_priority)) {
+            continue;
+        }
+
+        pdscp = xmalloc(sizeof *pdscp);
+        pdscp->skb_priority = skb_priority;
+        pdscp->dscp = (qdscp_list[i].dscp << 2) & IP_DSCP_MASK;
+        hmap_insert(&xport->skb_priorities, &pdscp->hmap_node,
+                    hash_int(pdscp->skb_priority, 0));
+    }
  }
  
  void
@@ -404,6 +449,9 @@ xlate_ofport_remove(struct ofport_dpif *ofport)
          list_remove(&xport->bundle_node);
      }
  
+    clear_skb_priorities(xport);
+    hmap_destroy(&xport->skb_priorities);
+
      hmap_remove(&xports, &xport->hmap_node);
      hmap_remove(&xport->xbridge->xports, &xport->ofp_node);
  
@@ -413,11 +461,102 @@ xlate_ofport_remove(struct ofport_dpif *ofport)
      free(xport);
  }
  
+/* Given a datapath, packet, and flow metadata ('backer', 'packet', and 'key'
+ * respectively), populates 'flow' with the result of odp_flow_key_to_flow().
+ * Optionally, if nonnull, populates 'fitnessp' with the fitness of 'flow' as
+ * returned by odp_flow_key_to_flow().  Also, optionally populates 'ofproto'
+ * with the ofproto_dpif, and 'odp_in_port' with the datapath in_port, that
+ * 'packet' ingressed.
+ *
+ * If 'ofproto' is nonnull, requires 'flow''s in_port to exist.  Otherwise sets
+ * 'flow''s in_port to OFPP_NONE.
+ *
+ * This function does post-processing on data returned from
+ * odp_flow_key_to_flow() to help make VLAN splinters transparent to the rest
+ * of the upcall processing logic.  In particular, if the extracted in_port is
+ * a VLAN splinter port, it replaces flow->in_port by the "real" port, sets
+ * flow->vlan_tci correctly for the VLAN of the VLAN splinter port, and pushes
+ * a VLAN header onto 'packet' (if it is nonnull).
+ *
+ * Similarly, this function also includes some logic to help with tunnels.  It
+ * may modify 'flow' as necessary to make the tunneling implementation
+ * transparent to the upcall processing logic.
+ *
+ * Returns 0 if successful, ENODEV if the parsed flow has no associated ofport,
+ * or some other positive errno if there are other problems. */
+int
+xlate_receive(const struct dpif_backer *backer, struct ofpbuf *packet,
+              const struct nlattr *key, size_t key_len,
+              struct flow *flow, enum odp_key_fitness *fitnessp,
+              struct ofproto_dpif **ofproto, odp_port_t *odp_in_port)
+{
+    enum odp_key_fitness fitness;
+    const struct xport *xport;
+    int error = ENODEV;
+
+    fitness = odp_flow_key_to_flow(key, key_len, flow);
+    if (fitness == ODP_FIT_ERROR) {
+        error = EINVAL;
+        goto exit;
+    }
+
+    if (odp_in_port) {
+        *odp_in_port = flow->in_port.odp_port;
+    }
+
+    xport = xport_lookup(tnl_port_should_receive(flow)
+            ? tnl_port_receive(flow)
+            : odp_port_to_ofport(backer, flow->in_port.odp_port));
+
+    flow->in_port.ofp_port = xport ? xport->ofp_port : OFPP_NONE;
+    if (!xport) {
+        goto exit;
+    }
+
+    if (vsp_adjust_flow(xport->xbridge->ofproto, flow)) {
+        if (packet) {
+            /* Make the packet resemble the flow, so that it gets sent to
+             * an OpenFlow controller properly, so that it looks correct
+             * for sFlow, and so that flow_extract() will get the correct
+             * vlan_tci if it is called on 'packet'.
+             *
+             * The allocated space inside 'packet' probably also contains
+             * 'key', that is, both 'packet' and 'key' are probably part of
+             * a struct dpif_upcall (see the large comment on that
+             * structure definition), so pushing data on 'packet' is in
+             * general not a good idea since it could overwrite 'key' or
+             * free it as a side effect.  However, it's OK in this special
+             * case because we know that 'packet' is inside a Netlink
+             * attribute: pushing 4 bytes will just overwrite the 4-byte
+             * "struct nlattr", which is fine since we don't need that
+             * header anymore. */
+            eth_push_vlan(packet, flow->vlan_tci);
+        }
+        /* We can't reproduce 'key' from 'flow'. */
+        fitness = fitness == ODP_FIT_PERFECT ? ODP_FIT_TOO_MUCH : fitness;
+    }
+    error = 0;
+
+    if (ofproto) {
+        *ofproto = xport->xbridge->ofproto;
+    }
+
+exit:
+    if (fitnessp) {
+        *fitnessp = fitness;
+    }
+    return error;
+}
+
  static struct xbridge *
  xbridge_lookup(const struct ofproto_dpif *ofproto)
  {
      struct xbridge *xbridge;
  
+    if (!ofproto) {
+        return NULL;
+    }
+
      HMAP_FOR_EACH_IN_BUCKET (xbridge, hmap_node, hash_pointer(ofproto, 0),
                               &xbridges) {
          if (xbridge->ofproto == ofproto) {
@@ -432,6 +571,10 @@ xbundle_lookup(const struct ofbundle *ofbundle)
  {
      struct xbundle *xbundle;
  
+    if (!ofbundle) {
+        return NULL;
+    }
+
      HMAP_FOR_EACH_IN_BUCKET (xbundle, hmap_node, hash_pointer(ofbundle, 0),
                               &xbundles) {
          if (xbundle->ofbundle == ofbundle) {
@@ -442,10 +585,14 @@ xbundle_lookup(const struct ofbundle *ofbundle)
  }
  
  static struct xport *
-xport_lookup(struct ofport_dpif *ofport)
+xport_lookup(const struct ofport_dpif *ofport)
  {
      struct xport *xport;
  
+    if (!ofport) {
+        return NULL;
+    }
+
      HMAP_FOR_EACH_IN_BUCKET (xport, hmap_node, hash_pointer(ofport, 0),
                               &xports) {
          if (xport->ofport == ofport) {
@@ -455,6 +602,60 @@ xport_lookup(struct ofport_dpif *ofport)
      return NULL;
  }
  
+static struct stp_port *
+xport_get_stp_port(const struct xport *xport)
+{
+    return xport->xbridge->stp && xport->stp_port_no
+        ? stp_get_port(xport->xbridge->stp, xport->stp_port_no)
+        : NULL;
+}
+
+static enum stp_state
+xport_stp_learn_state(const struct xport *xport)
+{
+    struct stp_port *sp = xport_get_stp_port(xport);
+    return stp_learn_in_state(sp ? stp_port_get_state(sp) : STP_DISABLED);
+}
+
+static bool
+xport_stp_forward_state(const struct xport *xport)
+{
+    struct stp_port *sp = xport_get_stp_port(xport);
+    return stp_forward_in_state(sp ? stp_port_get_state(sp) : STP_DISABLED);
+}
+
+/* Returns true if STP should process 'flow'.  Sets fields in 'wc' that
+ * were used to make the determination. */
+static bool
+stp_should_process_flow(const struct flow *flow, struct flow_wildcards *wc)
+{
+    memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
+    return eth_addr_equals(flow->dl_dst, eth_addr_stp);
+}
+
+static void
+stp_process_packet(const struct xport *xport, const struct ofpbuf *packet)
+{
+    struct stp_port *sp = xport_get_stp_port(xport);
+    struct ofpbuf payload = *packet;
+    struct eth_header *eth = payload.data;
+
+    /* Sink packets on ports that have STP disabled when the bridge has
+     * STP enabled. */
+    if (!sp || stp_port_get_state(sp) == STP_DISABLED) {
+        return;
+    }
+
+    /* Trim off padding on payload. */
+    if (payload.size > ntohs(eth->eth_type) + ETH_HEADER_LEN) {
+        payload.size = ntohs(eth->eth_type) + ETH_HEADER_LEN;
+    }
+
+    if (ofpbuf_try_pull(&payload, ETH_HEADER_LEN + LLC_HEADER_LEN)) {
+        stp_received_bpdu(sp, payload.data, payload.size);
+    }
+}
+
  static struct xport *
  get_ofp_port(const struct xbridge *xbridge, ofp_port_t ofp_port)
  {
@@ -773,9 +974,8 @@ output_normal(struct xlate_ctx *ctx, const struct xbundle *out_xbundle,
          struct ofport_dpif *ofport;
  
          ofport = bond_choose_output_slave(out_xbundle->bond, &ctx->xin->flow,
-                                          &ctx->xout->wc, vid,
-                                          &ctx->xout->tags);
-        xport = ofport ? xport_lookup(ofport) : NULL;
+                                          &ctx->xout->wc, vid);
+        xport = xport_lookup(ofport);
  
          if (!xport) {
              /* No slaves enabled, so drop packet. */
@@ -837,8 +1037,9 @@ update_learning_table(const struct xbridge *xbridge,
          return;
      }
  
+    ovs_rwlock_wrlock(&xbridge->ml->rwlock);
      if (!mac_learning_may_learn(xbridge->ml, flow->dl_src, vlan)) {
-        return;
+        goto out;
      }
  
      mac = mac_learning_insert(xbridge->ml, flow->dl_src, vlan);
@@ -848,11 +1049,11 @@ update_learning_table(const struct xbridge *xbridge,
          if (!in_xbundle->bond) {
              mac_entry_set_grat_arp_lock(mac);
          } else if (mac_entry_is_grat_arp_locked(mac)) {
-            return;
+            goto out;
          }
      }
  
-    if (mac_entry_is_new(mac) || mac->port.p != in_xbundle->ofbundle) {
+    if (mac->port.p != in_xbundle->ofbundle) {
          /* The log messages here could actually be useful in debugging,
           * so keep the rate limit relatively high. */
          static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300);
@@ -862,8 +1063,10 @@ update_learning_table(const struct xbridge *xbridge,
                      in_xbundle->name, vlan);
  
          mac->port.p = in_xbundle->ofbundle;
-        mac_learning_changed(xbridge->ml, mac);
+        mac_learning_changed(xbridge->ml);
      }
+out:
+    ovs_rwlock_unlock(&xbridge->ml->rwlock);
  }
  
  /* Determines whether packets in 'flow' within 'xbridge' should be forwarded or
@@ -899,7 +1102,7 @@ is_admissible(struct xlate_ctx *ctx, struct xport *in_port,
          struct mac_entry *mac;
  
          switch (bond_check_admissibility(in_xbundle->bond, in_port->ofport,
-                                         flow->dl_dst, &ctx->xout->tags)) {
+                                         flow->dl_dst)) {
          case BV_ACCEPT:
              break;
  
@@ -908,14 +1111,17 @@ is_admissible(struct xlate_ctx *ctx, struct xport *in_port,
              return false;
  
          case BV_DROP_IF_MOVED:
-            mac = mac_learning_lookup(xbridge->ml, flow->dl_src, vlan, NULL);
+            ovs_rwlock_rdlock(&xbridge->ml->rwlock);
+            mac = mac_learning_lookup(xbridge->ml, flow->dl_src, vlan);
              if (mac && mac->port.p != in_xbundle->ofbundle &&
                  (!is_gratuitous_arp(flow, &ctx->xout->wc)
                   || mac_entry_is_grat_arp_locked(mac))) {
+                ovs_rwlock_unlock(&xbridge->ml->rwlock);
                  xlate_report(ctx, "SLB bond thinks this packet looped back, "
                              "dropping");
                  return false;
              }
+            ovs_rwlock_unlock(&xbridge->ml->rwlock);
              break;
          }
      }
@@ -991,8 +1197,8 @@ xlate_normal(struct xlate_ctx *ctx)
      }
  
      /* Determine output bundle. */
-    mac = mac_learning_lookup(ctx->xbridge->ml, flow->dl_dst, vlan,
-                              &ctx->xout->tags);
+    ovs_rwlock_rdlock(&ctx->xbridge->ml->rwlock);
+    mac = mac_learning_lookup(ctx->xbridge->ml, flow->dl_dst, vlan);
      if (mac) {
          struct xbundle *mac_xbundle = xbundle_lookup(mac->port.p);
          if (mac_xbundle && mac_xbundle != in_xbundle) {
@@ -1017,6 +1223,7 @@ xlate_normal(struct xlate_ctx *ctx)
          }
          ctx->xout->nf_output_iface = NF_OUT_FLOOD;
      }
+    ovs_rwlock_unlock(&ctx->xbridge->ml->rwlock);
  }
  
  /* Compose SAMPLE action for sFlow or IPFIX.  The given probability is
@@ -1032,15 +1239,19 @@ compose_sample_action(const struct xbridge *xbridge,
                        const size_t cookie_size)
  {
      size_t sample_offset, actions_offset;
+    odp_port_t odp_port;
      int cookie_offset;
+    uint32_t pid;
  
      sample_offset = nl_msg_start_nested(odp_actions, OVS_ACTION_ATTR_SAMPLE);
  
      nl_msg_put_u32(odp_actions, OVS_SAMPLE_ATTR_PROBABILITY, probability);
  
      actions_offset = nl_msg_start_nested(odp_actions, OVS_SAMPLE_ATTR_ACTIONS);
-    cookie_offset = put_userspace_action(xbridge->ofproto, odp_actions, flow,
-                                         cookie, cookie_size);
+
+    odp_port = ofp_port_to_odp_port(xbridge, flow->in_port.ofp_port);
+    pid = dpif_port_get_pid(xbridge->dpif, odp_port);
+    cookie_offset = odp_put_userspace_action(pid, cookie, cookie_size, odp_actions);
  
      nl_msg_end_nested(odp_actions, actions_offset);
      nl_msg_end_nested(odp_actions, sample_offset);
@@ -1207,9 +1418,9 @@ process_special(struct xlate_ctx *ctx, const struct flow *flow,
              lacp_process_packet(xport->xbundle->lacp, xport->ofport, packet);
          }
          return SLOW_LACP;
-    } else if (xbridge->has_stp && stp_should_process_flow(flow, wc)) {
+    } else if (xbridge->stp && stp_should_process_flow(flow, wc)) {
          if (packet) {
-            stp_process_packet(xport->ofport, packet);
+            stp_process_packet(xport, packet);
          }
          return SLOW_STP;
      } else {
@@ -1240,7 +1451,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
      } else if (xport->config & OFPUTIL_PC_NO_FWD) {
          xlate_report(ctx, "OFPPC_NO_FWD set, skipping output");
          return;
-    } else if (check_stp && !stp_forward_in_state(xport->stp_state)) {
+    } else if (check_stp && !xport_stp_forward_state(xport)) {
          xlate_report(ctx, "STP not in forwarding state, skipping output");
          return;
      }
@@ -1266,7 +1477,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
          if (special) {
              ctx->xout->slow = special;
          } else if (may_receive(peer, ctx)) {
-            if (stp_forward_in_state(peer->stp_state)) {
+            if (xport_stp_forward_state(peer)) {
                  xlate_table_action(ctx, flow->in_port.ofp_port, 0, true);
              } else {
                  /* Forwarding is disabled by STP.  Let OFPP_NORMAL and the
@@ -1296,8 +1507,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
      flow_skb_mark = flow->skb_mark;
      flow_nw_tos = flow->nw_tos;
  
-    if (ofproto_dpif_dscp_from_priority(xport->ofport, flow->skb_priority,
-                                        &dscp)) {
+    if (dscp_from_skb_priority(xport, flow->skb_priority, &dscp)) {
          wc->masks.nw_tos |= IP_ECN_MASK;
          flow->nw_tos &= ~IP_DSCP_MASK;
          flow->nw_tos |= dscp;
@@ -1408,10 +1618,6 @@ xlate_table_action(struct xlate_ctx *ctx,
                                           &ctx->xin->flow, &ctx->xout->wc,
                                           table_id);
  
-        ctx->xout->tags |= calculate_flow_tag(ctx->xbridge->ofproto,
-                                              &ctx->xin->flow, ctx->table_id,
-                                              rule);
-
          /* Restore the original input port.  Otherwise OFPP_NORMAL and
           * OFPP_IN_PORT will have surprising behavior. */
          ctx->xin->flow.in_port.ofp_port = old_in_port;
@@ -1434,7 +1640,6 @@ xlate_table_action(struct xlate_ctx *ctx,
  
          VLOG_ERR_RL(&recurse_rl, "resubmit actions recursed over %d times",
                      MAX_RESUBMIT_RECURSION);
-        ctx->max_resubmit_trigger = true;
      }
  }
  
@@ -1606,6 +1811,7 @@ compose_set_mpls_ttl_action(struct xlate_ctx *ctx, uint8_t ttl)
          return true;
      }
  
+    ctx->xout->wc.masks.mpls_lse |= htonl(MPLS_TTL_MASK);
      set_mpls_lse_ttl(&ctx->xin->flow.mpls_lse, ttl);
      return false;
  }
@@ -1710,8 +1916,7 @@ xlate_enqueue_action(struct xlate_ctx *ctx,
      int error;
  
      /* Translate queue to priority. */
-    error = ofproto_dpif_queue_to_priority(ctx->xbridge->ofproto, queue_id,
-                                           &priority);
+    error = dpif_queue_to_priority(ctx->xbridge->dpif, queue_id, &priority);
      if (error) {
          /* Fall back to ordinary output action. */
          xlate_output_action(ctx, enqueue->port, 0, false);
@@ -1744,8 +1949,7 @@ xlate_set_queue_action(struct xlate_ctx *ctx, uint32_t queue_id)
  {
      uint32_t skb_priority;
  
-    if (!ofproto_dpif_queue_to_priority(ctx->xbridge->ofproto, queue_id,
-                                        &skb_priority)) {
+    if (!dpif_queue_to_priority(ctx->xbridge->dpif, queue_id, &skb_priority)) {
          ctx->xin->flow.skb_priority = skb_priority;
      } else {
          /* Couldn't translate queue to a priority.  Nothing to do.  A warning
@@ -1785,7 +1989,8 @@ xlate_bundle_action(struct xlate_ctx *ctx,
                            slave_enabled_cb,
                            CONST_CAST(struct xbridge *, ctx->xbridge));
      if (bundle->dst.field) {
-        nxm_reg_load(&bundle->dst, ofp_to_u16(port), &ctx->xin->flow);
+        nxm_reg_load(&bundle->dst, ofp_to_u16(port), &ctx->xin->flow,
+                     &ctx->xout->wc);
      } else {
          xlate_output_action(ctx, port, 0, false);
      }
@@ -1795,11 +2000,8 @@ static void
  xlate_learn_action(struct xlate_ctx *ctx,
                     const struct ofpact_learn *learn)
  {
-    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
-    struct ofputil_flow_mod fm;
-    uint64_t ofpacts_stub[1024 / 8];
+    struct ofputil_flow_mod *fm;
      struct ofpbuf ofpacts;
-    int error;
  
      ctx->xout->has_learn = true;
  
@@ -1809,16 +2011,11 @@ xlate_learn_action(struct xlate_ctx *ctx,
          return;
      }
  
-    ofpbuf_use_stack(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
-    learn_execute(learn, &ctx->xin->flow, &fm, &ofpacts);
-
-    error = ofproto_dpif_flow_mod(ctx->xbridge->ofproto, &fm);
-    if (error && !VLOG_DROP_WARN(&rl)) {
-        VLOG_WARN("learning action failed to modify flow table (%s)",
-                  ofperr_get_name(error));
-    }
+    fm = xmalloc(sizeof *fm);
+    ofpbuf_init(&ofpacts, 0);
+    learn_execute(learn, &ctx->xin->flow, fm, &ofpacts);
  
-    ofpbuf_uninit(&ofpacts);
+    ofproto_dpif_flow_mod(ctx->xbridge->ofproto, fm);
  }
  
  /* Reduces '*timeout' to no more than 'max'.  A value of zero in either case
@@ -1878,8 +2075,7 @@ may_receive(const struct xport *xport, struct xlate_ctx *ctx)
       * disabled.  If just learning is enabled, we need to have
       * OFPP_NORMAL and the learning action have a look at the packet
       * before we can drop it. */
-    if (!stp_forward_in_state(xport->stp_state)
-        && !stp_learn_in_state(xport->stp_state)) {
+    if (!xport_stp_forward_state(xport) && !xport_stp_learn_state(xport)) {
          return false;
      }
  
@@ -1919,7 +2115,6 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
          ctx->rule->up.evictable = false;
      }
  
- do_xlate_actions_again:
      OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) {
          struct ofpact_controller *controller;
          const struct ofpact_metadata *metadata;
@@ -1946,12 +2141,14 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
              break;
  
          case OFPACT_SET_VLAN_VID:
+            wc->masks.vlan_tci |= htons(VLAN_VID_MASK | VLAN_CFI);
              flow->vlan_tci &= ~htons(VLAN_VID_MASK);
              flow->vlan_tci |= (htons(ofpact_get_SET_VLAN_VID(a)->vlan_vid)
                                 | htons(VLAN_CFI));
              break;
  
          case OFPACT_SET_VLAN_PCP:
+            wc->masks.vlan_tci |= htons(VLAN_PCP_MASK | VLAN_CFI);
              flow->vlan_tci &= ~htons(VLAN_PCP_MASK);
              flow->vlan_tci |=
                  htons((ofpact_get_SET_VLAN_PCP(a)->vlan_pcp << VLAN_PCP_SHIFT)
@@ -1959,35 +2156,42 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
              break;
  
          case OFPACT_STRIP_VLAN:
+            memset(&wc->masks.vlan_tci, 0xff, sizeof wc->masks.vlan_tci);
              flow->vlan_tci = htons(0);
              break;
  
          case OFPACT_PUSH_VLAN:
              /* XXX 802.1AD(QinQ) */
+            memset(&wc->masks.vlan_tci, 0xff, sizeof wc->masks.vlan_tci);
              flow->vlan_tci = htons(VLAN_CFI);
              break;
  
          case OFPACT_SET_ETH_SRC:
+            memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src);
              memcpy(flow->dl_src, ofpact_get_SET_ETH_SRC(a)->mac, ETH_ADDR_LEN);
              break;
  
          case OFPACT_SET_ETH_DST:
+            memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
              memcpy(flow->dl_dst, ofpact_get_SET_ETH_DST(a)->mac, ETH_ADDR_LEN);
              break;
  
          case OFPACT_SET_IPV4_SRC:
+            memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
              if (flow->dl_type == htons(ETH_TYPE_IP)) {
                  flow->nw_src = ofpact_get_SET_IPV4_SRC(a)->ipv4;
              }
              break;
  
          case OFPACT_SET_IPV4_DST:
+            memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
              if (flow->dl_type == htons(ETH_TYPE_IP)) {
                  flow->nw_dst = ofpact_get_SET_IPV4_DST(a)->ipv4;
              }
              break;
  
          case OFPACT_SET_IPV4_DSCP:
+            wc->masks.nw_tos |= IP_DSCP_MASK;
              /* OpenFlow 1.0 only supports IPv4. */
              if (flow->dl_type == htons(ETH_TYPE_IP)) {
                  flow->nw_tos &= ~IP_DSCP_MASK;
@@ -1997,6 +2201,7 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
  
          case OFPACT_SET_L4_SRC_PORT:
              memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
+            memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
              if (is_ip_any(flow)) {
                  flow->tp_src = htons(ofpact_get_SET_L4_SRC_PORT(a)->port);
              }
@@ -2004,6 +2209,7 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
  
          case OFPACT_SET_L4_DST_PORT:
              memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
+            memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
              if (is_ip_any(flow)) {
                  flow->tp_dst = htons(ofpact_get_SET_L4_DST_PORT(a)->port);
              }
@@ -2039,7 +2245,8 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
              break;
  
          case OFPACT_STACK_POP:
-            nxm_execute_stack_pop(ofpact_get_STACK_POP(a), flow, &ctx->stack);
+            nxm_execute_stack_pop(ofpact_get_STACK_POP(a), flow, wc,
+                                  &ctx->stack);
              break;
  
          case OFPACT_PUSH_MPLS:
@@ -2064,6 +2271,7 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
              break;
  
          case OFPACT_DEC_TTL:
+            wc->masks.nw_ttl = 0xff;
              if (compose_dec_ttl(ctx, ofpact_get_DEC_TTL(a))) {
                  goto out;
              }
@@ -2120,35 +2328,10 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
          case OFPACT_GOTO_TABLE: {
              /* It is assumed that goto-table is the last action. */
              struct ofpact_goto_table *ogt = ofpact_get_GOTO_TABLE(a);
-            struct rule_dpif *rule;
  
              ovs_assert(ctx->table_id < ogt->table_id);
-
-            ctx->table_id = ogt->table_id;
-
-            /* Look up a flow from the new table. */
-            rule = rule_dpif_lookup_in_table(ctx->xbridge->ofproto, flow, wc,
-                                             ctx->table_id);
-
-            ctx->xout->tags |= calculate_flow_tag(ctx->xbridge->ofproto,
-                                                  &ctx->xin->flow,
-                                                  ctx->table_id, rule);
-
-            rule = ctx_rule_hooks(ctx, rule, true);
-
-            if (rule) {
-                if (ctx->rule) {
-                    ctx->rule->up.evictable = was_evictable;
-                }
-                ctx->rule = rule;
-                was_evictable = rule->up.evictable;
-                rule->up.evictable = false;
-
-                /* Tail recursion removal. */
-                ofpacts = rule->up.ofpacts;
-                ofpacts_len = rule->up.ofpacts_len;
-                goto do_xlate_actions_again;
-            }
+            xlate_table_action(ctx, ctx->xin->flow.in_port.ofp_port,
+                               ogt->table_id, true);
              break;
          }
  
@@ -2213,7 +2396,6 @@ void
  xlate_out_copy(struct xlate_out *dst, const struct xlate_out *src)
  {
      dst->wc = src->wc;
-    dst->tags = src->tags;
      dst->slow = src->slow;
      dst->has_learn = src->has_learn;
      dst->has_normal = src->has_normal;
@@ -2227,6 +2409,41 @@ xlate_out_copy(struct xlate_out *dst, const struct xlate_out *src)
                 src->odp_actions.size);
  }
  \f
+static struct skb_priority_to_dscp *
+get_skb_priority(const struct xport *xport, uint32_t skb_priority)
+{
+    struct skb_priority_to_dscp *pdscp;
+    uint32_t hash;
+
+    hash = hash_int(skb_priority, 0);
+    HMAP_FOR_EACH_IN_BUCKET (pdscp, hmap_node, hash, &xport->skb_priorities) {
+        if (pdscp->skb_priority == skb_priority) {
+            return pdscp;
+        }
+    }
+    return NULL;
+}
+
+static bool
+dscp_from_skb_priority(const struct xport *xport, uint32_t skb_priority,
+                       uint8_t *dscp)
+{
+    struct skb_priority_to_dscp *pdscp = get_skb_priority(xport, skb_priority);
+    *dscp = pdscp ? pdscp->dscp : 0;
+    return pdscp != NULL;
+}
+
+static void
+clear_skb_priorities(struct xport *xport)
+{
+    struct skb_priority_to_dscp *pdscp, *next;
+
+    HMAP_FOR_EACH_SAFE (pdscp, next, hmap_node, &xport->skb_priorities) {
+        hmap_remove(&xport->skb_priorities, &pdscp->hmap_node);
+        free(pdscp);
+    }
+}
+
  static bool
  actions_output_to_local_port(const struct xlate_ctx *ctx)
  {
@@ -2249,11 +2466,6 @@ actions_output_to_local_port(const struct xlate_ctx *ctx)
  void
  xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
  {
-    /* Normally false.  Set to true if we ever hit MAX_RESUBMIT_RECURSION, so
-     * that in the future we always keep a copy of the original flow for
-     * tracing purposes. */
-    static bool hit_resubmit_limit;
-
      struct flow_wildcards *wc = &xout->wc;
      struct flow *flow = &xin->flow;
  
@@ -2289,7 +2501,6 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
  
      ctx.xin = xin;
      ctx.xout = xout;
-    ctx.xout->tags = 0;
      ctx.xout->slow = 0;
      ctx.xout->has_learn = false;
      ctx.xout->has_normal = false;
@@ -2319,13 +2530,15 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
  
      if (tnl_port_should_receive(&ctx.xin->flow)) {
          memset(&wc->masks.tunnel, 0xff, sizeof wc->masks.tunnel);
+        /* skb_mark is currently used only by tunnels but that will likely
+         * change in the future. */
+        memset(&wc->masks.skb_mark, 0xff, sizeof wc->masks.skb_mark);
      }
      if (ctx.xbridge->has_netflow) {
          netflow_mask_wc(flow, wc);
      }
  
      ctx.recurse = 0;
-    ctx.max_resubmit_trigger = false;
      ctx.orig_skb_priority = flow->skb_priority;
      ctx.table_id = 0;
      ctx.exit = false;
@@ -2342,7 +2555,7 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
  
      ofpbuf_use_stub(&ctx.stack, ctx.init_stack, sizeof ctx.init_stack);
  
-    if (mbridge_has_mirrors(ctx.xbridge->mbridge) || hit_resubmit_limit) {
+    if (mbridge_has_mirrors(ctx.xbridge->mbridge)) {
          /* Do this conditionally because the copy is expensive enough that it
           * shows up in profiles. */
          orig_flow = *flow;
@@ -2376,7 +2589,6 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
      if (special) {
          ctx.xout->slow = special;
      } else {
-        static struct vlog_rate_limit trace_rl = VLOG_RATE_LIMIT_INIT(1, 1);
          size_t sample_actions_len;
  
          if (flow->in_port.ofp_port
@@ -2395,27 +2607,11 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
  
              /* We've let OFPP_NORMAL and the learning action look at the
               * packet, so drop it now if forwarding is disabled. */
-            if (in_port && !stp_forward_in_state(in_port->stp_state)) {
+            if (in_port && !xport_stp_forward_state(in_port)) {
                  ctx.xout->odp_actions.size = sample_actions_len;
              }
          }
  
-        if (ctx.max_resubmit_trigger && !ctx.xin->resubmit_hook) {
-            if (!hit_resubmit_limit) {
-                /* We didn't record the original flow.  Make sure we do from
-                 * now on. */
-                hit_resubmit_limit = true;
-            } else if (!VLOG_DROP_ERR(&trace_rl)) {
-                struct ds ds = DS_EMPTY_INITIALIZER;
-
-                ofproto_trace(ctx.xbridge->ofproto, &orig_flow,
-                              ctx.xin->packet, &ds);
-                VLOG_ERR("Trace triggered by excessive resubmit "
-                         "recursion:\n%s", ds_cstr(&ds));
-                ds_destroy(&ds);
-            }
-        }
-
          if (ctx.xbridge->has_in_band
              && in_band_must_output_to_local_port(flow)
              && !actions_output_to_local_port(&ctx)) {
diff --git a/ofproto/ofproto-dpif-xlate.h b/ofproto/ofproto-dpif-xlate.h

index 4cb8530..9f8ff44 100644 (file)
--- a/ofproto/ofproto-dpif-xlate.h
+++ b/ofproto/ofproto-dpif-xlate.h
@@ -21,10 +21,10 @@
  #include "ofpbuf.h"
  #include "ofproto-dpif-mirror.h"
  #include "ofproto-dpif.h"
-#include "tag.h"
  
  struct bfd;
  struct bond;
+struct dpif;
  struct lacp;
  struct dpif_ipfix;
  struct dpif_sflow;
@@ -40,7 +40,6 @@ struct xlate_out {
       * set. */
      struct flow_wildcards wc;
  
-    tag_type tags;              /* Tags associated with actions. */
      enum slow_path_reason slow; /* 0 if fast path may be used. */
      bool has_learn;             /* Actions include NXAST_LEARN? */
      bool has_normal;            /* Actions output to OFPP_NORMAL? */
@@ -112,10 +111,11 @@ struct xlate_in {
  };
  
  void xlate_ofproto_set(struct ofproto_dpif *, const char *name,
-                       const struct mac_learning *, const struct mbridge *,
+                       struct dpif *, const struct mac_learning *,
+                       struct stp *, const struct mbridge *,
                         const struct dpif_sflow *, const struct dpif_ipfix *,
                         enum ofp_config_flags, bool forward_bpdu,
-                       bool has_in_band, bool has_netflow, bool has_stp);
+                       bool has_in_band, bool has_netflow);
  void xlate_remove_ofproto(struct ofproto_dpif *);
  
  void xlate_bundle_set(struct ofproto_dpif *, struct ofbundle *,
@@ -129,10 +129,16 @@ void xlate_ofport_set(struct ofproto_dpif *, struct ofbundle *,
                        struct ofport_dpif *, ofp_port_t, odp_port_t,
                        const struct netdev *, const struct cfm *,
                        const struct bfd *, struct ofport_dpif *peer,
-                      enum ofputil_port_config, enum stp_state, bool is_tunnel,
+                      int stp_port_no, const struct ofproto_port_queue *qdscp,
+                      size_t n_qdscp, enum ofputil_port_config, bool is_tunnel,
                        bool may_enable);
  void xlate_ofport_remove(struct ofport_dpif *);
  
+int xlate_receive(const struct dpif_backer *, struct ofpbuf *packet,
+                  const struct nlattr *key, size_t key_len,
+                  struct flow *, enum odp_key_fitness *,
+                  struct ofproto_dpif **, odp_port_t *odp_in_port);
+
  void xlate_actions(struct xlate_in *, struct xlate_out *);
  void xlate_in_init(struct xlate_in *, struct ofproto_dpif *,
                     const struct flow *, struct rule_dpif *,
@@ -140,5 +146,4 @@ void xlate_in_init(struct xlate_in *, struct ofproto_dpif *,
  void xlate_out_uninit(struct xlate_out *);
  void xlate_actions_for_side_effects(struct xlate_in *);
  void xlate_out_copy(struct xlate_out *dst, const struct xlate_out *src);
-
  #endif /* ofproto-dpif-xlate.h */
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c

index 839de69..a8e5cd5 100644 (file)
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -71,6 +71,7 @@ COVERAGE_DEFINE(facet_revalidate);
  COVERAGE_DEFINE(facet_unexpected);
  COVERAGE_DEFINE(facet_suppress);
  COVERAGE_DEFINE(subfacet_install_fail);
+COVERAGE_DEFINE(flow_mod_overflow);
  
  /* Number of implemented OpenFlow tables. */
  enum { N_TABLES = 255 };
@@ -85,7 +86,6 @@ static struct rule_dpif *rule_dpif_lookup(struct ofproto_dpif *,
                                            struct flow_wildcards *wc);
  
  static void rule_get_stats(struct rule *, uint64_t *packets, uint64_t *bytes);
-static void rule_invalidate(const struct rule_dpif *);
  
  struct ofbundle {
      struct hmap_node hmap_node; /* In struct ofproto's "bundles" hmap. */
@@ -289,7 +289,6 @@ struct ofport_dpif {
      struct list bundle_node;    /* In struct ofbundle's "ports" list. */
      struct cfm *cfm;            /* Connectivity Fault Management, if any. */
      struct bfd *bfd;            /* BFD, if any. */
-    tag_type tag;               /* Tag associated with this port. */
      bool may_enable;            /* May be enabled in bonds. */
      bool is_tunnel;             /* This port is a tunnel. */
      long long int carrier_seq;  /* Carrier status changes. */
@@ -300,7 +299,9 @@ struct ofport_dpif {
      enum stp_state stp_state;   /* Always STP_DISABLED if STP not in use. */
      long long int stp_state_entered;
  
-    struct hmap priorities;     /* Map of attached 'priority_to_dscp's. */
+    /* Queue to DSCP mapping. */
+    struct ofproto_port_queue *qdscp;
+    size_t n_qdscp;
  
      /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
       *
@@ -312,16 +313,6 @@ struct ofport_dpif {
      int vlandev_vid;
  };
  
-/* Node in 'ofport_dpif''s 'priorities' map.  Used to maintain a map from
- * 'priority' (the datapath's term for QoS queue) to the dscp bits which all
- * traffic egressing the 'ofport' with that priority should be marked with. */
-struct priority_to_dscp {
-    struct hmap_node hmap_node; /* Node in 'ofport_dpif''s 'priorities' map. */
-    uint32_t priority;          /* Priority of this queue (see struct flow). */
-
-    uint8_t dscp;               /* DSCP bits to mark outgoing traffic with. */
-};
-
  /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
   *
   * This is deprecated.  It is only for compatibility with broken device drivers
@@ -336,7 +327,6 @@ struct vlan_splinter {
      int vid;
  };
  
-static bool vsp_adjust_flow(const struct ofproto_dpif *, struct flow *);
  static void vsp_remove(struct ofport_dpif *);
  static void vsp_add(struct ofport_dpif *, ofp_port_t realdev_ofp_port, int vid);
  
@@ -357,26 +347,15 @@ static void port_run_fast(struct ofport_dpif *);
  static void port_wait(struct ofport_dpif *);
  static int set_bfd(struct ofport *, const struct smap *);
  static int set_cfm(struct ofport *, const struct cfm_settings *);
-static void ofport_clear_priorities(struct ofport_dpif *);
  static void ofport_update_peer(struct ofport_dpif *);
  static void run_fast_rl(void);
+static int run_fast(struct ofproto *);
  
  struct dpif_completion {
      struct list list_node;
      struct ofoperation *op;
  };
  
-/* Extra information about a classifier table.
- * Currently used just for optimized flow revalidation. */
-struct table_dpif {
-    /* If either of these is nonnull, then this table has a form that allows
-     * flows to be tagged to avoid revalidating most flows for the most common
-     * kinds of flow table changes. */
-    struct cls_table *catchall_table; /* Table that wildcards all fields. */
-    struct cls_table *other_table;    /* Table with any other wildcard set. */
-    uint32_t basis;                   /* Keeps each table's tags separate. */
-};
-
  /* Reasons that we might need to revalidate every facet, and corresponding
   * coverage counters.
   *
@@ -388,14 +367,18 @@ struct table_dpif {
  enum revalidate_reason {
      REV_RECONFIGURE = 1,       /* Switch configuration changed. */
      REV_STP,                   /* Spanning tree protocol port status change. */
+    REV_BOND,                  /* Bonding changed. */
      REV_PORT_TOGGLED,          /* Port enabled or disabled by CFM, LACP, ...*/
      REV_FLOW_TABLE,            /* Flow table changed. */
+    REV_MAC_LEARNING,          /* MAC learning changed. */
      REV_INCONSISTENCY          /* Facet self-check failed. */
  };
  COVERAGE_DEFINE(rev_reconfigure);
  COVERAGE_DEFINE(rev_stp);
+COVERAGE_DEFINE(rev_bond);
  COVERAGE_DEFINE(rev_port_toggled);
  COVERAGE_DEFINE(rev_flow_table);
+COVERAGE_DEFINE(rev_mac_learning);
  COVERAGE_DEFINE(rev_inconsistency);
  
  /* Drop keys are odp flow keys which have drop flows installed in the kernel.
@@ -418,13 +401,14 @@ struct dpif_backer {
      int refcount;
      struct dpif *dpif;
      struct timer next_expiration;
-    struct hmap odp_to_ofport_map; /* ODP port to ofport mapping. */
+
+    struct ovs_rwlock odp_to_ofport_lock;
+    struct hmap odp_to_ofport_map OVS_GUARDED; /* ODP port to ofport map. */
  
      struct simap tnl_backers;      /* Set of dpif ports backing tunnels. */
  
      /* Facet revalidation flags applying to facets which use this backer. */
      enum revalidate_reason need_revalidate; /* Revalidate every facet. */
-    struct tag_set revalidate_set; /* Revalidate only matching facets. */
  
      struct hmap drop_keys; /* Set of dropped odp keys. */
      bool recv_set_enable; /* Enables or disables receiving packets. */
@@ -463,8 +447,6 @@ struct dpif_backer {
  static struct shash all_dpif_backers = SHASH_INITIALIZER(&all_dpif_backers);
  
  static void drop_key_clear(struct dpif_backer *);
-static struct ofport_dpif *
-odp_port_to_ofport(const struct dpif_backer *, odp_port_t odp_port);
  static void update_moving_averages(struct dpif_backer *backer);
  
  struct ofproto_dpif {
@@ -490,9 +472,6 @@ struct ofproto_dpif {
      struct classifier facets;     /* Contains 'struct facet's. */
      long long int consistency_rl;
  
-    /* Revalidation. */
-    struct table_dpif tables[N_TABLES];
-
      /* Support for debugging async flow mods. */
      struct list completions;
  
@@ -504,8 +483,9 @@ struct ofproto_dpif {
      long long int stp_last_tick;
  
      /* VLAN splinters. */
-    struct hmap realdev_vid_map; /* (realdev,vid) -> vlandev. */
-    struct hmap vlandev_map;     /* vlandev -> (realdev,vid). */
+    struct ovs_mutex vsp_mutex;
+    struct hmap realdev_vid_map OVS_GUARDED; /* (realdev,vid) -> vlandev. */
+    struct hmap vlandev_map OVS_GUARDED;     /* vlandev -> (realdev,vid). */
  
      /* Ports. */
      struct sset ports;             /* Set of standard port names. */
@@ -516,6 +496,11 @@ struct ofproto_dpif {
      /* Per ofproto's dpif stats. */
      uint64_t n_hit;
      uint64_t n_missed;
+
+    /* Work queues. */
+    struct ovs_mutex flow_mod_mutex;
+    struct list flow_mods OVS_GUARDED;
+    size_t n_flow_mods OVS_GUARDED;
  };
  
  /* Defer flow mod completion until "ovs-appctl ofproto/unclog"?  (Useful only
@@ -540,6 +525,8 @@ ofproto_dpif_cast(const struct ofproto *ofproto)
  
  static struct ofport_dpif *get_ofp_port(const struct ofproto_dpif *ofproto,
                                          ofp_port_t ofp_port);
+static void ofproto_trace(struct ofproto_dpif *, const struct flow *,
+                          const struct ofpbuf *packet, struct ds *);
  
  /* Upcalls. */
  #define FLOW_MISS_MAX_BATCH 50
@@ -560,11 +547,23 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
  /* Initial mappings of port to bridge mappings. */
  static struct shash init_ofp_ports = SHASH_INITIALIZER(&init_ofp_ports);
  
-int
+/* Takes ownership of 'fm': queues it, or frees it if the queue is full. */
+void
  ofproto_dpif_flow_mod(struct ofproto_dpif *ofproto,
                        struct ofputil_flow_mod *fm)
  {
-    return ofproto_flow_mod(&ofproto->up, fm);
+    ovs_mutex_lock(&ofproto->flow_mod_mutex);
+    if (ofproto->n_flow_mods > 1024) {
+        ovs_mutex_unlock(&ofproto->flow_mod_mutex);
+        COVERAGE_INC(flow_mod_overflow);
+        free(fm->ofpacts);
+        free(fm);
+        return;
+    }
+
+    list_push_back(&ofproto->flow_mods, &fm->list_node);
+    ofproto->n_flow_mods++;
+    ovs_mutex_unlock(&ofproto->flow_mod_mutex);
  }
  
  void
@@ -700,10 +699,7 @@ type_run(const char *type)
          backer->need_revalidate = REV_RECONFIGURE;
      }
  
-    if (backer->need_revalidate
-        || !tag_set_is_empty(&backer->revalidate_set)) {
-        struct tag_set revalidate_set = backer->revalidate_set;
-        bool need_revalidate = backer->need_revalidate;
+    if (backer->need_revalidate) {
          struct ofproto_dpif *ofproto;
          struct simap_node *node;
          struct simap tmp_backers;
@@ -764,66 +760,62 @@ type_run(const char *type)
          switch (backer->need_revalidate) {
          case REV_RECONFIGURE:   COVERAGE_INC(rev_reconfigure);   break;
          case REV_STP:           COVERAGE_INC(rev_stp);           break;
+        case REV_BOND:          COVERAGE_INC(rev_bond);          break;
          case REV_PORT_TOGGLED:  COVERAGE_INC(rev_port_toggled);  break;
          case REV_FLOW_TABLE:    COVERAGE_INC(rev_flow_table);    break;
+        case REV_MAC_LEARNING:  COVERAGE_INC(rev_mac_learning);  break;
          case REV_INCONSISTENCY: COVERAGE_INC(rev_inconsistency); break;
          }
-
-        if (backer->need_revalidate) {
-            /* Clear the drop_keys in case we should now be accepting some
-             * formerly dropped flows. */
-            drop_key_clear(backer);
-        }
-
-        /* Clear the revalidation flags. */
-        tag_set_init(&backer->revalidate_set);
          backer->need_revalidate = 0;
  
+        /* Clear the drop_keys in case we should now be accepting some
+         * formerly dropped flows. */
+        drop_key_clear(backer);
+
          HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
              struct facet *facet, *next;
+            struct ofport_dpif *ofport;
              struct cls_cursor cursor;
+            struct ofbundle *bundle;
  
              if (ofproto->backer != backer) {
                  continue;
              }
  
-            if (need_revalidate) {
-                struct ofport_dpif *ofport;
-                struct ofbundle *bundle;
-
-                xlate_ofproto_set(ofproto, ofproto->up.name, ofproto->ml,
-                                  ofproto->mbridge, ofproto->sflow,
-                                  ofproto->ipfix, ofproto->up.frag_handling,
-                                  ofproto->up.forward_bpdu,
-                                  connmgr_has_in_band(ofproto->up.connmgr),
-                                  ofproto->netflow != NULL,
-                                  ofproto->stp != NULL);
-
-                HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
-                    xlate_bundle_set(ofproto, bundle, bundle->name,
-                                     bundle->vlan_mode, bundle->vlan,
-                                     bundle->trunks, bundle->use_priority_tags,
-                                     bundle->bond, bundle->lacp,
-                                     bundle->floodable);
-                }
+            xlate_ofproto_set(ofproto, ofproto->up.name,
+                              ofproto->backer->dpif, ofproto->ml,
+                              ofproto->stp, ofproto->mbridge,
+                              ofproto->sflow, ofproto->ipfix,
+                              ofproto->up.frag_handling,
+                              ofproto->up.forward_bpdu,
+                              connmgr_has_in_band(ofproto->up.connmgr),
+                              ofproto->netflow != NULL);
  
-                HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
-                    xlate_ofport_set(ofproto, ofport->bundle, ofport,
-                                     ofport->up.ofp_port, ofport->odp_port,
-                                     ofport->up.netdev, ofport->cfm,
-                                     ofport->bfd, ofport->peer,
-                                     ofport->up.pp.config, ofport->stp_state,
-                                     ofport->is_tunnel, ofport->may_enable);
-                }
+            HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
+                xlate_bundle_set(ofproto, bundle, bundle->name,
+                                 bundle->vlan_mode, bundle->vlan,
+                                 bundle->trunks, bundle->use_priority_tags,
+                                 bundle->bond, bundle->lacp,
+                                 bundle->floodable);
+            }
+
+            HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
+                int stp_port = ofport->stp_port
+                    ? stp_port_no(ofport->stp_port)
+                    : 0;
+                xlate_ofport_set(ofproto, ofport->bundle, ofport,
+                                 ofport->up.ofp_port, ofport->odp_port,
+                                 ofport->up.netdev, ofport->cfm,
+                                 ofport->bfd, ofport->peer, stp_port,
+                                 ofport->qdscp, ofport->n_qdscp,
+                                 ofport->up.pp.config, ofport->is_tunnel,
+                                 ofport->may_enable);
              }
  
              cls_cursor_init(&cursor, &ofproto->facets, NULL);
              CLS_CURSOR_FOR_EACH_SAFE (facet, next, cr, &cursor) {
-                if (need_revalidate
-                    || tag_set_intersects(&revalidate_set, facet->xout.tags)) {
-                    facet_revalidate(facet);
-                    run_fast_rl();
-                }
+                facet_revalidate(facet);
+                run_fast_rl();
              }
          }
      }
@@ -962,10 +954,12 @@ process_dpif_port_change(struct dpif_backer *backer, const char *devname)
              /* 'ofport''s datapath port number has changed from
               * 'ofport->odp_port' to 'port.port_no'.  Update our internal data
               * structures to match. */
+            ovs_rwlock_wrlock(&backer->odp_to_ofport_lock);
              hmap_remove(&backer->odp_to_ofport_map, &ofport->odp_port_node);
              ofport->odp_port = port.port_no;
              hmap_insert(&backer->odp_to_ofport_map, &ofport->odp_port_node,
                          hash_odp_port(port.port_no));
+            ovs_rwlock_unlock(&backer->odp_to_ofport_lock);
              backer->need_revalidate = REV_RECONFIGURE;
          }
      }
@@ -1040,13 +1034,9 @@ run_fast_rl(void)
  
      if (time_msec() >= port_rl) {
          struct ofproto_dpif *ofproto;
-        struct ofport_dpif *ofport;
  
          HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
-
-            HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
-                port_run_fast(ofport);
-            }
+            run_fast(&ofproto->up);
          }
          port_rl = time_msec() + 200;
      }
@@ -1121,6 +1111,7 @@ close_dpif_backer(struct dpif_backer *backer)
      hmap_destroy(&backer->drop_keys);
  
      simap_destroy(&backer->tnl_backers);
+    ovs_rwlock_destroy(&backer->odp_to_ofport_lock);
      hmap_destroy(&backer->odp_to_ofport_map);
      node = shash_find(&all_dpif_backers, backer->type);
      free(backer->type);
@@ -1199,12 +1190,12 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp)
      backer->governor = NULL;
      backer->refcount = 1;
      hmap_init(&backer->odp_to_ofport_map);
+    ovs_rwlock_init(&backer->odp_to_ofport_lock);
      hmap_init(&backer->drop_keys);
      hmap_init(&backer->subfacets);
      timer_set_duration(&backer->next_expiration, 1000);
      backer->need_revalidate = 0;
      simap_init(&backer->tnl_backers);
-    tag_set_init(&backer->revalidate_set);
      backer->recv_set_enable = !ofproto_get_flow_restore_wait();
      *backerp = backer;
  
@@ -1265,7 +1256,6 @@ construct(struct ofproto *ofproto_)
      struct shash_node *node, *next;
      odp_port_t max_ports;
      int error;
-    int i;
  
      error = open_dpif_backer(ofproto->up.type, &ofproto->backer);
      if (error) {
@@ -1284,20 +1274,19 @@ construct(struct ofproto *ofproto_)
      ofproto->ml = mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME);
      ofproto->mbridge = mbridge_create();
      ofproto->has_bonded_bundles = false;
+    ovs_mutex_init(&ofproto->vsp_mutex, PTHREAD_MUTEX_NORMAL);
  
      classifier_init(&ofproto->facets);
      ofproto->consistency_rl = LLONG_MIN;
  
-    for (i = 0; i < N_TABLES; i++) {
-        struct table_dpif *table = &ofproto->tables[i];
-
-        table->catchall_table = NULL;
-        table->other_table = NULL;
-        table->basis = random_uint32();
-    }
-
      list_init(&ofproto->completions);
  
+    ovs_mutex_init(&ofproto->flow_mod_mutex, PTHREAD_MUTEX_NORMAL);
+    ovs_mutex_lock(&ofproto->flow_mod_mutex);
+    list_init(&ofproto->flow_mods);
+    ofproto->n_flow_mods = 0;
+    ovs_mutex_unlock(&ofproto->flow_mod_mutex);
+
      ofproto_dpif_unixctl_init();
  
      hmap_init(&ofproto->vlandev_map);
@@ -1428,6 +1417,7 @@ destruct(struct ofproto *ofproto_)
  {
      struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
      struct rule_dpif *rule, *next_rule;
+    struct ofputil_flow_mod *fm, *next_fm;
      struct oftable *table;
  
      ofproto->backer->need_revalidate = REV_RECONFIGURE;
@@ -1445,6 +1435,16 @@ destruct(struct ofproto *ofproto_)
          }
      }
  
+    ovs_mutex_lock(&ofproto->flow_mod_mutex);
+    LIST_FOR_EACH_SAFE (fm, next_fm, list_node, &ofproto->flow_mods) {
+        list_remove(&fm->list_node);
+        ofproto->n_flow_mods--;
+        free(fm->ofpacts);
+        free(fm);
+    }
+    ovs_mutex_unlock(&ofproto->flow_mod_mutex);
+    ovs_mutex_destroy(&ofproto->flow_mod_mutex);
+
      mbridge_unref(ofproto->mbridge);
  
      netflow_destroy(ofproto->netflow);
@@ -1461,6 +1461,8 @@ destruct(struct ofproto *ofproto_)
      sset_destroy(&ofproto->ghost_ports);
      sset_destroy(&ofproto->port_poll_set);
  
+    ovs_mutex_destroy(&ofproto->vsp_mutex);
+
      close_dpif_backer(ofproto->backer);
  }
  
@@ -1468,7 +1470,9 @@ static int
  run_fast(struct ofproto *ofproto_)
  {
      struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
+    struct ofputil_flow_mod *fm, *next;
      struct ofport_dpif *ofport;
+    struct list flow_mods;
  
      /* Do not perform any periodic activity required by 'ofproto' while
       * waiting for flow restore to complete. */
@@ -1476,6 +1480,29 @@ run_fast(struct ofproto *ofproto_)
          return 0;
      }
  
+    ovs_mutex_lock(&ofproto->flow_mod_mutex);
+    if (ofproto->n_flow_mods) {
+        flow_mods = ofproto->flow_mods;
+        list_moved(&flow_mods);
+        list_init(&ofproto->flow_mods);
+        ofproto->n_flow_mods = 0;
+    } else {
+        list_init(&flow_mods);
+    }
+    ovs_mutex_unlock(&ofproto->flow_mod_mutex);
+
+    LIST_FOR_EACH_SAFE (fm, next, list_node, &flow_mods) {
+        int error = ofproto_flow_mod(&ofproto->up, fm);
+        if (error && !VLOG_DROP_WARN(&rl)) {
+            VLOG_WARN("learning action failed to modify flow table (%s)",
+                      ofperr_get_name(error));
+        }
+
+        list_remove(&fm->list_node);
+        free(fm->ofpacts);
+        free(fm);
+    }
+
      HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
          port_run_fast(ofport);
      }
@@ -1497,7 +1524,9 @@ run(struct ofproto *ofproto_)
  
      if (mbridge_need_revalidate(ofproto->mbridge)) {
          ofproto->backer->need_revalidate = REV_RECONFIGURE;
-        mac_learning_flush(ofproto->ml, NULL);
+        ovs_rwlock_wrlock(&ofproto->ml->rwlock);
+        mac_learning_flush(ofproto->ml);
+        ovs_rwlock_unlock(&ofproto->ml->rwlock);
      }
  
      /* Do not perform any periodic activity below required by 'ofproto' while
@@ -1528,7 +1557,11 @@ run(struct ofproto *ofproto_)
      }
  
      stp_run(ofproto);
-    mac_learning_run(ofproto->ml, &ofproto->backer->revalidate_set);
+    ovs_rwlock_wrlock(&ofproto->ml->rwlock);
+    if (mac_learning_run(ofproto->ml)) {
+        ofproto->backer->need_revalidate = REV_MAC_LEARNING;
+    }
+    ovs_rwlock_unlock(&ofproto->ml->rwlock);
  
      /* Check the consistency of a random facet, to aid debugging. */
      if (time_msec() >= ofproto->consistency_rl
@@ -1546,11 +1579,8 @@ run(struct ofproto *ofproto_)
                            hmap_node);
          facet = CONTAINER_OF(cr, struct facet, cr);
  
-        if (!tag_set_intersects(&ofproto->backer->revalidate_set,
-                                facet->xout.tags)) {
-            if (!facet_check_consistency(facet)) {
-                ofproto->backer->need_revalidate = REV_INCONSISTENCY;
-            }
+        if (!facet_check_consistency(facet)) {
+            ofproto->backer->need_revalidate = REV_INCONSISTENCY;
          }
      }
  
@@ -1577,9 +1607,6 @@ wait(struct ofproto *ofproto_)
      if (ofproto->sflow) {
          dpif_sflow_wait(ofproto->sflow);
      }
-    if (!tag_set_is_empty(&ofproto->backer->revalidate_set)) {
-        poll_immediate_wake();
-    }
      HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
          port_wait(ofport);
      }
@@ -1589,7 +1616,9 @@ wait(struct ofproto *ofproto_)
      if (ofproto->netflow) {
          netflow_wait(ofproto->netflow);
      }
+    ovs_rwlock_rdlock(&ofproto->ml->rwlock);
      mac_learning_wait(ofproto->ml);
+    ovs_rwlock_unlock(&ofproto->ml->rwlock);
      stp_wait(ofproto);
      if (ofproto->backer->need_revalidate) {
          /* Shouldn't happen, but if it does just go around again. */
@@ -1713,13 +1742,13 @@ port_construct(struct ofport *port_)
      port->bundle = NULL;
      port->cfm = NULL;
      port->bfd = NULL;
-    port->tag = tag_create_random();
      port->may_enable = true;
      port->stp_port = NULL;
      port->stp_state = STP_DISABLED;
      port->is_tunnel = false;
      port->peer = NULL;
-    hmap_init(&port->priorities);
+    port->qdscp = NULL;
+    port->n_qdscp = 0;
      port->realdev_ofp_port = 0;
      port->vlandev_vid = 0;
      port->carrier_seq = netdev_get_carrier_resets(netdev);
@@ -1758,8 +1787,10 @@ port_construct(struct ofport *port_)
              return EBUSY;
          }
  
+        ovs_rwlock_wrlock(&ofproto->backer->odp_to_ofport_lock);
          hmap_insert(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node,
                      hash_odp_port(port->odp_port));
+        ovs_rwlock_unlock(&ofproto->backer->odp_to_ofport_lock);
      }
      dpif_port_destroy(&dpif_port);
  
@@ -1800,7 +1831,9 @@ port_destruct(struct ofport *port_)
      }
  
      if (port->odp_port != ODPP_NONE && !port->is_tunnel) {
+        ovs_rwlock_wrlock(&ofproto->backer->odp_to_ofport_lock);
          hmap_remove(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node);
+        ovs_rwlock_unlock(&ofproto->backer->odp_to_ofport_lock);
      }
  
      tnl_port_del(port);
@@ -1813,8 +1846,7 @@ port_destruct(struct ofport *port_)
          dpif_sflow_del_port(ofproto->sflow, port->odp_port);
      }
  
-    ofport_clear_priorities(port);
-    hmap_destroy(&port->priorities);
+    free(port->qdscp);
  }
  
  static void
@@ -2042,7 +2074,7 @@ set_stp(struct ofproto *ofproto_, const struct ofproto_stp_settings *s)
              set_stp_port(ofport, NULL);
          }
  
-        stp_destroy(ofproto->stp);
+        stp_unref(ofproto->stp);
          ofproto->stp = NULL;
      }
  
@@ -2088,8 +2120,9 @@ update_stp_port_state(struct ofport_dpif *ofport)
          if (stp_learn_in_state(ofport->stp_state)
                  != stp_learn_in_state(state)) {
              /* xxx Learning action flows should also be flushed. */
-            mac_learning_flush(ofproto->ml,
-                               &ofproto->backer->revalidate_set);
+            ovs_rwlock_wrlock(&ofproto->ml->rwlock);
+            mac_learning_flush(ofproto->ml);
+            ovs_rwlock_unlock(&ofproto->ml->rwlock);
          }
          fwd_change = stp_forward_in_state(ofport->stp_state)
                          != stp_forward_in_state(state);
@@ -2194,7 +2227,9 @@ stp_run(struct ofproto_dpif *ofproto)
          }
  
          if (stp_check_and_reset_fdb_flush(ofproto->stp)) {
-            mac_learning_flush(ofproto->ml, &ofproto->backer->revalidate_set);
+            ovs_rwlock_wrlock(&ofproto->ml->rwlock);
+            mac_learning_flush(ofproto->ml);
+            ovs_rwlock_unlock(&ofproto->ml->rwlock);
          }
      }
  }
@@ -2206,129 +2241,25 @@ stp_wait(struct ofproto_dpif *ofproto)
          poll_timer_wait(1000);
      }
  }
-
-/* Returns true if STP should process 'flow'.  Sets fields in 'wc' that
- * were used to make the determination.*/
-bool
-stp_should_process_flow(const struct flow *flow, struct flow_wildcards *wc)
-{
-    memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
-    return eth_addr_equals(flow->dl_dst, eth_addr_stp);
-}
-
-void
-stp_process_packet(const struct ofport_dpif *ofport,
-                   const struct ofpbuf *packet)
-{
-    struct ofpbuf payload = *packet;
-    struct eth_header *eth = payload.data;
-    struct stp_port *sp = ofport->stp_port;
-
-    /* Sink packets on ports that have STP disabled when the bridge has
-     * STP enabled. */
-    if (!sp || stp_port_get_state(sp) == STP_DISABLED) {
-        return;
-    }
-
-    /* Trim off padding on payload. */
-    if (payload.size > ntohs(eth->eth_type) + ETH_HEADER_LEN) {
-        payload.size = ntohs(eth->eth_type) + ETH_HEADER_LEN;
-    }
-
-    if (ofpbuf_try_pull(&payload, ETH_HEADER_LEN + LLC_HEADER_LEN)) {
-        stp_received_bpdu(sp, payload.data, payload.size);
-    }
-}
  \f
-int
-ofproto_dpif_queue_to_priority(const struct ofproto_dpif *ofproto,
-                               uint32_t queue_id, uint32_t *priority)
-{
-    return dpif_queue_to_priority(ofproto->backer->dpif, queue_id, priority);
-}
-
-static struct priority_to_dscp *
-get_priority(const struct ofport_dpif *ofport, uint32_t priority)
-{
-    struct priority_to_dscp *pdscp;
-    uint32_t hash;
-
-    hash = hash_int(priority, 0);
-    HMAP_FOR_EACH_IN_BUCKET (pdscp, hmap_node, hash, &ofport->priorities) {
-        if (pdscp->priority == priority) {
-            return pdscp;
-        }
-    }
-    return NULL;
-}
-
-bool
-ofproto_dpif_dscp_from_priority(const struct ofport_dpif *ofport,
-                                uint32_t priority, uint8_t *dscp)
-{
-    struct priority_to_dscp *pdscp = get_priority(ofport, priority);
-    *dscp = pdscp ? pdscp->dscp : 0;
-    return pdscp != NULL;
-}
-
-static void
-ofport_clear_priorities(struct ofport_dpif *ofport)
-{
-    struct priority_to_dscp *pdscp, *next;
-
-    HMAP_FOR_EACH_SAFE (pdscp, next, hmap_node, &ofport->priorities) {
-        hmap_remove(&ofport->priorities, &pdscp->hmap_node);
-        free(pdscp);
-    }
-}
-
  static int
-set_queues(struct ofport *ofport_,
-           const struct ofproto_port_queue *qdscp_list,
+set_queues(struct ofport *ofport_, const struct ofproto_port_queue *qdscp,
             size_t n_qdscp)
  {
      struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
      struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
-    struct hmap new = HMAP_INITIALIZER(&new);
-    size_t i;
-
-    for (i = 0; i < n_qdscp; i++) {
-        struct priority_to_dscp *pdscp;
-        uint32_t priority;
-        uint8_t dscp;
-
-        dscp = (qdscp_list[i].dscp << 2) & IP_DSCP_MASK;
-        if (dpif_queue_to_priority(ofproto->backer->dpif, qdscp_list[i].queue,
-                                   &priority)) {
-            continue;
-        }
-
-        pdscp = get_priority(ofport, priority);
-        if (pdscp) {
-            hmap_remove(&ofport->priorities, &pdscp->hmap_node);
-        } else {
-            pdscp = xmalloc(sizeof *pdscp);
-            pdscp->priority = priority;
-            pdscp->dscp = dscp;
-            ofproto->backer->need_revalidate = REV_RECONFIGURE;
-        }
-
-        if (pdscp->dscp != dscp) {
-            pdscp->dscp = dscp;
-            ofproto->backer->need_revalidate = REV_RECONFIGURE;
-        }
-
-        hmap_insert(&new, &pdscp->hmap_node, hash_int(pdscp->priority, 0));
-    }
  
-    if (!hmap_is_empty(&ofport->priorities)) {
-        ofport_clear_priorities(ofport);
+    if (ofport->n_qdscp != n_qdscp
+        || (n_qdscp && memcmp(ofport->qdscp, qdscp,
+                              n_qdscp * sizeof *qdscp))) {
          ofproto->backer->need_revalidate = REV_RECONFIGURE;
+        free(ofport->qdscp);
+        ofport->qdscp = n_qdscp
+            ? xmemdup(qdscp, n_qdscp * sizeof *qdscp)
+            : NULL;
+        ofport->n_qdscp = n_qdscp;
      }
  
-    hmap_swap(&new, &ofport->priorities);
-    hmap_destroy(&new);
-
      return 0;
  }
  \f
@@ -2351,6 +2282,7 @@ bundle_flush_macs(struct ofbundle *bundle, bool all_ofprotos)
      struct mac_entry *mac, *next_mac;
  
      ofproto->backer->need_revalidate = REV_RECONFIGURE;
+    ovs_rwlock_wrlock(&ml->rwlock);
      LIST_FOR_EACH_SAFE (mac, next_mac, lru_node, &ml->lrus) {
          if (mac->port.p == bundle) {
              if (all_ofprotos) {
@@ -2360,11 +2292,12 @@ bundle_flush_macs(struct ofbundle *bundle, bool all_ofprotos)
                      if (o != ofproto) {
                          struct mac_entry *e;
  
-                        e = mac_learning_lookup(o->ml, mac->mac, mac->vlan,
-                                                NULL);
+                        ovs_rwlock_wrlock(&o->ml->rwlock);
+                        e = mac_learning_lookup(o->ml, mac->mac, mac->vlan);
                          if (e) {
                              mac_learning_expire(o->ml, e);
                          }
+                        ovs_rwlock_unlock(&o->ml->rwlock);
                      }
                  }
              }
@@ -2372,6 +2305,7 @@ bundle_flush_macs(struct ofbundle *bundle, bool all_ofprotos)
              mac_learning_expire(ml, mac);
          }
      }
+    ovs_rwlock_unlock(&ml->rwlock);
  }
  
  static struct ofbundle *
@@ -2715,6 +2649,7 @@ bundle_send_learning_packets(struct ofbundle *bundle)
      struct mac_entry *e;
  
      error = n_packets = n_errors = 0;
+    ovs_rwlock_rdlock(&ofproto->ml->rwlock);
      LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) {
          if (e->port.p != bundle) {
              struct ofpbuf *learning_packet;
@@ -2737,6 +2672,7 @@ bundle_send_learning_packets(struct ofbundle *bundle)
              n_packets++;
          }
      }
+    ovs_rwlock_unlock(&ofproto->ml->rwlock);
  
      if (n_errors) {
          static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
@@ -2762,8 +2698,10 @@ bundle_run(struct ofbundle *bundle)
              bond_slave_set_may_enable(bundle->bond, port, port->may_enable);
          }
  
-        bond_run(bundle->bond, &bundle->ofproto->backer->revalidate_set,
-                 lacp_status(bundle->lacp));
+        if (bond_run(bundle->bond, lacp_status(bundle->lacp))) {
+            bundle->ofproto->backer->need_revalidate = REV_BOND;
+        }
+
          if (bond_should_send_learning_packets(bundle->bond)) {
              bundle_send_learning_packets(bundle);
          }
@@ -2829,9 +2767,11 @@ static int
  set_flood_vlans(struct ofproto *ofproto_, unsigned long *flood_vlans)
  {
      struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
+    ovs_rwlock_wrlock(&ofproto->ml->rwlock);
      if (mac_learning_set_flood_vlans(ofproto->ml, flood_vlans)) {
-        mac_learning_flush(ofproto->ml, &ofproto->backer->revalidate_set);
+        mac_learning_flush(ofproto->ml);
      }
+    ovs_rwlock_unlock(&ofproto->ml->rwlock);
      return 0;
  }
  
@@ -2855,8 +2795,10 @@ set_mac_table_config(struct ofproto *ofproto_, unsigned int idle_time,
                       size_t max_entries)
  {
      struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
+    ovs_rwlock_wrlock(&ofproto->ml->rwlock);
      mac_learning_set_idle_time(ofproto->ml, idle_time);
      mac_learning_set_max_entries(ofproto->ml, max_entries);
+    ovs_rwlock_unlock(&ofproto->ml->rwlock);
  }
  \f
  /* Ports. */
@@ -3525,7 +3467,7 @@ handle_flow_miss_with_facet(struct flow_miss *miss, struct facet *facet,
          subfacet_update_stats(subfacet, stats);
      }
  
-    if (miss->upcall_type == DPIF_UC_MISS || subfacet->path != want_path) {
+    if (subfacet->path != want_path) {
          struct flow_miss_op *op = &ops[(*n_ops)++];
          struct dpif_flow_put *put = &op->dpif_op.u.flow_put;
  
@@ -3656,98 +3598,6 @@ drop_key_clear(struct dpif_backer *backer)
      }
  }
  
-/* Given a datpath, packet, and flow metadata ('backer', 'packet', and 'key'
- * respectively), populates 'flow' with the result of odp_flow_key_to_flow().
- * Optionally, if nonnull, populates 'fitnessp' with the fitness of 'flow' as
- * returned by odp_flow_key_to_flow().  Also, optionally populates 'ofproto'
- * with the ofproto_dpif, and 'odp_in_port' with the datapath in_port, that
- * 'packet' ingressed.
- *
- * If 'ofproto' is nonnull, requires 'flow''s in_port to exist.  Otherwise sets
- * 'flow''s in_port to OFPP_NONE.
- *
- * This function does post-processing on data returned from
- * odp_flow_key_to_flow() to help make VLAN splinters transparent to the rest
- * of the upcall processing logic.  In particular, if the extracted in_port is
- * a VLAN splinter port, it replaces flow->in_port by the "real" port, sets
- * flow->vlan_tci correctly for the VLAN of the VLAN splinter port, and pushes
- * a VLAN header onto 'packet' (if it is nonnull).
- *
- * Similarly, this function also includes some logic to help with tunnels.  It
- * may modify 'flow' as necessary to make the tunneling implementation
- * transparent to the upcall processing logic.
- *
- * Returns 0 if successful, ENODEV if the parsed flow has no associated ofport,
- * or some other positive errno if there are other problems. */
-static int
-ofproto_receive(const struct dpif_backer *backer, struct ofpbuf *packet,
-                const struct nlattr *key, size_t key_len,
-                struct flow *flow, enum odp_key_fitness *fitnessp,
-                struct ofproto_dpif **ofproto, odp_port_t *odp_in_port)
-{
-    const struct ofport_dpif *port;
-    enum odp_key_fitness fitness;
-    int error = ENODEV;
-
-    fitness = odp_flow_key_to_flow(key, key_len, flow);
-    if (fitness == ODP_FIT_ERROR) {
-        error = EINVAL;
-        goto exit;
-    }
-
-    if (odp_in_port) {
-        *odp_in_port = flow->in_port.odp_port;
-    }
-
-    port = (tnl_port_should_receive(flow)
-            ? tnl_port_receive(flow)
-            : odp_port_to_ofport(backer, flow->in_port.odp_port));
-    flow->in_port.ofp_port = port ? port->up.ofp_port : OFPP_NONE;
-    if (!port) {
-        goto exit;
-    }
-
-    /* XXX: Since the tunnel module is not scoped per backer, for a tunnel port
-     * it's theoretically possible that we'll receive an ofport belonging to an
-     * entirely different datapath.  In practice, this can't happen because no
-     * platforms has two separate datapaths which each support tunneling. */
-    ovs_assert(ofproto_dpif_cast(port->up.ofproto)->backer == backer);
-
-    if (vsp_adjust_flow(ofproto_dpif_cast(port->up.ofproto), flow)) {
-        if (packet) {
-            /* Make the packet resemble the flow, so that it gets sent to
-             * an OpenFlow controller properly, so that it looks correct
-             * for sFlow, and so that flow_extract() will get the correct
-             * vlan_tci if it is called on 'packet'.
-             *
-             * The allocated space inside 'packet' probably also contains
-             * 'key', that is, both 'packet' and 'key' are probably part of
-             * a struct dpif_upcall (see the large comment on that
-             * structure definition), so pushing data on 'packet' is in
-             * general not a good idea since it could overwrite 'key' or
-             * free it as a side effect.  However, it's OK in this special
-             * case because we know that 'packet' is inside a Netlink
-             * attribute: pushing 4 bytes will just overwrite the 4-byte
-             * "struct nlattr", which is fine since we don't need that
-             * header anymore. */
-            eth_push_vlan(packet, flow->vlan_tci);
-        }
-        /* We can't reproduce 'key' from 'flow'. */
-        fitness = fitness == ODP_FIT_PERFECT ? ODP_FIT_TOO_MUCH : fitness;
-    }
-    error = 0;
-
-    if (ofproto) {
-        *ofproto = ofproto_dpif_cast(port->up.ofproto);
-    }
-
-exit:
-    if (fitnessp) {
-        *fitnessp = fitness;
-    }
-    return error;
-}
-
  static void
  handle_miss_upcalls(struct dpif_backer *backer, struct dpif_upcall *upcalls,
                      size_t n_upcalls)
@@ -3782,9 +3632,9 @@ handle_miss_upcalls(struct dpif_backer *backer, struct dpif_upcall *upcalls,
          uint32_t hash;
          int error;
  
-        error = ofproto_receive(backer, upcall->packet, upcall->key,
-                                upcall->key_len, &flow, &miss->key_fitness,
-                                &ofproto, &odp_in_port);
+        error = xlate_receive(backer, upcall->packet, upcall->key,
+                              upcall->key_len, &flow, &miss->key_fitness,
+                              &ofproto, &odp_in_port);
          if (error == ENODEV) {
              struct drop_key *drop_key;
  
@@ -3950,8 +3800,8 @@ handle_sflow_upcall(struct dpif_backer *backer,
      struct flow flow;
      odp_port_t odp_in_port;
  
-    if (ofproto_receive(backer, upcall->packet, upcall->key, upcall->key_len,
-                        &flow, NULL, &ofproto, &odp_in_port)
+    if (xlate_receive(backer, upcall->packet, upcall->key, upcall->key_len,
+                      &flow, NULL, &ofproto, &odp_in_port)
          || !ofproto->sflow) {
          return;
      }
@@ -3970,8 +3820,8 @@ handle_flow_sample_upcall(struct dpif_backer *backer,
      union user_action_cookie cookie;
      struct flow flow;
  
-    if (ofproto_receive(backer, upcall->packet, upcall->key, upcall->key_len,
-                        &flow, NULL, &ofproto, NULL)
+    if (xlate_receive(backer, upcall->packet, upcall->key, upcall->key_len,
+                      &flow, NULL, &ofproto, NULL)
          || !ofproto->ipfix) {
          return;
      }
@@ -3995,8 +3845,8 @@ handle_ipfix_upcall(struct dpif_backer *backer,
      struct ofproto_dpif *ofproto;
      struct flow flow;
  
-    if (ofproto_receive(backer, upcall->packet, upcall->key, upcall->key_len,
-                        &flow, NULL, &ofproto, NULL)
+    if (xlate_receive(backer, upcall->packet, upcall->key, upcall->key_len,
+                      &flow, NULL, &ofproto, NULL)
          || !ofproto->ipfix) {
          return;
      }
@@ -4138,7 +3988,7 @@ expire(struct dpif_backer *backer)
  
              HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
                  if (bundle->bond) {
-                    bond_rebalance(bundle->bond, &backer->revalidate_set);
+                    bond_rebalance(bundle->bond);
                  }
              }
          }
@@ -4682,9 +4532,7 @@ facet_lookup_valid(struct ofproto_dpif *ofproto, const struct flow *flow)
  
      facet = facet_find(ofproto, flow);
      if (facet
-        && (ofproto->backer->need_revalidate
-            || tag_set_intersects(&ofproto->backer->revalidate_set,
-                                  facet->xout.tags))
+        && ofproto->backer->need_revalidate
          && !facet_revalidate(facet)) {
          return NULL;
      }
@@ -4752,7 +4600,7 @@ facet_check_consistency(struct facet *facet)
   *     where it is and recompiles its actions anyway.
   *
   *   - If any of 'facet''s subfacets correspond to a new flow according to
- *     ofproto_receive(), 'facet' is removed.
+ *     xlate_receive(), 'facet' is removed.
   *
   *   Returns true if 'facet' is still valid.  False if 'facet' was removed. */
  static bool
@@ -4775,9 +4623,9 @@ facet_revalidate(struct facet *facet)
          struct flow recv_flow;
          int error;
  
-        error = ofproto_receive(ofproto->backer, NULL, subfacet->key,
-                                subfacet->key_len, &recv_flow, NULL,
-                                &recv_ofproto, NULL);
+        error = xlate_receive(ofproto->backer, NULL, subfacet->key,
+                              subfacet->key_len, &recv_flow, NULL,
+                              &recv_ofproto, NULL);
          if (error
              || recv_ofproto != ofproto
              || facet != facet_find(ofproto, &recv_flow)) {
@@ -4831,7 +4679,6 @@ facet_revalidate(struct facet *facet)
      }
  
      /* Update 'facet' now that we've taken care of all the old state. */
-    facet->xout.tags = xout.tags;
      facet->xout.slow = xout.slow;
      facet->xout.has_learn = xout.has_learn;
      facet->xout.has_normal = xout.has_normal;
@@ -4936,9 +4783,11 @@ push_all_stats(void)
  void
  rule_credit_stats(struct rule_dpif *rule, const struct dpif_flow_stats *stats)
  {
+    ovs_mutex_lock(&rule->stats_mutex);
      rule->packet_count += stats->n_packets;
      rule->byte_count += stats->n_bytes;
      ofproto_rule_update_used(&rule->up, stats->used);
+    ovs_mutex_unlock(&rule->stats_mutex);
  }
  \f
  /* Subfacets. */
@@ -5268,7 +5117,7 @@ complete_operation(struct rule_dpif *rule)
  {
      struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
  
-    rule_invalidate(rule);
+    ofproto->backer->need_revalidate = REV_FLOW_TABLE;
      if (clogged) {
          struct dpif_completion *c = xmalloc(sizeof *c);
          c->op = rule->up.pending;
@@ -5296,35 +5145,21 @@ static enum ofperr
  rule_construct(struct rule *rule_)
  {
      struct rule_dpif *rule = rule_dpif_cast(rule_);
-    struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
-    struct rule_dpif *victim;
-    uint8_t table_id;
-
+    ovs_mutex_init(&rule->stats_mutex, PTHREAD_MUTEX_NORMAL);
+    ovs_mutex_lock(&rule->stats_mutex);
      rule->packet_count = 0;
      rule->byte_count = 0;
-
-    table_id = rule->up.table_id;
-    victim = rule_dpif_cast(ofoperation_get_victim(rule->up.pending));
-    if (victim) {
-        rule->tag = victim->tag;
-    } else if (table_id == 0) {
-        rule->tag = 0;
-    } else {
-        struct flow flow;
-
-        miniflow_expand(&rule->up.cr.match.flow, &flow);
-        rule->tag = rule_calculate_tag(&flow, &rule->up.cr.match.mask,
-                                       ofproto->tables[table_id].basis);
-    }
-
+    ovs_mutex_unlock(&rule->stats_mutex);
      complete_operation(rule);
      return 0;
  }
  
  static void
-rule_destruct(struct rule *rule)
+rule_destruct(struct rule *rule_)
  {
-    complete_operation(rule_dpif_cast(rule));
+    struct rule_dpif *rule = rule_dpif_cast(rule_);
+    complete_operation(rule);
+    ovs_mutex_destroy(&rule->stats_mutex);
  }
  
  static void
@@ -5340,8 +5175,10 @@ rule_get_stats(struct rule *rule_, uint64_t *packets, uint64_t *bytes)
  
      /* Start from historical data for 'rule' itself that are no longer tracked
       * in facets.  This counts, for example, facets that have expired. */
+    ovs_mutex_lock(&rule->stats_mutex);
      *packets = rule->packet_count;
      *bytes = rule->byte_count;
+    ovs_mutex_unlock(&rule->stats_mutex);
  }
  
  static void
@@ -5467,158 +5304,16 @@ compose_slow_path(const struct ofproto_dpif *ofproto, const struct flow *flow,
                                           ODPP_NONE);
          odp_put_userspace_action(pid, &cookie, sizeof cookie.slow_path, &buf);
      } else {
-        put_userspace_action(ofproto, &buf, flow, &cookie,
-                             sizeof cookie.slow_path);
+        odp_port_t odp_port;
+        uint32_t pid;
+
+        odp_port = ofp_port_to_odp_port(ofproto, flow->in_port.ofp_port);
+        pid = dpif_port_get_pid(ofproto->backer->dpif, odp_port);
+        odp_put_userspace_action(pid, &cookie, sizeof cookie.slow_path, &buf);
      }
      *actionsp = buf.data;
      *actions_lenp = buf.size;
  }
-
-size_t
-put_userspace_action(const struct ofproto_dpif *ofproto,
-                     struct ofpbuf *odp_actions,
-                     const struct flow *flow,
-                     const union user_action_cookie *cookie,
-                     const size_t cookie_size)
-{
-    uint32_t pid;
-
-    pid = dpif_port_get_pid(ofproto->backer->dpif,
-                            ofp_port_to_odp_port(ofproto,
-                                                 flow->in_port.ofp_port));
-
-    return odp_put_userspace_action(pid, cookie, cookie_size, odp_actions);
-}
-
-tag_type
-calculate_flow_tag(struct ofproto_dpif *ofproto, const struct flow *flow,
-                   uint8_t table_id, struct rule_dpif *rule)
-{
-    if (table_id > 0 && table_id < N_TABLES) {
-        struct table_dpif *table = &ofproto->tables[table_id];
-        if (table->other_table) {
-            return (rule && rule->tag
-                    ? rule->tag
-                    : rule_calculate_tag(flow, &table->other_table->mask,
-                                         table->basis));
-        }
-    }
-
-    return 0;
-}
-\f
-/* Optimized flow revalidation.
- *
- * It's a difficult problem, in general, to tell which facets need to have
- * their actions recalculated whenever the OpenFlow flow table changes.  We
- * don't try to solve that general problem: for most kinds of OpenFlow flow
- * table changes, we recalculate the actions for every facet.  This is
- * relatively expensive, but it's good enough if the OpenFlow flow table
- * doesn't change very often.
- *
- * However, we can expect one particular kind of OpenFlow flow table change to
- * happen frequently: changes caused by MAC learning.  To avoid wasting a lot
- * of CPU on revalidating every facet whenever MAC learning modifies the flow
- * table, we add a special case that applies to flow tables in which every rule
- * has the same form (that is, the same wildcards), except that the table is
- * also allowed to have a single "catch-all" flow that matches all packets.  We
- * optimize this case by tagging all of the facets that resubmit into the table
- * and invalidating the same tag whenever a flow changes in that table.  The
- * end result is that we revalidate just the facets that need it (and sometimes
- * a few more, but not all of the facets or even all of the facets that
- * resubmit to the table modified by MAC learning). */
-
-/* Calculates the tag to use for 'flow' and mask 'mask' when it is inserted
- * into an OpenFlow table with the given 'basis'. */
-tag_type
-rule_calculate_tag(const struct flow *flow, const struct minimask *mask,
-                   uint32_t secret)
-{
-    if (minimask_is_catchall(mask)) {
-        return 0;
-    } else {
-        uint32_t hash = flow_hash_in_minimask(flow, mask, secret);
-        return tag_create_deterministic(hash);
-    }
-}
-
-/* Following a change to OpenFlow table 'table_id' in 'ofproto', update the
- * taggability of that table.
- *
- * This function must be called after *each* change to a flow table.  If you
- * skip calling it on some changes then the pointer comparisons at the end can
- * be invalid if you get unlucky.  For example, if a flow removal causes a
- * cls_table to be destroyed and then a flow insertion causes a cls_table with
- * different wildcards to be created with the same address, then this function
- * will incorrectly skip revalidation. */
-static void
-table_update_taggable(struct ofproto_dpif *ofproto, uint8_t table_id)
-{
-    struct table_dpif *table = &ofproto->tables[table_id];
-    const struct oftable *oftable = &ofproto->up.tables[table_id];
-    struct cls_table *catchall, *other;
-    struct cls_table *t;
-
-    catchall = other = NULL;
-
-    switch (hmap_count(&oftable->cls.tables)) {
-    case 0:
-        /* We could tag this OpenFlow table but it would make the logic a
-         * little harder and it's a corner case that doesn't seem worth it
-         * yet. */
-        break;
-
-    case 1:
-    case 2:
-        HMAP_FOR_EACH (t, hmap_node, &oftable->cls.tables) {
-            if (cls_table_is_catchall(t)) {
-                catchall = t;
-            } else if (!other) {
-                other = t;
-            } else {
-                /* Indicate that we can't tag this by setting both tables to
-                 * NULL.  (We know that 'catchall' is already NULL.) */
-                other = NULL;
-            }
-        }
-        break;
-
-    default:
-        /* Can't tag this table. */
-        break;
-    }
-
-    if (table->catchall_table != catchall || table->other_table != other) {
-        table->catchall_table = catchall;
-        table->other_table = other;
-        ofproto->backer->need_revalidate = REV_FLOW_TABLE;
-    }
-}
-
-/* Given 'rule' that has changed in some way (either it is a rule being
- * inserted, a rule being deleted, or a rule whose actions are being
- * modified), marks facets for revalidation to ensure that packets will be
- * forwarded correctly according to the new state of the flow table.
- *
- * This function must be called after *each* change to a flow table.  See
- * the comment on table_update_taggable() for more information. */
-static void
-rule_invalidate(const struct rule_dpif *rule)
-{
-    struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
-
-    table_update_taggable(ofproto, rule->up.table_id);
-
-    if (!ofproto->backer->need_revalidate) {
-        struct table_dpif *table = &ofproto->tables[rule->up.table_id];
-
-        if (table->other_table && rule->tag) {
-            tag_set_add(&ofproto->backer->revalidate_set, rule->tag);
-        } else {
-            ofproto->backer->need_revalidate = REV_FLOW_TABLE;
-        }
-    }
-}
  \f
  static bool
  set_frag_handling(struct ofproto *ofproto_,
@@ -5762,10 +5457,14 @@ ofproto_unixctl_fdb_flush(struct unixctl_conn *conn, int argc,
              unixctl_command_reply_error(conn, "no such bridge");
              return;
          }
-        mac_learning_flush(ofproto->ml, &ofproto->backer->revalidate_set);
+        ovs_rwlock_wrlock(&ofproto->ml->rwlock);
+        mac_learning_flush(ofproto->ml);
+        ovs_rwlock_unlock(&ofproto->ml->rwlock);
      } else {
          HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
-            mac_learning_flush(ofproto->ml, &ofproto->backer->revalidate_set);
+            ovs_rwlock_wrlock(&ofproto->ml->rwlock);
+            mac_learning_flush(ofproto->ml);
+            ovs_rwlock_unlock(&ofproto->ml->rwlock);
          }
      }
  
@@ -5794,6 +5493,7 @@ ofproto_unixctl_fdb_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
      }
  
      ds_put_cstr(&ds, " port  VLAN  MAC                Age\n");
+    ovs_rwlock_rdlock(&ofproto->ml->rwlock);
      LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) {
          struct ofbundle *bundle = e->port.p;
          char name[OFP_MAX_PORT_NAME_LEN];
@@ -5804,6 +5504,7 @@ ofproto_unixctl_fdb_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
                        name, e->vlan, ETH_ADDR_ARGS(e->mac),
                        mac_entry_age(ofproto->ml, e));
      }
+    ovs_rwlock_unlock(&ofproto->ml->rwlock);
      unixctl_command_reply(conn, ds_cstr(&ds));
      ds_destroy(&ds);
  }
@@ -5966,10 +5667,8 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, int argc, const char *argv[],
              backer = node->data;
          }
  
-        /* Extract the ofproto_dpif object from the ofproto_receive()
-         * function. */
-        if (ofproto_receive(backer, NULL, odp_key.data,
-                            odp_key.size, &flow, NULL, &ofproto, NULL)) {
+        if (xlate_receive(backer, NULL, odp_key.data, odp_key.size, &flow,
+                          NULL, &ofproto, NULL)) {
              unixctl_command_reply_error(conn, "Invalid datapath flow");
              goto exit;
          }
@@ -6020,7 +5719,7 @@ exit:
      ofpbuf_uninit(&odp_mask);
  }
  
-void
+static void
  ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow,
                const struct ofpbuf *packet, struct ds *ds)
  {
@@ -6457,7 +6156,7 @@ ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn,
          }
  
          odp_flow_format(subfacet->key, subfacet->key_len,
-                        mask.data, mask.size, &ds);
+                        mask.data, mask.size, &ds, false);
  
          ds_put_format(&ds, ", packets:%"PRIu64", bytes:%"PRIu64", used:",
                        subfacet->dp_packet_count, subfacet->dp_byte_count);
@@ -6599,20 +6298,20 @@ hash_realdev_vid(ofp_port_t realdev_ofp_port, int vid)
  
  bool
  ofproto_has_vlan_splinters(const struct ofproto_dpif *ofproto)
+    OVS_EXCLUDED(ofproto->vsp_mutex)
  {
-    return !hmap_is_empty(&ofproto->realdev_vid_map);
+    bool ret;
+
+    ovs_mutex_lock(&ofproto->vsp_mutex);
+    ret = !hmap_is_empty(&ofproto->realdev_vid_map);
+    ovs_mutex_unlock(&ofproto->vsp_mutex);
+    return ret;
  }
  
-/* Returns the OFP port number of the Linux VLAN device that corresponds to
- * 'vlan_tci' on the network device with port number 'realdev_ofp_port' in
- * 'struct ofport_dpif'.  For example, given 'realdev_ofp_port' of eth0 and
- * 'vlan_tci' 9, it would return the port number of eth0.9.
- *
- * Unless VLAN splinters are enabled for port 'realdev_ofp_port', this
- * function just returns its 'realdev_ofp_port' argument. */
-ofp_port_t
-vsp_realdev_to_vlandev(const struct ofproto_dpif *ofproto,
-                       ofp_port_t realdev_ofp_port, ovs_be16 vlan_tci)
+static ofp_port_t
+vsp_realdev_to_vlandev__(const struct ofproto_dpif *ofproto,
+                         ofp_port_t realdev_ofp_port, ovs_be16 vlan_tci)
+    OVS_REQUIRES(ofproto->vsp_mutex)
  {
      if (!hmap_is_empty(&ofproto->realdev_vid_map)) {
          int vid = vlan_tci_to_vid(vlan_tci);
@@ -6630,6 +6329,26 @@ vsp_realdev_to_vlandev(const struct ofproto_dpif *ofproto,
      return realdev_ofp_port;
  }
  
+/* Maps the VLAN in 'vlan_tci' on the network device whose OFP port number
+ * is 'realdev_ofp_port' to the OFP port number of the corresponding Linux
+ * VLAN device.  For instance, if 'realdev_ofp_port' is eth0 and 'vlan_tci'
+ * is 9, the result is the port number of eth0.9.
+ *
+ * The result is simply 'realdev_ofp_port' unless VLAN splinters are enabled
+ * for that port.  Holds 'ofproto->vsp_mutex' during the lookup. */
+ofp_port_t
+vsp_realdev_to_vlandev(const struct ofproto_dpif *ofproto,
+                       ofp_port_t realdev_ofp_port, ovs_be16 vlan_tci)
+    OVS_EXCLUDED(ofproto->vsp_mutex)
+{
+    ofp_port_t vlandev;
+
+    ovs_mutex_lock(&ofproto->vsp_mutex);
+    vlandev = vsp_realdev_to_vlandev__(ofproto, realdev_ofp_port, vlan_tci);
+    ovs_mutex_unlock(&ofproto->vsp_mutex);
+    return vlandev;
+}
+
  static struct vlan_splinter *
  vlandev_find(const struct ofproto_dpif *ofproto, ofp_port_t vlandev_ofp_port)
  {
@@ -6658,6 +6377,7 @@ vlandev_find(const struct ofproto_dpif *ofproto, ofp_port_t vlandev_ofp_port)
  static ofp_port_t
  vsp_vlandev_to_realdev(const struct ofproto_dpif *ofproto,
                         ofp_port_t vlandev_ofp_port, int *vid)
+    OVS_REQ_WRLOCK(ofproto->vsp_mutex)
  {
      if (!hmap_is_empty(&ofproto->vlandev_map)) {
          const struct vlan_splinter *vsp;
@@ -6679,13 +6399,16 @@ vsp_vlandev_to_realdev(const struct ofproto_dpif *ofproto,
   * 'flow->vlan_tci' to the VLAN VID, and returns true.  Otherwise (which is
   * always the case unless VLAN splinters are enabled), returns false without
   * making any changes. */
-static bool
+bool
  vsp_adjust_flow(const struct ofproto_dpif *ofproto, struct flow *flow)
+    OVS_EXCLUDED(ofproto->vsp_mutex)
  {
      ofp_port_t realdev;
      int vid;
  
+    ovs_mutex_lock(&ofproto->vsp_mutex);
      realdev = vsp_vlandev_to_realdev(ofproto, flow->in_port.ofp_port, &vid);
+    ovs_mutex_unlock(&ofproto->vsp_mutex);
      if (!realdev) {
          return false;
      }
@@ -6703,6 +6426,7 @@ vsp_remove(struct ofport_dpif *port)
      struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
      struct vlan_splinter *vsp;
  
+    ovs_mutex_lock(&ofproto->vsp_mutex);
      vsp = vlandev_find(ofproto, port->up.ofp_port);
      if (vsp) {
          hmap_remove(&ofproto->vlandev_map, &vsp->vlandev_node);
@@ -6713,6 +6437,7 @@ vsp_remove(struct ofport_dpif *port)
      } else {
          VLOG_ERR("missing vlan device record");
      }
+    ovs_mutex_unlock(&ofproto->vsp_mutex);
  }
  
  static void
@@ -6720,24 +6445,27 @@ vsp_add(struct ofport_dpif *port, ofp_port_t realdev_ofp_port, int vid)
  {
      struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
  
+    ovs_mutex_lock(&ofproto->vsp_mutex);
      if (!vsp_vlandev_to_realdev(ofproto, port->up.ofp_port, NULL)
-        && (vsp_realdev_to_vlandev(ofproto, realdev_ofp_port, htons(vid))
+        && (vsp_realdev_to_vlandev__(ofproto, realdev_ofp_port, htons(vid))
              == realdev_ofp_port)) {
          struct vlan_splinter *vsp;
  
          vsp = xmalloc(sizeof *vsp);
-        hmap_insert(&ofproto->vlandev_map, &vsp->vlandev_node,
-                    hash_ofp_port(port->up.ofp_port));
-        hmap_insert(&ofproto->realdev_vid_map, &vsp->realdev_vid_node,
-                    hash_realdev_vid(realdev_ofp_port, vid));
          vsp->realdev_ofp_port = realdev_ofp_port;
          vsp->vlandev_ofp_port = port->up.ofp_port;
          vsp->vid = vid;
  
          port->realdev_ofp_port = realdev_ofp_port;
+
+        hmap_insert(&ofproto->vlandev_map, &vsp->vlandev_node,
+                    hash_ofp_port(port->up.ofp_port));
+        hmap_insert(&ofproto->realdev_vid_map, &vsp->realdev_vid_node,
+                    hash_realdev_vid(realdev_ofp_port, vid));
      } else {
          VLOG_ERR("duplicate vlan device record");
      }
+    ovs_mutex_unlock(&ofproto->vsp_mutex);
  }
  
  static odp_port_t
@@ -6747,18 +6475,21 @@ ofp_port_to_odp_port(const struct ofproto_dpif *ofproto, ofp_port_t ofp_port)
      return ofport ? ofport->odp_port : ODPP_NONE;
  }
  
-static struct ofport_dpif *
+struct ofport_dpif * /* NOTE(review): lock is dropped before 'port' is used. */
  odp_port_to_ofport(const struct dpif_backer *backer, odp_port_t odp_port)
  {
      struct ofport_dpif *port;
  
+    ovs_rwlock_rdlock(&backer->odp_to_ofport_lock); /* Guard the lookup. */
      HMAP_FOR_EACH_IN_BUCKET (port, odp_port_node, hash_odp_port(odp_port),
                               &backer->odp_to_ofport_map) {
          if (port->odp_port == odp_port) {
+            ovs_rwlock_unlock(&backer->odp_to_ofport_lock); /* Found. */
              return port;
          }
      }
  
+    ovs_rwlock_unlock(&backer->odp_to_ofport_lock); /* No match found. */
      return NULL;
  }
  
diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h

index b220423..88593ce 100644 (file)
--- a/ofproto/ofproto-dpif.h
+++ b/ofproto/ofproto-dpif.h
@@ -19,13 +19,14 @@
  
  #include "hmapx.h"
  #include "ofproto/ofproto-provider.h"
-#include "tag.h"
+#include "ovs-thread.h"
  #include "timer.h"
  #include "util.h"
  
  union user_action_cookie;
  struct ofproto_dpif;
  struct ofport_dpif;
+struct dpif_backer;
  
  struct rule_dpif {
      struct rule up;
@@ -43,10 +44,9 @@ struct rule_dpif {
       *     packet_count or byte_count member or that can be obtained from the
       *     datapath by, e.g., dpif_flow_get() for any subfacet.
       */
-    uint64_t packet_count;       /* Number of packets received. */
-    uint64_t byte_count;         /* Number of bytes received. */
-
-    tag_type tag;                /* Caches rule_calculate_tag() result. */
+    struct ovs_mutex stats_mutex;
+    uint64_t packet_count OVS_GUARDED;  /* Number of packets received. */
+    uint64_t byte_count OVS_GUARDED;    /* Number of bytes received. */
  };
  
  static inline struct rule_dpif *rule_dpif_cast(const struct rule *rule)
@@ -59,40 +59,21 @@ struct rule_dpif *rule_dpif_lookup_in_table(struct ofproto_dpif *,
                                              struct flow_wildcards *,
                                              uint8_t table_id);
  
-tag_type rule_calculate_tag(const struct flow *flow, const struct minimask *,
-                            uint32_t secret);
-
  struct rule_dpif *rule_dpif_miss_rule(struct ofproto_dpif *ofproto,
                                        const struct flow *);
  
  void rule_credit_stats(struct rule_dpif *, const struct dpif_flow_stats *);
  
-void ofproto_trace(struct ofproto_dpif *, const struct flow *,
-                   const struct ofpbuf *packet, struct ds *);
-
-size_t put_userspace_action(const struct ofproto_dpif *,
-                            struct ofpbuf *odp_actions, const struct flow *,
-                            const union user_action_cookie *,
-                            const size_t cookie_size);
-
-bool stp_should_process_flow(const struct flow *, struct flow_wildcards *);
-void stp_process_packet(const struct ofport_dpif *,
-                        const struct ofpbuf *packet);
-
  bool ofproto_has_vlan_splinters(const struct ofproto_dpif *);
  ofp_port_t vsp_realdev_to_vlandev(const struct ofproto_dpif *,
                                    ofp_port_t realdev_ofp_port,
                                    ovs_be16 vlan_tci);
-
-bool ofproto_dpif_dscp_from_priority(const struct ofport_dpif *,
-                                     uint32_t priority, uint8_t *dscp);
-int ofproto_dpif_queue_to_priority(const struct ofproto_dpif *,
-                                   uint32_t queue_id, uint32_t *priority);
-tag_type calculate_flow_tag(struct ofproto_dpif *, const struct flow *,
-                            uint8_t table_id, struct rule_dpif *);
+bool vsp_adjust_flow(const struct ofproto_dpif *, struct flow *);
  
  void ofproto_dpif_send_packet_in(struct ofproto_dpif *,
                                   struct ofputil_packet_in *pin);
-int ofproto_dpif_flow_mod(struct ofproto_dpif *, struct ofputil_flow_mod *);
+void ofproto_dpif_flow_mod(struct ofproto_dpif *, struct ofputil_flow_mod *);
+
+struct ofport_dpif *odp_port_to_ofport(const struct dpif_backer *, odp_port_t);
  
  #endif /* ofproto-dpif.h */
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c

index 432aef3..0625ccf 100644 (file)
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -3094,8 +3094,7 @@ ofproto_get_netflow_ids(const struct ofproto *ofproto,
   * Returns false if the port did not have CFM configured, in which case
   * '*status' is indeterminate.
   *
- * The caller must provide and owns '*status', but it does not own and must not
- * modify or free the array returned in 'status->rmps'. */
+ * The caller must provide and owns '*status', and must free 'status->rmps'. */
  bool
  ofproto_port_get_cfm_status(const struct ofproto *ofproto, ofp_port_t ofp_port,
                              struct ofproto_cfm_status *status)
diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h

index 792df89..1bde385 100644 (file)
--- a/ofproto/ofproto.h
+++ b/ofproto/ofproto.h
@@ -27,7 +27,6 @@
  #include "netflow.h"
  #include "sset.h"
  #include "stp.h"
-#include "tag.h"
  
  #ifdef  __cplusplus
  extern "C" {
@@ -410,7 +409,7 @@ struct ofproto_cfm_status {
      int health;
  
      /* MPIDs of remote maintenance points whose CCMs have been received. */
-    const uint64_t *rmps;
+    uint64_t *rmps;
      size_t n_rmps;
  };
  
diff --git a/tests/bfd.at b/tests/bfd.at

index d95f8ab..c54fff0 100644 (file)
--- a/tests/bfd.at
+++ b/tests/bfd.at
@@ -35,7 +35,6 @@ AT_CHECK([ovs-appctl bfd/show $1 | sed -n '/RX Interval/p'],[0],
  ])
  ])
  AT_SETUP([bfd - basic config on different bridges])
-ovs-appctl time/stop
  #Create 2 bridges connected by patch ports and enable BFD
  OVS_VSWITCHD_START(
     [add-br br1 -- \
@@ -47,6 +46,7 @@ OVS_VSWITCHD_START(
         options:peer=p1 -- \
      set Interface p0 bfd:enable=true -- \
      set Interface p1 bfd:enable=true ])
+ovs-appctl time/stop
  for i in `seq 0 40`; do ovs-appctl time/warp 100; done
  
  #Verify that BFD has been enabled on both interfaces.
@@ -82,7 +82,6 @@ AT_SETUP([bfd - Verify tunnel down detection])
  #interfaces in br-bfd0 and br-bfd1. When br-sw is dropping all packets, BFD should detect
  # that the tunnel is down, and come back up when br-sw is working fine.
  
-ovs-appctl time/stop
  OVS_VSWITCHD_START(
     [add-br br-bfd0 -- \
         set bridge br-bfd0 datapath-type=dummy \
@@ -102,6 +101,7 @@ OVS_VSWITCHD_START(
      add-port br-bfd0 p0 -- set Interface p0 type=patch \
         options:peer=p0-sw bfd:enable=true --])
  
+ovs-appctl time/stop
  
  #Create 2 bridges connected by patch ports and enable BFD
  
@@ -158,12 +158,11 @@ AT_CLEANUP
  
  AT_SETUP([bfd - concatenated path down])
  #Create 2 bridges connected by patch ports and enable BFD
-ovs-appctl time/stop
  OVS_VSWITCHD_START()
+ovs-appctl time/stop
  AT_CHECK([ ovs-vsctl -- add-br br1 -- \
             set bridge br1 datapath-type=dummy \
             other-config:hwaddr=aa:55:aa:56:00:00 ])
-ovs-appctl time/stop
  AT_CHECK([ ovs-vsctl -- add-port br1 p1 -- set Interface p1 type=patch \
             options:peer=p0 ])
  AT_CHECK([ ovs-vsctl -- add-port br0 p0 -- set Interface p0 type=patch \
@@ -186,9 +185,8 @@ AT_CLEANUP
  
  AT_SETUP([bfd - Edit the Min Tx/Rx values])
  #Create 2 bridges connected by patch ports and enable BFD
-ovs-appctl time/stop
-ovs-appctl vlog/set bfd:dbg
  OVS_VSWITCHD_START()
+ovs-appctl time/stop
  AT_CHECK([ ovs-vsctl -- add-br br1 -- \
             set bridge br1 datapath-type=dummy ])
  AT_CHECK([ ovs-vsctl -- add-port br1 p1 -- set Interface p1 type=patch \
@@ -197,7 +195,7 @@ AT_CHECK([ ovs-vsctl -- add-port br0 p0 -- set Interface p0 type=patch \
             options:peer=p1 ])
  AT_CHECK([ ovs-vsctl -- set interface p0 bfd:enable=true ])
  AT_CHECK([ ovs-vsctl -- set interface p1 bfd:enable=true ])
-for i in `seq 0 20`; do ovs-appctl time/warp 100; done
+for i in `seq 0 30`; do ovs-appctl time/warp 100; done
  #Verify that BFD has been enabled on both interfaces.
  BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic])
  BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic])
diff --git a/tests/odp.at b/tests/odp.at

index 5776b95..469e120 100644 (file)
--- a/tests/odp.at
+++ b/tests/odp.at
@@ -24,7 +24,7 @@ in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv
  in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=58,tclass=0,hlimit=128,frag=no),icmpv6(type=136,code=0),nd(target=::3,tll=00:0a:0b:0c:0d:0e)
  in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=58,tclass=0,hlimit=128,frag=no),icmpv6(type=136,code=0),nd(target=::3,sll=00:05:06:07:08:09,tll=00:0a:0b:0c:0d:0e)
  in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0806),arp(sip=1.2.3.4,tip=5.6.7.8,op=1,sha=00:0f:10:11:12:13,tha=00:14:15:16:17:18)
-skb_mark(0x1234),in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=58,tclass=0,hlimit=128,frag=no),icmpv6(type=136,code=0),nd(target=::3,sll=00:05:06:07:08:09,tll=00:0a:0b:0c:0d:0e)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=58,tclass=0,hlimit=128,frag=no),icmpv6(type=136,code=0),nd(target=::3,sll=00:05:06:07:08:09,tll=00:0a:0b:0c:0d:0e)
  in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x8847),mpls(label=100,tc=3,ttl=64,bos=1)
  in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x8847),mpls(label=100,tc=7,ttl=100,bos=1)
  in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x8847),mpls(label=100,tc=7,ttl=100,bos=0)
@@ -33,48 +33,53 @@ in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x8848),mpl
  ])
  
  (echo '# Valid forms without tun_id or VLAN header.'
- cat odp-base.txt
+ sed 's/^/skb_priority(0),skb_mark(0),/' odp-base.txt  # Prefixed base keys.
+
+ sed '
+s/^/skb_priority(0),skb_mark(0),/
+' odp-base.txt
+
  
   echo
   echo '# Valid forms with tunnel header.'
- sed 's/^/tunnel(tun_id=0x7f10354,src=10.10.10.10,dst=20.20.20.20,tos=0x0,ttl=64,flags(csum,key)),/' odp-base.txt
+ sed 's/^/skb_priority(0),tunnel(tun_id=0x7f10354,src=10.10.10.10,dst=20.20.20.20,tos=0x0,ttl=64,flags(csum,key)),skb_mark(0x1234),/' odp-base.txt
  
   echo
   echo '# Valid forms with VLAN header.'
- sed 's/\(eth([[^)]]*)\),*/\1,eth_type(0x8100),vlan(vid=99,pcp=7),encap(/
+ sed 's/^/skb_priority(0),skb_mark(0),/
+s/\(eth([[^)]]*)\),*/\1,eth_type(0x8100),vlan(vid=99,pcp=7),encap(/
  s/$/)/' odp-base.txt
  
   echo
   echo '# Valid forms with MPLS header.'
- sed 's/\(eth([[^)]]*),?\)/\1,eth_type(0x8847),mpls(label=100,tc=7,ttl=64,bos=1)/' odp-base.txt
+ sed 's/^/skb_priority(0),skb_mark(0),/
+s/\(eth([[^)]]*),?\)/\1,eth_type(0x8847),mpls(label=100,tc=7,ttl=64,bos=1)/' odp-base.txt
  
   echo
   echo '# Valid forms with MPLS multicast header.'
- sed 's/\(eth([[^)]]*),?\)/\1,eth_type(0x8848),mpls(label=100,tc=7,ttl=64,bos=1)/' odp-base.txt
-
- echo
- echo '# Valid forms with QoS priority.'
- sed 's/^/skb_priority(0x1234),/' odp-base.txt
+ sed 's/^/skb_priority(0),skb_mark(0),/
+s/\(eth([[^)]]*),?\)/\1,eth_type(0x8848),mpls(label=100,tc=7,ttl=64,bos=1)/' odp-base.txt
  
   echo
   echo '# Valid forms with tunnel and VLAN headers.'
- sed 's/^/tunnel(tun_id=0xfedcba9876543210,src=10.0.0.1,dst=10.0.0.2,tos=0x8,ttl=128,flags(key)),/
+ sed 's/^/skb_priority(0),tunnel(tun_id=0xfedcba9876543210,src=10.0.0.1,dst=10.0.0.2,tos=0x8,ttl=128,flags(key)),skb_mark(0),/
  s/\(eth([[^)]]*)\),*/\1,eth_type(0x8100),vlan(vid=99,pcp=7),encap(/
  s/$/)/' odp-base.txt
  
   echo
   echo '# Valid forms with QOS priority, tunnel, and VLAN headers.'
- sed 's/^/skb_priority(0x1234),tunnel(tun_id=0xfedcba9876543210,src=10.10.10.10,dst=20.20.20.20,tos=0x8,ttl=64,flags(key)),/
+ sed 's/^/skb_priority(0x1234),tunnel(tun_id=0xfedcba9876543210,src=10.10.10.10,dst=20.20.20.20,tos=0x8,ttl=64,flags(key)),skb_mark(0),/
  s/\(eth([[^)]]*)\),*/\1,eth_type(0x8100),vlan(vid=99,pcp=7),encap(/
  s/$/)/' odp-base.txt
  
   echo
   echo '# Valid forms with IP first fragment.'
-sed -n 's/,frag=no),/,frag=first),/p' odp-base.txt
+sed 's/^/skb_priority(0),skb_mark(0),/' odp-base.txt | sed -n 's/,frag=no),/,frag=first),/p'
  
   echo
   echo '# Valid forms with IP later fragment.'
-sed -n 's/,frag=no),.*/,frag=later)/p' odp-base.txt) > odp.txt
+sed 's/^/skb_priority(0),skb_mark(0),/' odp-base.txt | sed -n 's/,frag=no),.*/,frag=later)/p'
+) > odp.txt
  AT_CAPTURE_FILE([odp.txt])
  AT_CHECK_UNQUOTED([test-odp parse-keys < odp.txt], [0], [`cat odp.txt`
  ])
@@ -88,7 +93,7 @@ in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x1234/0xff
  in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41/255.255.255.0,dst=172.16.0.20/255.255.255.0,proto=5/0xf0,tos=0x80/0xf0,ttl=128/0xf0,frag=no/0xf0)
  in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=6,tos=0,ttl=128,frag=no),tcp(src=80/0xff00,dst=8080/0xff)
  in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=17,tos=0,ttl=128,frag=no),udp(src=81/0xff00,dst=6632/0xff)
-in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=17,tos=0,ttl=128,frag=no),udp(src=81/0,dst=6632/0)
+in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=17,tos=0,ttl=128,frag=no),udp(src=81/0xff,dst=6632/0xff00)
  in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=1,tos=0,ttl=128,frag=no),icmp(type=1/0xf0,code=2/0xff)
  in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1/::255,dst=::2/::255,label=0/0xf0,proto=10/0xf0,tclass=0x70/0xf0,hlimit=128/0xf0,frag=no/0xf0)
  in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=6,tclass=0,hlimit=128,frag=no),tcp(src=80/0xff00,dst=8080/0xff)
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at

index 2728a28..46e1dea 100644 (file)
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -24,8 +24,8 @@ AT_SETUP([ofproto-dpif - goto table])
  OVS_VSWITCHD_START
  ADD_OF_PORTS([br0], [1], [10], [11])
  echo "table=0 in_port=1 actions=output(10),goto_table(1)" > flows.txt
-for i in `seq 1 252`; do echo "table=$i actions=goto_table($(($i+1)))"; done >> flows.txt
-echo "table=253 actions=output(11)" >> flows.txt
+for i in `seq 1 63`; do echo "table=$i actions=goto_table($(($i+1)))"; done >> flows.txt
+echo "table=64 actions=output(11)" >> flows.txt
  AT_CHECK([ovs-ofctl -O OpenFlow12 add-flows br0 flows.txt])
  AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=1,nw_tos=0,nw_ttl=128,icmp_type=8,icmp_code=0'], [0], [stdout])
  AT_CHECK([tail -1 stdout], [0],
@@ -2088,12 +2088,12 @@ AT_CHECK([ovs-appctl netdev-dummy/receive p2 'in_port(2),eth(src=50:54:00:00:00:
  AT_CHECK([ovs-appctl netdev-dummy/receive p3 'in_port(3),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
  
  AT_CHECK([ovs-appctl dpif/dump-flows br0 | sort | STRIP_USED], [0], [dnl
-in_port(1),eth(src=50:54:00:00:00:05/00:00:00:00:00:00,dst=50:54:00:00:00:07/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=192.168.0.1/0.0.0.0,dst=192.168.0.2/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0x2),icmp(type=8/0,code=0/0), packets:0, bytes:0, used:0.0s, actions:userspace(pid=0,slow_path(controller))
-in_port(2),eth(src=50:54:00:00:00:07/00:00:00:00:00:00,dst=50:54:00:00:00:05/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=192.168.0.2/0.0.0.0,dst=192.168.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0x2),icmp(type=0/0,code=0/0), packets:0, bytes:0, used:0.0s, actions:userspace(pid=0,slow_path(controller))
+skb_priority(0),in_port(1),eth_type(0x0800),ipv4(src=192.168.0.1/0.0.0.0,dst=192.168.0.2/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:0.0s, actions:userspace(pid=0,slow_path(controller))
+skb_priority(0),in_port(2),eth_type(0x0800),ipv4(src=192.168.0.2/0.0.0.0,dst=192.168.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:0.0s, actions:userspace(pid=0,slow_path(controller))
  ])
  
  AT_CHECK([ovs-appctl dpif/dump-flows br1 | sort | STRIP_USED], [0], [dnl
-in_port(3),eth(src=50:54:00:00:00:09/00:00:00:00:00:00,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0x2),icmp(type=8/0,code=0/0), packets:0, bytes:0, used:0.0s, actions:userspace(pid=0,slow_path(controller))
+skb_priority(0),in_port(3),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:0.0s, actions:userspace(pid=0,slow_path(controller))
  ])
  
  OVS_VSWITCHD_STOP
@@ -2110,12 +2110,12 @@ AT_CHECK([ovs-appctl netdev-dummy/receive p2 'in_port(2),eth(src=50:54:00:00:00:
  AT_CHECK([ovs-appctl netdev-dummy/receive p3 'in_port(3),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
  
  AT_CHECK([ovs-appctl dpif/dump-flows br0 | sort | STRIP_USED], [0], [dnl
-in_port(1),eth(src=50:54:00:00:00:05/00:00:00:00:00:00,dst=50:54:00:00:00:07/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=192.168.0.1/0.0.0.0,dst=192.168.0.2/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0x2),icmp(type=8/0,code=0/0), packets:0, bytes:0, used:0.0s, actions:userspace(pid=0,slow_path(controller))
-in_port(2),eth(src=50:54:00:00:00:07/00:00:00:00:00:00,dst=50:54:00:00:00:05/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=192.168.0.2/0.0.0.0,dst=192.168.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0x2),icmp(type=0/0,code=0/0), packets:0, bytes:0, used:0.0s, actions:userspace(pid=0,slow_path(controller))
+skb_priority(0),in_port(1),eth_type(0x0800),ipv4(src=192.168.0.1/0.0.0.0,dst=192.168.0.2/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:0.0s, actions:userspace(pid=0,slow_path(controller))
+skb_priority(0),in_port(2),eth_type(0x0800),ipv4(src=192.168.0.2/0.0.0.0,dst=192.168.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:0.0s, actions:userspace(pid=0,slow_path(controller))
  ])
  
  AT_CHECK([ovs-appctl dpif/dump-flows br1 | sort | STRIP_USED], [0], [dnl
-in_port(3),eth(src=50:54:00:00:00:09/00:00:00:00:00:00,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0x2),icmp(type=8/0,code=0/0), packets:0, bytes:0, used:0.0s, actions:userspace(pid=0,slow_path(controller))
+skb_priority(0),in_port(3),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:0.0s, actions:userspace(pid=0,slow_path(controller))
  ])
  
  AT_CHECK([ovs-appctl dpif/del-flows br0])
@@ -2123,7 +2123,7 @@ AT_CHECK([ovs-appctl dpif/dump-flows br0 | sort | STRIP_USED], [0], [dnl
  ])
  
  AT_CHECK([ovs-appctl dpif/dump-flows br1 | sort | STRIP_USED], [0], [dnl
-in_port(3),eth(src=50:54:00:00:00:09/00:00:00:00:00:00,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0x2),icmp(type=8/0,code=0/0), packets:0, bytes:0, used:0.0s, actions:userspace(pid=0,slow_path(controller))
+skb_priority(0),in_port(3),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:0.0s, actions:userspace(pid=0,slow_path(controller))
  ])
  
  OVS_VSWITCHD_STOP
@@ -2170,10 +2170,10 @@ dummy@ovs-dummy: hit:13 missed:2
  ])
  
  AT_CHECK([ovs-appctl dpif/dump-flows br0 | STRIP_USED], [0], [dnl
-in_port(100),eth(src=50:54:00:00:00:05/00:00:00:00:00:00,dst=50:54:00:00:00:07/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=192.168.0.1/0.0.0.0,dst=192.168.0.2/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0x2),icmp(type=8/0,code=0/0), packets:9, bytes:540, used:0.0s, actions:101,3,2
+skb_priority(0),in_port(100),eth_type(0x0800),ipv4(src=192.168.0.1/0.0.0.0,dst=192.168.0.2/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:9, bytes:540, used:0.0s, actions:101,3,2
  ]),
  AT_CHECK([ovs-appctl dpif/dump-flows br1 | STRIP_USED], [0], [dnl
-in_port(101),eth(src=50:54:00:00:00:07/00:00:00:00:00:00,dst=50:54:00:00:00:05/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=192.168.0.2/0.0.0.0,dst=192.168.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0x2),icmp(type=8/0,code=0/0), packets:4, bytes:240, used:0.0s, actions:100,2,3
+skb_priority(0),in_port(101),eth_type(0x0800),ipv4(src=192.168.0.2/0.0.0.0,dst=192.168.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:4, bytes:240, used:0.0s, actions:100,2,3
  ])
  
  AT_CHECK([ovs-ofctl dump-ports br0 pbr0], [0], [dnl
@@ -2249,6 +2249,9 @@ AT_BANNER([ofproto-dpif -- megaflows])
  
  # Strips out uninteresting parts of megaflow output, as well as parts
  # that vary from one run to another (e.g., timing and bond actions).
+m4_define([STRIP_USED], [[sed '
+    s/used:[0-9]*\.[0-9]*/used:0.0/
+' | sort]])
  m4_define([STRIP_XOUT], [[sed '
      s/used:[0-9]*\.[0-9]*/used:0.0/
      s/Datapath actions:.*/Datapath actions: <del>/
@@ -2633,6 +2636,32 @@ skb_priority=0,icmp,in_port=1,nw_src=10.0.0.4,nw_dst=10.0.0.3,nw_tos=0,nw_ecn=0,
  OVS_VSWITCHD_STOP
  AT_CLEANUP
  
+AT_SETUP([ofproto-dpif megaflow - set dl_dst])
+OVS_VSWITCHD_START
+ADD_OF_PORTS([br0], [1], [2])
+AT_DATA([flows.txt], [dnl
+table=0 in_port=1 actions=mod_dl_dst(50:54:00:00:00:0a),output(2)
+])
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
+dnl The megaflows do not match the same fields, since the first packet
+dnl is essentially a no-op.  (The new destination MAC is the same as the
+dnl original.) The ofproto-dpif library un-wildcards the destination MAC
+dnl so that a packet that doesn't need its MAC address changed doesn't
+dnl hide one that does.  Since the first entry doesn't need to change,
+dnl only the destination MAC address is matched (as decided by
+dnl ofproto-dpif).  The second entry actually updates the destination
+dnl MAC, so both the source and destination MAC addresses are
+dnl un-wildcarded, since the ODP commit functions update both the source
+dnl and destination MAC addresses.
+AT_CHECK([ovs-appctl dpif/dump-megaflows br0 | STRIP_USED], [0], [dnl
+skb_priority=0,ip,in_port=1,dl_dst=50:54:00:00:00:0a,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: 2
+skb_priority=0,ip,in_port=1,dl_src=50:54:00:00:00:0b,dl_dst=50:54:00:00:00:0c,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: set(eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0a)),2
+])
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
  AT_SETUP([ofproto-dpif - datapath port number change])
  OVS_VSWITCHD_START([set Bridge br0 fail-mode=standalone])
  ADD_OF_PORTS([br0], 1)
diff --git a/tests/ovs-ofctl.at b/tests/ovs-ofctl.at

index 8133f75..18f9152 100644 (file)
--- a/tests/ovs-ofctl.at
+++ b/tests/ovs-ofctl.at
@@ -2272,3 +2272,21 @@ AT_CHECK([ovs-ofctl -F oxm -O openflow13], [1], [],
    [ovs-ofctl: missing command name; use --help for help
  ])
  AT_CLEANUP
+
+AT_SETUP([ovs-ofctl ofp-parse])
+# Test the echo request/reply messages (0 payload).
+AT_CHECK([printf '\1\2\0\10\0\0\0\0\1\3\0\10\0\0\0\0' > binary_ofp_msg])
+AT_CHECK([ovs-ofctl ofp-parse binary_ofp_msg], [0], [dnl
+OFPT_ECHO_REQUEST (xid=0x0): 0 bytes of payload
+OFPT_ECHO_REPLY (xid=0x0): 0 bytes of payload
+])
+
+# Test the hello (xid:1 3-byte payload).
+AT_CHECK([printf '\1\0\0\13\0\0\0\1\101\102\103' > binary_ofp_msg])
+AT_CHECK([ovs-ofctl ofp-parse - < binary_ofp_msg], [0], [dnl
+OFPT_HELLO (xid=0x1):
+ version bitmap: 0x01
+ unknown data in hello:
+00000000  01 00 00 0b 00 00 00 01-41 42 43                |........ABC     |
+])
+AT_CLEANUP
diff --git a/tests/test-atomic.c b/tests/test-atomic.c

index 27bf552..e9bd6bd 100644 (file)
--- a/tests/test-atomic.c
+++ b/tests/test-atomic.c
@@ -61,6 +61,16 @@
          ovs_assert(value == 8);                         \
      }
  
+static void
+test_atomic_flag(void)
+{   /* Exercises atomic_flag_test_and_set() and atomic_flag_clear() on a
+     * local flag, asserting the returned value and the flag's 'b' member
+     * after each step. */
+    atomic_flag flag = ATOMIC_FLAG_INIT;
+    ovs_assert(atomic_flag_test_and_set(&flag) == false); /* Was clear. */
+    ovs_assert(flag.b == true);                           /* Now set. */
+    atomic_flag_clear(&flag);
+    ovs_assert(flag.b == false);                          /* Clear again. */
+}
+
  int
  main(void)
  {
@@ -90,5 +100,7 @@ main(void)
      TEST_ATOMIC_TYPE(atomic_uint64_t, uint64_t);
      TEST_ATOMIC_TYPE(atomic_int64_t, int64_t);
  
+    test_atomic_flag();
+
      return 0;
  }
diff --git a/tests/test-odp.c b/tests/test-odp.c

index b1d2853..45605e4 100644 (file)
--- a/tests/test-odp.c
+++ b/tests/test-odp.c
@@ -86,7 +86,7 @@ parse_keys(bool wc_keys)
          ds_init(&out);
          if (wc_keys) {
              odp_flow_format(odp_key.data, odp_key.size,
-                            odp_mask.data, odp_mask.size, &out);
+                            odp_mask.data, odp_mask.size, &out, false);
          } else {
              odp_flow_key_format(odp_key.data, odp_key.size, &out);
          }
diff --git a/tests/test-stp.c b/tests/test-stp.c

index 0acc7e0..28e9a6e 100644 (file)
--- a/tests/test-stp.c
+++ b/tests/test-stp.c
@@ -661,7 +661,7 @@ main(int argc, char *argv[])
      }
      for (i = 0; i < tc->n_bridges; i++) {
          struct bridge *bridge = tc->bridges[i];
-        stp_destroy(bridge->stp);
+        stp_unref(bridge->stp);
          free(bridge);
      }
      free(tc);
diff --git a/utilities/ovs-dpctl.8.in b/utilities/ovs-dpctl.8.in

index 2b0036c..5c01570 100644 (file)
--- a/utilities/ovs-dpctl.8.in
+++ b/utilities/ovs-dpctl.8.in
@@ -118,9 +118,11 @@ exactly one datapath exists, in which case that datapath is the
  default.  When multiple datapaths exist, then a datapath name is
  required.
  .
-.IP "\fBdump\-flows\fR [\fIdp\fR]"
-Prints to the console all flow entries in datapath \fIdp\fR's
-flow table.
+.IP "[\fB\-m \fR| \fB\-\-more\fR] \fBdump\-flows\fR [\fIdp\fR]"
+Prints to the console all flow entries in datapath \fIdp\fR's flow
+table.  Without \fB\-m\fR or \fB\-\-more\fR, output omits match fields
+that a flow wildcards entirely; with \fB\-m\fR or \fB\-\-more\fR,
+output includes all wildcarded fields.
  .
  .IP "\fBadd\-flow\fR [\fIdp\fR] \fIflow actions\fR"
  .IQ "[\fB\-\-clear\fR] [\fB\-\-may-create\fR] [\fB\-s\fR | \fB\-\-statistics\fR] \fBmod\-flow\fR [\fIdp\fR] \fIflow actions\fR"
@@ -159,6 +161,10 @@ Deletes all flow entries from datapath \fIdp\fR's flow table.
  Causes the \fBshow\fR command to print packet and byte counters for
  each port within the datapaths that it shows.
  .
+.IP "\fB\-m\fR"
+.IQ "\fB\-\-more\fR"
+Increases the verbosity of \fBdump\-flows\fR output.
+.
  .IP "\fB\-t\fR"
  .IQ "\fB\-\-timeout=\fIsecs\fR"
  Limits \fBovs\-dpctl\fR runtime to approximately \fIsecs\fR seconds.  If
diff --git a/utilities/ovs-dpctl.c b/utilities/ovs-dpctl.c

index fa78b53..0516d1b 100644 (file)
--- a/utilities/ovs-dpctl.c
+++ b/utilities/ovs-dpctl.c
@@ -182,6 +182,8 @@ usage(void)
      vlog_usage();
      printf("\nOptions for show and mod-flow:\n"
             "  -s,  --statistics           print statistics for port or flow\n"
+           "\nOptions for dump-flows:\n"
+           "  -m, --more                  increase verbosity of output\n"
             "\nOptions for mod-flow:\n"
             "  --may-create                create flow if it doesn't exist\n"
             "  --clear                     reset existing stats to zero\n"
@@ -761,7 +763,7 @@ dpctl_dump_flows(int argc, char *argv[])
                                 &mask, &mask_len,
                                 &actions, &actions_len, &stats)) {
          ds_clear(&ds);
-        odp_flow_format(key, key_len, mask, mask_len, &ds);
+        odp_flow_format(key, key_len, mask, mask_len, &ds, verbosity);
          ds_put_cstr(&ds, ", ");
  
          dpif_flow_stats_format(stats, &ds);
@@ -1050,7 +1052,7 @@ dpctl_normalize_actions(int argc, char *argv[])
          "odp_flow_key_from_string");
  
      ds_clear(&s);
-    odp_flow_format(keybuf.data, keybuf.size, NULL, 0, &s);
+    odp_flow_format(keybuf.data, keybuf.size, NULL, 0, &s, verbosity);
      printf("input flow: %s\n", ds_cstr(&s));
  
      run(odp_flow_key_to_flow(keybuf.data, keybuf.size, &flow),
diff --git a/utilities/ovs-ofctl.8.in b/utilities/ovs-ofctl.8.in

index e66c605..3e6c7fe 100644 (file)
--- a/utilities/ovs-ofctl.8.in
+++ b/utilities/ovs-ofctl.8.in
@@ -370,6 +370,14 @@ response.  Reports the total time required.  This is a measure of the
  maximum bandwidth to \fItarget\fR for round-trips of \fIn\fR-byte
  messages.
  .
+.SS "Other Commands"
+.
+.IP "\fBofp\-parse\fR \fIfile\fR"
+Reads \fIfile\fR (or \fBstdin\fR if \fIfile\fR is \fB\-\fR) as a
+series of OpenFlow messages in the binary format used on an OpenFlow
+connection, and prints them to the console.  This can be useful for
+printing OpenFlow messages captured from a TCP stream.
+.
  .SS "Flow Syntax"
  .PP
  Some \fBovs\-ofctl\fR commands accept an argument that describes a flow or
diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c

index 68b73bf..35a2ca7 100644 (file)
--- a/utilities/ovs-ofctl.c
+++ b/utilities/ovs-ofctl.c
@@ -305,7 +305,9 @@ usage(void)
             "  probe TARGET                probe whether TARGET is up\n"
             "  ping TARGET [N]             latency of N-byte echos\n"
             "  benchmark TARGET N COUNT    bandwidth of COUNT N-byte echos\n"
-           "where SWITCH or TARGET is an active OpenFlow connection method.\n",
+           "SWITCH or TARGET is an active OpenFlow connection method.\n"
+           "\nOther commands:\n"
+           "  ofp-parse FILE              print messages read from FILE\n",
             program_name, program_name);
      vconn_usage(true, false, false);
      daemon_usage();
@@ -1696,6 +1698,56 @@ ofctl_set_frags(int argc OVS_UNUSED, char *argv[])
      vconn_close(vconn);
  }
  
+/* Implements the "ofp-parse FILE" command: reads argv[1] (or stdin when it is
+ * "-") as a back-to-back series of binary OpenFlow messages and prints each
+ * one to stdout with ofp_print().
+ *
+ * Terminates the program via ovs_fatal() if the file cannot be opened, if a
+ * message's length field is smaller than an OpenFlow header, or if the input
+ * ends in the middle of a message.  End of input exactly on a message
+ * boundary ends the loop normally. */
+static void
+ofctl_ofp_parse(int argc OVS_UNUSED, char *argv[])
+{
+    const char *filename = argv[1];
+    struct ofpbuf b;
+    FILE *file;
+
+    /* The input is raw wire-format data, so a named file is opened in binary
+     * mode: text mode would corrupt or truncate it on platforms that
+     * translate line endings. */
+    file = !strcmp(filename, "-") ? stdin : fopen(filename, "rb");
+    if (file == NULL) {
+        ovs_fatal(errno, "%s: open", filename);
+    }
+
+    /* The length field is read with ntohs(), so no message can exceed 65535
+     * bytes and this initial size always suffices. */
+    ofpbuf_init(&b, 65536);
+    for (;;) {
+        struct ofp_header *oh;
+        size_t length, tail_len;
+        void *tail;
+        size_t n;
+
+        /* Read the fixed-size header first; it tells us how much follows. */
+        ofpbuf_clear(&b);
+        oh = ofpbuf_put_uninit(&b, sizeof *oh);
+        n = fread(oh, 1, sizeof *oh, file);
+        if (n == 0) {
+            /* No bytes at all: clean end of input between messages. */
+            break;
+        } else if (n < sizeof *oh) {
+            ovs_fatal(0, "%s: unexpected end of file mid-message", filename);
+        }
+
+        /* 'length' counts the whole message, header included. */
+        length = ntohs(oh->length);
+        if (length < sizeof *oh) {
+            ovs_fatal(0, "%s: %zu-byte message is too short for OpenFlow",
+                      filename, length);
+        }
+
+        /* Read the remainder of the message directly after the header. */
+        tail_len = length - sizeof *oh;
+        tail = ofpbuf_put_uninit(&b, tail_len);
+        n = fread(tail, 1, tail_len, file);
+        if (n < tail_len) {
+            ovs_fatal(0, "%s: unexpected end of file mid-message", filename);
+        }
+
+        ofp_print(stdout, b.data, b.size, verbosity + 2);
+    }
+    ofpbuf_uninit(&b);
+
+    /* stdin is not ours to close. */
+    if (file != stdin) {
+        fclose(file);
+    }
+}
+
  static void
  ofctl_ping(int argc, char *argv[])
  {
@@ -2932,6 +2984,7 @@ static const struct command all_commands[] = {
      { "mod-port", 3, 3, ofctl_mod_port },
      { "get-frags", 1, 1, ofctl_get_frags },
      { "set-frags", 2, 2, ofctl_set_frags },
+    { "ofp-parse", 1, 1, ofctl_ofp_parse },
      { "probe", 1, 1, ofctl_probe },
      { "ping", 1, 2, ofctl_ping },
      { "benchmark", 3, 3, ofctl_benchmark },
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c

index 1460ea2..abbda56 100644 (file)
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -1915,6 +1915,8 @@ iface_refresh_cfm_stats(struct iface *iface)
          } else {
              ovsrec_interface_set_cfm_health(cfg, NULL, 0);
          }
+
+        free(status.rmps);
      }
  }
  
@@ -2445,7 +2447,7 @@ bridge_run(void)
               * process that forked us to exit successfully. */
              daemonize_complete();
  
-            async_append_enable();
+            vlog_enable_async();
  
              VLOG_INFO_ONCE("%s (Open vSwitch) %s", program_name, VERSION);
          }
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml

index 957b02c..b89d58c 100644 (file)
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -1896,6 +1896,15 @@
            <code>false</code>.
        </column>
  
+      <column name="bfd" key="bfd_dst_mac">
+        An Ethernet address in the form
+        <var>xx</var>:<var>xx</var>:<var>xx</var>:<var>xx</var>:<var>xx</var>:<var>xx</var>
+        to set the destination MAC address of the BFD packet. If this
+        field is set, it is assumed that all the BFD packets destined to this
+        interface also have the same destination MAC address. If not set, a
+        default value of <code>00:23:20:00:00:01</code> is used.
+      </column>
+
        <column name="bfd_status" key="state"
            type='{"type": "string",
            "enum": ["set", ["admin_down", "down", "init", "up"]]}'>
author	Giuseppe Lettieri <g.lettieri@iet.unipi.it>
	Thu, 8 Aug 2013 14:42:27 +0000 (16:42 +0200)
committer	Giuseppe Lettieri <g.lettieri@iet.unipi.it>
	Thu, 8 Aug 2013 14:42:27 +0000 (16:42 +0200)
AUTHORS		patch \| blob \| history
CodingStyle		patch \| blob \| history
FAQ		patch \| blob \| history
INSTALL		patch \| blob \| history
NEWS		patch \| blob \| history
OPENFLOW-1.1+		patch \| blob \| history
acinclude.m4		patch \| blob \| history
configure.ac		patch \| blob \| history
datapath/datapath.c		patch \| blob \| history
datapath/flow.c		patch \| blob \| history
datapath/linux/compat/gso.c		patch \| blob \| history
datapath/tunnel.c		patch \| blob \| history
debian/changelog		patch \| blob \| history
lib/async-append-aio.c		patch \| blob \| history
lib/async-append-null.c	[moved from lib/async-append-sync.c with 63% similarity]	patch \| blob \| history
lib/async-append.h		patch \| blob \| history
lib/automake.mk		patch \| blob \| history
lib/bfd.c		patch \| blob \| history
lib/bond.c		patch \| blob \| history
lib/bond.h		patch \| blob \| history
lib/cfm.c		patch \| blob \| history
lib/cfm.h		patch \| blob \| history
lib/dpif-linux.c		patch \| blob \| history
lib/dpif.c		patch \| blob \| history
lib/learning-switch.c		patch \| blob \| history
lib/mac-learning.c		patch \| blob \| history
lib/mac-learning.h		patch \| blob \| history
lib/multipath.c		patch \| blob \| history
lib/netdev-bsd.c		patch \| blob \| history
lib/netdev-dummy.c		patch \| blob \| history
lib/netdev-linux.c		patch \| blob \| history
lib/netdev-linux.h		patch \| blob \| history
lib/netdev-provider.h		patch \| blob \| history
lib/netdev-vport.c		patch \| blob \| history
lib/netdev.c		patch \| blob \| history
lib/nx-match.c		patch \| blob \| history
lib/nx-match.h		patch \| blob \| history
lib/odp-util.c		patch \| blob \| history
lib/odp-util.h		patch \| blob \| history
lib/ofp-util.h		patch \| blob \| history
lib/ovs-atomic-pthreads.c		patch \| blob \| history
lib/ovs-atomic-pthreads.h		patch \| blob \| history
lib/ovs-thread.c		patch \| blob \| history
lib/ovs-thread.h		patch \| blob \| history
lib/packets.h		patch \| blob \| history
lib/stp.c		patch \| blob \| history
lib/stp.h		patch \| blob \| history
lib/tag.c	[deleted file]	patch \| blob \| history
lib/tag.h	[deleted file]	patch \| blob \| history
lib/util.c		patch \| blob \| history
lib/vlog.c		patch \| blob \| history
lib/vlog.h		patch \| blob \| history
ofproto/ofproto-dpif-xlate.c		patch \| blob \| history
ofproto/ofproto-dpif-xlate.h		patch \| blob \| history
ofproto/ofproto-dpif.c		patch \| blob \| history
ofproto/ofproto-dpif.h		patch \| blob \| history
ofproto/ofproto.c		patch \| blob \| history
ofproto/ofproto.h		patch \| blob \| history
tests/bfd.at		patch \| blob \| history
tests/odp.at		patch \| blob \| history
tests/ofproto-dpif.at		patch \| blob \| history
tests/ovs-ofctl.at		patch \| blob \| history
tests/test-atomic.c		patch \| blob \| history
tests/test-odp.c		patch \| blob \| history
tests/test-stp.c		patch \| blob \| history
utilities/ovs-dpctl.8.in		patch \| blob \| history
utilities/ovs-dpctl.c		patch \| blob \| history
utilities/ovs-ofctl.8.in		patch \| blob \| history
utilities/ovs-ofctl.c		patch \| blob \| history
vswitchd/bridge.c		patch \| blob \| history
vswitchd/vswitch.xml		patch \| blob \| history