Sean Brady sbrady@gtfservices.com
Sebastian Andrzej Siewior sebastian@breakpoint.cc
Sébastien RICCIO sr@swisscenter.com
+Spiro Kourtessis spiro@vmware.com
Srini Seetharaman seethara@stanford.edu
Stephen Hemminger shemminger@vyatta.com
Takayuki HAMA t-hama@cb.jp.nec.com
you do this, the "valgrind" results for test <N> are reported in files
named tests/testsuite.dir/<N>/valgrind.*. You may find that the
valgrind results are easier to interpret if you put "-q" in
-~/.valgrindrc, since that reduces the amount of
+~/.valgrindrc, since that reduces the amount of output.
Sometimes a few tests may fail on some runs but not others. This is
usually a bug in the testsuite, not a bug in Open vSwitch itself. If
post-v1.11.0
---------------------
+ - OpenFlow:
+ * New support for matching outer source and destination IP address
+ of tunneled packets, for tunnel ports configured with the newly
+ added "remote_ip=flow" and "local_ip=flow" options.
v1.11.0 - xx xxx xxxx
1.1 and later are now implemented.
* New "stack" extension for use in actions, to push and pop from
NXM fields.
+ * The "load" and "set_field" actions can now modify the "in_port". (This
+ allows one to enable output to a flow's input port by setting the
+ in_port to some unused value, such as OFPP_NONE.)
- ovs-dpctl:
* New debugging commands "add-flow", "mod-flow", "del-flow".
- New syslog format, prefixed with "ovs|", to be easier to filter.
AC_SEARCH_LIBS([pow], [m])
AC_SEARCH_LIBS([clock_gettime], [rt])
AC_SEARCH_LIBS([timer_create], [rt])
+AC_SEARCH_LIBS([pthread_sigmask], [pthread])
OVS_CHECK_ESX
OVS_CHECK_COVERAGE
* - skb->csum does not include the inner Ethernet header.
* - The layer pointers are undefined.
*/
-void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb)
+void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb,
+ struct ovs_key_ipv4_tunnel *tun_key)
{
struct ethhdr *eh;
return;
}
- ovs_vport_receive(vport, skb);
+ ovs_vport_receive(vport, skb, tun_key);
}
static struct rtable *find_route(struct net *net,
return false;
}
-static struct sk_buff *handle_offloads(struct sk_buff *skb,
- const struct rtable *rt,
- int tunnel_hlen)
+static struct sk_buff *handle_offloads(struct sk_buff *skb)
{
- int min_headroom;
int err;
- min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
- + tunnel_hlen
- + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
-
- if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
- int head_delta = SKB_DATA_ALIGN(min_headroom -
- skb_headroom(skb) +
- 16);
- err = pskb_expand_head(skb, max_t(int, head_delta, 0),
- 0, GFP_ATOMIC);
- if (unlikely(err))
- goto error_free;
- }
-
forward_ip_summed(skb, true);
if (skb_is_gso(skb)) {
nskb = __skb_gso_segment(skb, 0, false);
if (IS_ERR(nskb)) {
- kfree_skb(skb);
err = PTR_ERR(nskb);
goto error;
}
if (unlikely(need_linearize(skb))) {
err = __skb_linearize(skb);
if (unlikely(err))
- goto error_free;
+ goto error;
}
err = skb_checksum_help(skb);
if (unlikely(err))
- goto error_free;
+ goto error;
}
set_ip_summed(skb, OVS_CSUM_NONE);
return skb;
-error_free:
- kfree_skb(skb);
error:
return ERR_PTR(err);
}
return (((u64) hash * range) >> 32) + low;
}
-int ovs_tnl_send(struct vport *vport, struct sk_buff *skb)
+int ovs_tnl_send(struct vport *vport, struct sk_buff *skb,
+ u8 ipproto, int tunnel_hlen,
+ void (*build_header)(const struct vport *,
+ struct sk_buff *,
+ int tunnel_hlen))
{
- struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
+ int min_headroom;
struct rtable *rt;
__be32 saddr;
int sent_len = 0;
- int tunnel_hlen;
-
- if (unlikely(!OVS_CB(skb)->tun_key))
- goto error_free;
+ int err;
+ struct sk_buff *nskb;
/* Route lookup */
saddr = OVS_CB(skb)->tun_key->ipv4_src;
rt = find_route(ovs_dp_get_net(vport->dp),
&saddr,
OVS_CB(skb)->tun_key->ipv4_dst,
- tnl_vport->tnl_ops->ipproto,
+ ipproto,
OVS_CB(skb)->tun_key->ipv4_tos,
skb_get_mark(skb));
- if (IS_ERR(rt))
- goto error_free;
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
+ goto error;
+ }
- /* Offloading */
- tunnel_hlen = tnl_vport->tnl_ops->hdr_len(OVS_CB(skb)->tun_key);
tunnel_hlen += sizeof(struct iphdr);
- skb = handle_offloads(skb, rt, tunnel_hlen);
- if (IS_ERR(skb)) {
- skb = NULL;
+ min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
+ + tunnel_hlen
+ + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
+
+ if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
+ int head_delta = SKB_DATA_ALIGN(min_headroom -
+ skb_headroom(skb) +
+ 16);
+
+ err = pskb_expand_head(skb, max_t(int, head_delta, 0),
+ 0, GFP_ATOMIC);
+ if (unlikely(err))
+ goto err_free_rt;
+ }
+
+ /* Offloading */
+ nskb = handle_offloads(skb);
+ if (IS_ERR(nskb)) {
+ err = PTR_ERR(nskb);
goto err_free_rt;
}
+ skb = nskb;
/* Reset SKB */
nf_reset(skb);
struct sk_buff *next_skb = skb->next;
struct iphdr *iph;
int frag_len;
- int err;
skb->next = NULL;
skb_dst_set(skb, &rt_dst(rt));
/* Push Tunnel header. */
- tnl_vport->tnl_ops->build_header(vport, skb, tunnel_hlen);
+ build_header(vport, skb, tunnel_hlen);
/* Push IP header. */
iph = ip_hdr(skb);
iph->version = 4;
iph->ihl = sizeof(struct iphdr) >> 2;
- iph->protocol = tnl_vport->tnl_ops->ipproto;
+ iph->protocol = ipproto;
iph->daddr = OVS_CB(skb)->tun_key->ipv4_dst;
iph->saddr = saddr;
iph->tos = OVS_CB(skb)->tun_key->ipv4_tos;
skb = next_skb;
}
- if (unlikely(sent_len == 0))
- ovs_vport_record_error(vport, VPORT_E_TX_DROPPED);
-
return sent_len;
err_free_rt:
ip_rt_put(rt);
-error_free:
- kfree_skb(skb);
- ovs_vport_record_error(vport, VPORT_E_TX_ERROR);
- return sent_len;
-}
-
-struct vport *ovs_tnl_create(const struct vport_parms *parms,
- const struct vport_ops *vport_ops,
- const struct tnl_ops *tnl_ops)
-{
- struct vport *vport;
- struct tnl_vport *tnl_vport;
- int err;
-
- vport = ovs_vport_alloc(sizeof(struct tnl_vport), vport_ops, parms);
- if (IS_ERR(vport)) {
- err = PTR_ERR(vport);
- goto error;
- }
-
- tnl_vport = tnl_vport_priv(vport);
-
- strcpy(tnl_vport->name, parms->name);
- tnl_vport->tnl_ops = tnl_ops;
-
- return vport;
-
error:
- return ERR_PTR(err);
-}
-
-static void free_port_rcu(struct rcu_head *rcu)
-{
- struct tnl_vport *tnl_vport = container_of(rcu,
- struct tnl_vport, rcu);
-
- ovs_vport_free(vport_from_priv(tnl_vport));
-}
-
-void ovs_tnl_destroy(struct vport *vport)
-{
- struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
-
- call_rcu(&tnl_vport->rcu, free_port_rcu);
-}
-
-const char *ovs_tnl_get_name(const struct vport *vport)
-{
- const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
- return tnl_vport->name;
+ return err;
}
#include "flow.h"
#include "vport.h"
-struct tnl_ops {
- u8 ipproto; /* The IP protocol for the tunnel. */
- /*
- * Returns the length of the tunnel header that will be added in
- * build_header() (i.e. excludes the IP header).
- */
- int (*hdr_len)(const struct ovs_key_ipv4_tunnel *);
- /*
- * Builds header for given SKB. Space will have already been
- * allocated at the start of the packet equal
- * to sizeof(struct iphdr) + value returned by hdr_len().
- */
- void (*build_header)(const struct vport *, struct sk_buff *,
- int tunnel_hlen);
-};
+int ovs_tnl_send(struct vport *vport, struct sk_buff *skb,
+ u8 ipproto, int tunnel_hlen,
+ void (*build_header)(const struct vport *,
+ struct sk_buff *,
+ int tunnel_hlen));
-struct tnl_vport {
- struct rcu_head rcu;
-
- __be16 dst_port;
- char name[IFNAMSIZ];
- const struct tnl_ops *tnl_ops;
-};
-
-struct vport *ovs_tnl_create(const struct vport_parms *, const struct vport_ops *,
- const struct tnl_ops *);
-void ovs_tnl_destroy(struct vport *);
-
-const char *ovs_tnl_get_name(const struct vport *vport);
-int ovs_tnl_send(struct vport *vport, struct sk_buff *skb);
-void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb);
+void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb,
+ struct ovs_key_ipv4_tunnel *tun_key);
u16 ovs_tnl_get_src_port(struct sk_buff *skb);
-static inline struct tnl_vport *tnl_vport_priv(const struct vport *vport)
-{
- return vport_priv(vport);
-}
-
static inline void tnl_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key,
const struct iphdr *iph, __be64 tun_id, u32 tun_flags)
{
iph = ip_hdr(skb);
tnl_flags = gre_flags_to_tunnel_flags(gre_flags, is_gre64);
tnl_tun_key_init(&tun_key, iph, key, tnl_flags);
- OVS_CB(skb)->tun_key = &tun_key;
__skb_pull(skb, hdr_len);
skb_postpull_rcsum(skb, skb_transport_header(skb), hdr_len + ETH_HLEN);
- ovs_tnl_rcv(vport, skb);
+ ovs_tnl_rcv(vport, skb, &tun_key);
return 0;
error:
#endif
};
-static bool inited;
-
+static int gre_ports;
static int gre_init(void)
{
int err;
- if (inited)
+ gre_ports++;
+ if (gre_ports > 1)
return 0;
- inited = true;
err = inet_add_protocol(&gre_protocol_handlers, IPPROTO_GRE);
if (err)
pr_warn("cannot register gre protocol handler\n");
static void gre_exit(void)
{
- if (!inited)
+ gre_ports--;
+ if (gre_ports > 0)
return;
- inited = false;
-
inet_del_protocol(&gre_protocol_handlers, IPPROTO_GRE);
}
-/* GRE vport. */
-static const struct tnl_ops gre_tnl_ops = {
- .ipproto = IPPROTO_GRE,
- .hdr_len = gre_hdr_len,
- .build_header = gre_build_header,
-};
+static const char *gre_get_name(const struct vport *vport)
+{
+ return vport_priv(vport);
+}
static struct vport *gre_create(const struct vport_parms *parms)
{
struct net *net = ovs_dp_get_net(parms->dp);
struct ovs_net *ovs_net;
struct vport *vport;
+ int err;
+
+ err = gre_init();
+ if (err)
+ return ERR_PTR(err);
ovs_net = net_generic(net, ovs_net_id);
- if (ovsl_dereference(ovs_net->vport_net.gre_vport))
- return ERR_PTR(-EEXIST);
+ if (ovsl_dereference(ovs_net->vport_net.gre_vport)) {
+ vport = ERR_PTR(-EEXIST);
+ goto error;
+ }
- vport = ovs_tnl_create(parms, &ovs_gre_vport_ops, &gre_tnl_ops);
+ vport = ovs_vport_alloc(IFNAMSIZ, &ovs_gre_vport_ops, parms);
+ if (IS_ERR(vport))
+ goto error;
+ strncpy(vport_priv(vport), parms->name, IFNAMSIZ);
rcu_assign_pointer(ovs_net->vport_net.gre_vport, vport);
return vport;
+
+error:
+ gre_exit();
+ return vport;
}
static void gre_tnl_destroy(struct vport *vport)
ovs_net = net_generic(net, ovs_net_id);
rcu_assign_pointer(ovs_net->vport_net.gre_vport, NULL);
- ovs_tnl_destroy(vport);
+ ovs_vport_deferred_free(vport);
+ gre_exit();
+}
+
+static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
+{
+ int hlen;
+
+ if (unlikely(!OVS_CB(skb)->tun_key))
+ return -EINVAL;
+
+ hlen = gre_hdr_len(OVS_CB(skb)->tun_key);
+ return ovs_tnl_send(vport, skb, IPPROTO_GRE, hlen, gre_build_header);
}
const struct vport_ops ovs_gre_vport_ops = {
.type = OVS_VPORT_TYPE_GRE,
- .flags = VPORT_F_TUN_ID,
- .init = gre_init,
- .exit = gre_exit,
.create = gre_create,
.destroy = gre_tnl_destroy,
- .get_name = ovs_tnl_get_name,
- .send = ovs_tnl_send,
+ .get_name = gre_get_name,
+ .send = gre_tnl_send,
};
/* GRE64 vport. */
-static const struct tnl_ops gre64_tnl_ops = {
- .ipproto = IPPROTO_GRE,
- .hdr_len = gre64_hdr_len,
- .build_header = gre64_build_header,
-};
-
static struct vport *gre64_create(const struct vport_parms *parms)
{
struct net *net = ovs_dp_get_net(parms->dp);
struct ovs_net *ovs_net;
struct vport *vport;
+ int err;
+
+ err = gre_init();
+ if (err)
+ return ERR_PTR(err);
ovs_net = net_generic(net, ovs_net_id);
- if (ovsl_dereference(ovs_net->vport_net.gre64_vport))
- return ERR_PTR(-EEXIST);
+ if (ovsl_dereference(ovs_net->vport_net.gre64_vport)) {
+ vport = ERR_PTR(-EEXIST);
+ goto error;
+ }
- vport = ovs_tnl_create(parms, &ovs_gre64_vport_ops, &gre64_tnl_ops);
+ vport = ovs_vport_alloc(IFNAMSIZ, &ovs_gre64_vport_ops, parms);
+ if (IS_ERR(vport))
+ goto error;
+ strncpy(vport_priv(vport), parms->name, IFNAMSIZ);
rcu_assign_pointer(ovs_net->vport_net.gre64_vport, vport);
return vport;
+error:
+ gre_exit();
+ return vport;
}
-
static void gre64_tnl_destroy(struct vport *vport)
{
struct net *net = ovs_dp_get_net(vport->dp);
ovs_net = net_generic(net, ovs_net_id);
rcu_assign_pointer(ovs_net->vport_net.gre64_vport, NULL);
- ovs_tnl_destroy(vport);
+ ovs_vport_deferred_free(vport);
+ gre_exit();
+}
+
+static int gre64_tnl_send(struct vport *vport, struct sk_buff *skb)
+{
+ int hlen;
+
+ if (unlikely(!OVS_CB(skb)->tun_key))
+ return -EINVAL;
+
+ hlen = gre64_hdr_len(OVS_CB(skb)->tun_key);
+ return ovs_tnl_send(vport, skb, IPPROTO_GRE, hlen, gre64_build_header);
}
const struct vport_ops ovs_gre64_vport_ops = {
.type = OVS_VPORT_TYPE_GRE64,
- .flags = VPORT_F_TUN_ID,
- .init = gre_init,
- .exit = gre_exit,
.create = gre64_create,
.destroy = gre64_tnl_destroy,
- .get_name = ovs_tnl_get_name,
- .send = ovs_tnl_send,
+ .get_name = gre_get_name,
+ .send = gre64_tnl_send,
};
vlan_copy_skb_tci(skb);
rcu_read_lock();
- ovs_vport_receive(internal_dev_priv(netdev)->vport, skb);
+ ovs_vport_receive(internal_dev_priv(netdev)->vport, skb, NULL);
rcu_read_unlock();
return 0;
}
const struct vport_ops ovs_internal_vport_ops = {
.type = OVS_VPORT_TYPE_INTERNAL,
- .flags = VPORT_F_REQUIRED,
.create = internal_dev_create,
.destroy = internal_dev_destroy,
.get_name = ovs_netdev_get_name,
#define LISP_HLEN (sizeof(struct udphdr) + sizeof(struct lisphdr))
-static inline int lisp_hdr_len(const struct ovs_key_ipv4_tunnel *tun_key)
-{
- return LISP_HLEN;
-}
-
/**
* struct lisp_port - Keeps track of open UDP ports
- * @list: list element.
- * @vport: vport for the tunnel.
- * @socket: The socket created for this port number.
+ * @dst_port: lisp UDP port no.
+ * @list: list element in @lisp_ports.
+ * @lisp_rcv_socket: The socket created for this port number.
+ * @name: vport name.
*/
struct lisp_port {
+ __be16 dst_port;
struct list_head list;
- struct vport *vport;
struct socket *lisp_rcv_socket;
- struct rcu_head rcu;
+ char name[IFNAMSIZ];
};
static LIST_HEAD(lisp_ports);
+static inline struct lisp_port *lisp_vport(const struct vport *vport)
+{
+ return vport_priv(vport);
+}
+
static struct lisp_port *lisp_find_port(struct net *net, __be16 port)
{
struct lisp_port *lisp_port;
list_for_each_entry_rcu(lisp_port, &lisp_ports, list) {
- struct tnl_vport *tnl_vport = tnl_vport_priv(lisp_port->vport);
-
- if (tnl_vport->dst_port == port &&
+ if (lisp_port->dst_port == port &&
net_eq(sock_net(lisp_port->lisp_rcv_socket->sk), net))
return lisp_port;
}
return (struct lisphdr *)(udp_hdr(skb) + 1);
}
-static int lisp_tnl_send(struct vport *vport, struct sk_buff *skb)
-{
- int tnl_len;
- int network_offset = skb_network_offset(skb);
-
- /* We only encapsulate IPv4 and IPv6 packets */
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- case htons(ETH_P_IPV6):
- /* Pop off "inner" Ethernet header */
- skb_pull(skb, network_offset);
- tnl_len = ovs_tnl_send(vport, skb);
- return tnl_len > 0 ? tnl_len + network_offset : tnl_len;
- default:
- kfree_skb(skb);
- return 0;
- }
-}
-
/* Convert 64 bit tunnel ID to 24 bit Instance ID. */
static void tunnel_id_to_instance_id(__be64 tun_id, __u8 *iid)
{
struct sk_buff *skb,
int tunnel_hlen)
{
- struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
+ struct lisp_port *lisp_port = lisp_vport(vport);
struct udphdr *udph = udp_hdr(skb);
struct lisphdr *lisph = (struct lisphdr *)(udph + 1);
const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
- udph->dest = tnl_vport->dst_port;
+ udph->dest = lisp_port->dst_port;
udph->source = htons(ovs_tnl_get_src_port(skb));
udph->check = 0;
udph->len = htons(skb->len - skb_transport_offset(skb));
/* Save outer tunnel values */
iph = ip_hdr(skb);
tnl_tun_key_init(&tun_key, iph, key, OVS_TNL_F_KEY);
- OVS_CB(skb)->tun_key = &tun_key;
/* Drop non-IP inner packets */
inner_iph = (struct iphdr *)(lisph + 1);
ethh->h_source[0] = 0x02;
ethh->h_proto = protocol;
- ovs_tnl_rcv(lisp_port->vport, skb);
+ ovs_tnl_rcv(vport_from_priv(lisp_port), skb, &tun_key);
goto out;
error:
#define UDP_ENCAP_LISP 1
static int lisp_socket_init(struct lisp_port *lisp_port, struct net *net)
{
- int err;
struct sockaddr_in sin;
- struct tnl_vport *tnl_vport = tnl_vport_priv(lisp_port->vport);
+ int err;
err = sock_create_kern(AF_INET, SOCK_DGRAM, 0,
&lisp_port->lisp_rcv_socket);
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl(INADDR_ANY);
- sin.sin_port = tnl_vport->dst_port;
+ sin.sin_port = lisp_port->dst_port;
err = kernel_bind(lisp_port->lisp_rcv_socket, (struct sockaddr *)&sin,
sizeof(struct sockaddr_in));
return err;
}
-
-static void free_port_rcu(struct rcu_head *rcu)
+static int lisp_get_options(const struct vport *vport, struct sk_buff *skb)
{
- struct lisp_port *lisp_port = container_of(rcu,
- struct lisp_port, rcu);
+ struct lisp_port *lisp_port = lisp_vport(vport);
- kfree(lisp_port);
+ if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(lisp_port->dst_port)))
+ return -EMSGSIZE;
+ return 0;
}
-static void lisp_tunnel_release(struct lisp_port *lisp_port)
+static void lisp_tnl_destroy(struct vport *vport)
{
- if (!lisp_port)
- return;
+ struct lisp_port *lisp_port = lisp_vport(vport);
+
list_del_rcu(&lisp_port->list);
/* Release socket */
sk_release_kernel(lisp_port->lisp_rcv_socket->sk);
- call_rcu(&lisp_port->rcu, free_port_rcu);
+
+ ovs_vport_deferred_free(vport);
}
-static int lisp_tunnel_setup(struct net *net, struct vport *vport,
- struct nlattr *options)
+static struct vport *lisp_tnl_create(const struct vport_parms *parms)
{
- struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
+ struct net *net = ovs_dp_get_net(parms->dp);
+ struct nlattr *options = parms->options;
struct lisp_port *lisp_port;
+ struct vport *vport;
struct nlattr *a;
int err;
u16 dst_port;
if (!options) {
err = -EINVAL;
- goto out;
+ goto error;
}
a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
} else {
/* Require destination port from userspace. */
err = -EINVAL;
- goto out;
+ goto error;
}
/* Verify if we already have a socket created for this port */
- lisp_port = lisp_find_port(net, htons(dst_port));
- if (lisp_port) {
+ if (lisp_find_port(net, htons(dst_port))) {
err = -EEXIST;
- goto out;
+ goto error;
}
- /* Add a new socket for this port */
- lisp_port = kzalloc(sizeof(struct lisp_port), GFP_KERNEL);
- if (!lisp_port) {
- err = -ENOMEM;
- goto out;
- }
+ vport = ovs_vport_alloc(sizeof(struct lisp_port),
+ &ovs_lisp_vport_ops, parms);
+ if (IS_ERR(vport))
+ return vport;
- tnl_vport->dst_port = htons(dst_port);
- lisp_port->vport = vport;
- list_add_tail_rcu(&lisp_port->list, &lisp_ports);
+ lisp_port = lisp_vport(vport);
+ lisp_port->dst_port = htons(dst_port);
+ strncpy(lisp_port->name, parms->name, IFNAMSIZ);
err = lisp_socket_init(lisp_port, net);
if (err)
- goto error;
+ goto error_free;
- return 0;
+ list_add_tail_rcu(&lisp_port->list, &lisp_ports);
+ return vport;
+error_free:
+ ovs_vport_free(vport);
error:
- list_del_rcu(&lisp_port->list);
- kfree(lisp_port);
-out:
- return err;
-}
-
-static int lisp_get_options(const struct vport *vport, struct sk_buff *skb)
-{
- const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
-
- if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(tnl_vport->dst_port)))
- return -EMSGSIZE;
- return 0;
+ return ERR_PTR(err);
}
-static const struct tnl_ops ovs_lisp_tnl_ops = {
- .ipproto = IPPROTO_UDP,
- .hdr_len = lisp_hdr_len,
- .build_header = lisp_build_header,
-};
-
-static void lisp_tnl_destroy(struct vport *vport)
+static int lisp_tnl_send(struct vport *vport, struct sk_buff *skb)
{
- struct lisp_port *lisp_port;
- struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
+ int tnl_len;
+ int network_offset = skb_network_offset(skb);
- lisp_port = lisp_find_port(ovs_dp_get_net(vport->dp),
- tnl_vport->dst_port);
+ if (unlikely(!OVS_CB(skb)->tun_key))
+ return -EINVAL;
- lisp_tunnel_release(lisp_port);
- ovs_tnl_destroy(vport);
+ /* We only encapsulate IPv4 and IPv6 packets */
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ case htons(ETH_P_IPV6):
+ /* Pop off "inner" Ethernet header */
+ skb_pull(skb, network_offset);
+ tnl_len = ovs_tnl_send(vport, skb, IPPROTO_UDP,
+ LISP_HLEN, lisp_build_header);
+ return tnl_len > 0 ? tnl_len + network_offset : tnl_len;
+ default:
+ kfree_skb(skb);
+ return 0;
+ }
}
-static struct vport *lisp_tnl_create(const struct vport_parms *parms)
+static const char *lisp_get_name(const struct vport *vport)
{
- struct vport *vport;
- int err;
-
- vport = ovs_tnl_create(parms, &ovs_lisp_vport_ops, &ovs_lisp_tnl_ops);
- if (IS_ERR(vport))
- return vport;
-
- err = lisp_tunnel_setup(ovs_dp_get_net(parms->dp), vport,
- parms->options);
- if (err) {
- ovs_tnl_destroy(vport);
- return ERR_PTR(err);
- }
-
- return vport;
+ struct lisp_port *lisp_port = lisp_vport(vport);
+ return lisp_port->name;
}
const struct vport_ops ovs_lisp_vport_ops = {
.type = OVS_VPORT_TYPE_LISP,
- .flags = VPORT_F_TUN_ID,
.create = lisp_tnl_create,
.destroy = lisp_tnl_destroy,
- .get_name = ovs_tnl_get_name,
+ .get_name = lisp_get_name,
.get_options = lisp_get_options,
.send = lisp_tnl_send,
};
static int netdev_init(void) { return 0; }
static void netdev_exit(void) { }
#else
-static int netdev_init(void)
+static int port_count;
+
+static void netdev_init(void)
{
+ port_count++;
+ if (port_count > 1)
+ return;
+
/* Hook into callback used by the bridge to intercept packets.
* Parasites we are. */
br_handle_frame_hook = netdev_frame_hook;
- return 0;
+ return;
}
static void netdev_exit(void)
{
+ port_count--;
+ if (port_count > 0)
+ return;
+
br_handle_frame_hook = NULL;
}
#endif
netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
rtnl_unlock();
+ netdev_init();
return vport;
#ifndef HAVE_RHEL_OVS_HOOK
{
struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+ netdev_exit();
rtnl_lock();
netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
netdev_rx_handler_unregister(netdev_vport->dev);
vlan_copy_skb_tci(skb);
- ovs_vport_receive(vport, skb);
+ ovs_vport_receive(vport, skb, NULL);
return;
error:
net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
netdev_vport->dev->name,
packet_length(skb), mtu);
- goto error;
+ goto drop;
}
skb->dev = netdev_vport->dev;
nskb = skb_gso_segment(skb, features);
if (!nskb) {
if (unlikely(skb_cloned(skb) &&
- pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) {
- kfree_skb(skb);
- return 0;
- }
+ pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+ goto drop;
skb_shinfo(skb)->gso_type &= ~SKB_GSO_DODGY;
goto tag;
}
- if (IS_ERR(nskb)) {
- kfree_skb(skb);
- return 0;
- }
+ if (IS_ERR(nskb))
+ goto drop;
consume_skb(skb);
skb = nskb;
return len;
-error:
+drop:
kfree_skb(skb);
- ovs_vport_record_error(vport, VPORT_E_TX_DROPPED);
return 0;
}
const struct vport_ops ovs_netdev_vport_ops = {
.type = OVS_VPORT_TYPE_NETDEV,
- .flags = VPORT_F_REQUIRED,
- .init = netdev_init,
- .exit = netdev_exit,
.create = netdev_create,
.destroy = netdev_destroy,
.get_name = ovs_netdev_get_name,
}
const char *ovs_netdev_get_name(const struct vport *);
-const char *ovs_netdev_get_config(const struct vport *);
#endif /* vport_netdev.h */
#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
-static inline int vxlan_hdr_len(const struct ovs_key_ipv4_tunnel *tun_key)
-{
- return VXLAN_HLEN;
-}
-
/**
* struct vxlan_port - Keeps track of open UDP ports
- * @list: list element.
- * @vport: vport for the tunnel.
- * @socket: The socket created for this port number.
+ * @dst_port: vxlan UDP port no.
+ * @list: list element in @vxlan_ports.
+ * @vxlan_rcv_socket: The socket created for this port number.
+ * @name: vport name.
*/
struct vxlan_port {
+ __be16 dst_port;
struct list_head list;
- struct vport *vport;
struct socket *vxlan_rcv_socket;
- struct rcu_head rcu;
+ char name[IFNAMSIZ];
};
static LIST_HEAD(vxlan_ports);
+static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
+{
+ return vport_priv(vport);
+}
+
static struct vxlan_port *vxlan_find_port(struct net *net, __be16 port)
{
struct vxlan_port *vxlan_port;
list_for_each_entry_rcu(vxlan_port, &vxlan_ports, list) {
- struct tnl_vport *tnl_vport = tnl_vport_priv(vxlan_port->vport);
- if (tnl_vport->dst_port == port &&
+ if (vxlan_port->dst_port == port &&
net_eq(sock_net(vxlan_port->vxlan_rcv_socket->sk), net))
return vxlan_port;
}
struct sk_buff *skb,
int tunnel_hlen)
{
- struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
+ struct vxlan_port *vxlan_port = vxlan_vport(vport);
struct udphdr *udph = udp_hdr(skb);
struct vxlanhdr *vxh = (struct vxlanhdr *)(udph + 1);
const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
- udph->dest = tnl_vport->dst_port;
+ udph->dest = vxlan_port->dst_port;
udph->source = htons(ovs_tnl_get_src_port(skb));
udph->check = 0;
udph->len = htons(skb->len - skb_transport_offset(skb));
/* Save outer tunnel values */
iph = ip_hdr(skb);
tnl_tun_key_init(&tun_key, iph, key, OVS_TNL_F_KEY);
- OVS_CB(skb)->tun_key = &tun_key;
- ovs_tnl_rcv(vxlan_vport->vport, skb);
+ ovs_tnl_rcv(vport_from_priv(vxlan_vport), skb, &tun_key);
goto out;
error:
#define UDP_ENCAP_VXLAN 1
static int vxlan_socket_init(struct vxlan_port *vxlan_port, struct net *net)
{
- int err;
struct sockaddr_in sin;
- struct tnl_vport *tnl_vport = tnl_vport_priv(vxlan_port->vport);
+ int err;
err = sock_create_kern(AF_INET, SOCK_DGRAM, 0,
&vxlan_port->vxlan_rcv_socket);
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl(INADDR_ANY);
- sin.sin_port = tnl_vport->dst_port;
+ sin.sin_port = vxlan_port->dst_port;
err = kernel_bind(vxlan_port->vxlan_rcv_socket, (struct sockaddr *)&sin,
sizeof(struct sockaddr_in));
return err;
}
-static void free_port_rcu(struct rcu_head *rcu)
+static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
{
- struct vxlan_port *vxlan_port = container_of(rcu,
- struct vxlan_port, rcu);
+ struct vxlan_port *vxlan_port = vxlan_vport(vport);
- kfree(vxlan_port);
+ if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(vxlan_port->dst_port)))
+ return -EMSGSIZE;
+ return 0;
}
-static void vxlan_tunnel_release(struct vxlan_port *vxlan_port)
+static void vxlan_tnl_destroy(struct vport *vport)
{
- if (!vxlan_port)
- return;
+ struct vxlan_port *vxlan_port = vxlan_vport(vport);
list_del_rcu(&vxlan_port->list);
/* Release socket */
sk_release_kernel(vxlan_port->vxlan_rcv_socket->sk);
- call_rcu(&vxlan_port->rcu, free_port_rcu);
+
+ ovs_vport_deferred_free(vport);
}
-static int vxlan_tunnel_setup(struct net *net, struct vport *vport,
- struct nlattr *options)
+static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
{
+ struct net *net = ovs_dp_get_net(parms->dp);
+ struct nlattr *options = parms->options;
struct vxlan_port *vxlan_port;
- struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
+ struct vport *vport;
struct nlattr *a;
int err;
u16 dst_port;
if (!options) {
err = -EINVAL;
- goto out;
+ goto error;
}
-
a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
if (a && nla_len(a) == sizeof(u16)) {
dst_port = nla_get_u16(a);
} else {
/* Require destination port from userspace. */
err = -EINVAL;
- goto out;
+ goto error;
}
/* Verify if we already have a socket created for this port */
- vxlan_port = vxlan_find_port(net, htons(dst_port));
- if (vxlan_port) {
+ if (vxlan_find_port(net, htons(dst_port))) {
err = -EEXIST;
- goto out;
+ goto error;
}
- /* Add a new socket for this port */
- vxlan_port = kzalloc(sizeof(struct vxlan_port), GFP_KERNEL);
- if (!vxlan_port) {
- err = -ENOMEM;
- goto out;
- }
+ vport = ovs_vport_alloc(sizeof(struct vxlan_port),
+ &ovs_vxlan_vport_ops, parms);
+ if (IS_ERR(vport))
+ return vport;
- tnl_vport->dst_port = htons(dst_port);
- vxlan_port->vport = vport;
- list_add_tail_rcu(&vxlan_port->list, &vxlan_ports);
+ vxlan_port = vxlan_vport(vport);
+ vxlan_port->dst_port = htons(dst_port);
+ strncpy(vxlan_port->name, parms->name, IFNAMSIZ);
err = vxlan_socket_init(vxlan_port, net);
if (err)
- goto error;
+ goto error_free;
- return 0;
+ list_add_tail_rcu(&vxlan_port->list, &vxlan_ports);
+ return vport;
+error_free:
+ ovs_vport_free(vport);
error:
- list_del_rcu(&vxlan_port->list);
- kfree(vxlan_port);
-out:
- return err;
+ return ERR_PTR(err);
}
-static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
-{
- const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
-
- if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(tnl_vport->dst_port)))
- return -EMSGSIZE;
- return 0;
-}
-
-static const struct tnl_ops ovs_vxlan_tnl_ops = {
- .ipproto = IPPROTO_UDP,
- .hdr_len = vxlan_hdr_len,
- .build_header = vxlan_build_header,
-};
-
-static void vxlan_tnl_destroy(struct vport *vport)
+static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
{
- struct vxlan_port *vxlan_port;
- struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
+ if (unlikely(!OVS_CB(skb)->tun_key))
+ return -EINVAL;
- vxlan_port = vxlan_find_port(ovs_dp_get_net(vport->dp),
- tnl_vport->dst_port);
-
- vxlan_tunnel_release(vxlan_port);
- ovs_tnl_destroy(vport);
+ return ovs_tnl_send(vport, skb, IPPROTO_UDP,
+ VXLAN_HLEN, vxlan_build_header);
}
-static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
+static const char *vxlan_get_name(const struct vport *vport)
{
- int err;
- struct vport *vport;
-
- vport = ovs_tnl_create(parms, &ovs_vxlan_vport_ops, &ovs_vxlan_tnl_ops);
- if (IS_ERR(vport))
- return vport;
-
- err = vxlan_tunnel_setup(ovs_dp_get_net(parms->dp), vport,
- parms->options);
- if (err) {
- ovs_tnl_destroy(vport);
- return ERR_PTR(err);
- }
-
- return vport;
+ struct vxlan_port *vxlan_port = vxlan_vport(vport);
+ return vxlan_port->name;
}
const struct vport_ops ovs_vxlan_vport_ops = {
.type = OVS_VPORT_TYPE_VXLAN,
- .flags = VPORT_F_TUN_ID,
.create = vxlan_tnl_create,
.destroy = vxlan_tnl_destroy,
- .get_name = ovs_tnl_get_name,
+ .get_name = vxlan_get_name,
.get_options = vxlan_get_options,
- .send = ovs_tnl_send,
+ .send = vxlan_tnl_send,
};
#else
#warning VXLAN tunneling will not be available on kernels before 2.6.26
/* List of statically compiled vport implementations. Don't forget to also
* add yours to the list at the bottom of vport.h. */
-static const struct vport_ops *base_vport_ops_list[] = {
+static const struct vport_ops *vport_ops_list[] = {
&ovs_netdev_vport_ops,
&ovs_internal_vport_ops,
&ovs_gre_vport_ops,
#endif
};
-static const struct vport_ops **vport_ops_list;
-static int n_vport_types;
-
/* Protected by RCU read lock for reading, ovs_mutex for writing. */
static struct hlist_head *dev_table;
#define VPORT_HASH_BUCKETS 1024
/**
* ovs_vport_init - initialize vport subsystem
*
- * Called at module load time to initialize the vport subsystem and any
- * compiled in vport types.
+ * Called at module load time to initialize the vport subsystem.
*/
int ovs_vport_init(void)
{
- int err;
- int i;
-
dev_table = kzalloc(VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
GFP_KERNEL);
- if (!dev_table) {
- err = -ENOMEM;
- goto error;
- }
-
- vport_ops_list = kmalloc(ARRAY_SIZE(base_vport_ops_list) *
- sizeof(struct vport_ops *), GFP_KERNEL);
- if (!vport_ops_list) {
- err = -ENOMEM;
- goto error_dev_table;
- }
-
- for (i = 0; i < ARRAY_SIZE(base_vport_ops_list); i++) {
- const struct vport_ops *new_ops = base_vport_ops_list[i];
-
- if (new_ops->init)
- err = new_ops->init();
- else
- err = 0;
-
- if (!err)
- vport_ops_list[n_vport_types++] = new_ops;
- else if (new_ops->flags & VPORT_F_REQUIRED) {
- ovs_vport_exit();
- goto error;
- }
- }
+ if (!dev_table)
+ return -ENOMEM;
return 0;
-
-error_dev_table:
- kfree(dev_table);
-error:
- return err;
}
/**
* ovs_vport_exit - shutdown vport subsystem
*
- * Called at module exit time to shutdown the vport subsystem and any
- * initialized vport types.
+ * Called at module exit time to shutdown the vport subsystem.
*/
void ovs_vport_exit(void)
{
- int i;
-
- for (i = 0; i < n_vport_types; i++) {
- if (vport_ops_list[i]->exit)
- vport_ops_list[i]->exit();
- }
-
- kfree(vport_ops_list);
kfree(dev_table);
}
int err = 0;
int i;
- for (i = 0; i < n_vport_types; i++) {
+ for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) {
if (vport_ops_list[i]->type == parms->type) {
struct hlist_head *bucket;
* skb->data should point to the Ethernet header. The caller must have already
* called compute_ip_summed() to initialize the checksumming fields.
*/
-void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
+void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
+ struct ovs_key_ipv4_tunnel *tun_key)
{
struct pcpu_tstats *stats;
stats->rx_bytes += skb->len;
u64_stats_update_end(&stats->syncp);
- if (!(vport->ops->flags & VPORT_F_TUN_ID))
- OVS_CB(skb)->tun_key = NULL;
-
+ OVS_CB(skb)->tun_key = tun_key;
ovs_dp_process_received_packet(vport, skb);
}
stats->tx_packets++;
stats->tx_bytes += sent;
u64_stats_update_end(&stats->syncp);
- }
+ } else if (sent < 0) {
+ ovs_vport_record_error(vport, VPORT_E_TX_ERROR);
+ kfree_skb(skb);
+ } else
+ ovs_vport_record_error(vport, VPORT_E_TX_DROPPED);
+
return sent;
}
spin_unlock(&vport->stats_lock);
}
+
+static void free_vport_rcu(struct rcu_head *rcu)
+{
+ struct vport *vport = container_of(rcu, struct vport, rcu);
+
+ ovs_vport_free(vport);
+}
+
+void ovs_vport_deferred_free(struct vport *vport)
+{
+ if (!vport)
+ return;
+
+ call_rcu(&vport->rcu, free_vport_rcu);
+}
struct ovs_vport_stats offset_stats;
};
-#define VPORT_F_REQUIRED (1 << 0) /* If init fails, module loading fails. */
-#define VPORT_F_TUN_ID (1 << 1) /* Sets OVS_CB(skb)->tun_id. */
-
/**
* struct vport_parms - parameters for creating a new vport
*
* struct vport_ops - definition of a type of virtual port
*
* @type: %OVS_VPORT_TYPE_* value for this type of virtual port.
- * @flags: Flags of type VPORT_F_* that influence how the generic vport layer
- * handles this vport.
- * @init: Called at module initialization. If VPORT_F_REQUIRED is set then the
- * failure of this function will cause the module to not load. If the flag is
- * not set and initialzation fails then no vports of this type can be created.
- * @exit: Called at module unload.
* @create: Create a new vport configured as specified. On success returns
* a new vport allocated with ovs_vport_alloc(), otherwise an ERR_PTR() value.
* @destroy: Destroys a vport. Must call vport_free() on the vport but not
* existing vport to a &struct sk_buff. May be %NULL for a vport that does not
* have any configuration.
* @get_name: Get the device's name.
- * @get_config: Get the device's configuration.
- * May be null if the device does not have an ifindex.
- * @send: Send a packet on the device. Returns the length of the packet sent.
+ * @send: Send a packet on the device. Returns the length of the packet sent,
+ * zero for dropped packets or negative for error.
*/
struct vport_ops {
enum ovs_vport_type type;
- u32 flags;
-
- /* Called at module init and exit respectively. */
- int (*init)(void);
- void (*exit)(void);
/* Called with ovs_mutex. */
struct vport *(*create)(const struct vport_parms *);
/* Called with rcu_read_lock or ovs_mutex. */
const char *(*get_name)(const struct vport *);
- void (*get_config)(const struct vport *, void *);
int (*send)(struct vport *, struct sk_buff *);
};
struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *,
const struct vport_parms *);
void ovs_vport_free(struct vport *);
+void ovs_vport_deferred_free(struct vport *vport);
#define VPORT_ALIGN 8
return (struct vport *)(priv - ALIGN(sizeof(struct vport), VPORT_ALIGN));
}
-void ovs_vport_receive(struct vport *, struct sk_buff *);
+void ovs_vport_receive(struct vport *, struct sk_buff *,
+ struct ovs_key_ipv4_tunnel *);
void ovs_vport_record_error(struct vport *, enum vport_err_type err_type);
/* List of statically compiled vport implementations. Don't forget to also
* - NXM_NX_ND_SLL
* - NXM_NX_ND_TLL
* - NXM_NX_REG(idx) for idx in the switch's accepted range.
+ * - NXM_NX_TUN_IPV4_SRC
+ * - NXM_NX_TUN_IPV4_DST
*
* The following nxm_header values are potentially acceptable as 'dst':
*
* Modifying any of the above fields changes the corresponding packet
* header.
*
+ * - NXM_OF_IN_PORT
+ *
* - NXM_NX_REG(idx) for idx in the switch's accepted range.
*
* - NXM_OF_VLAN_TCI. Modifying this field's value has side effects on the
* adds or modifies the 802.1Q header appropriately, setting the TCI field
* to the field's new value (with the CFI bit masked out).
*
- * - NXM_NX_TUN_ID. Modifying this value modifies the tunnel ID used for the
- * packet's next tunnel encapsulation.
+ * - NXM_NX_TUN_ID, NXM_NX_TUN_IPV4_SRC, NXM_NX_TUN_IPV4_DST. Modifying
+ * any of these values modifies the corresponding tunnel header field used
+ * for the packet's next tunnel encapsulation, if allowed by the
+ * configuration of the output tunnel port.
*
* A given nxm_header value may be used as 'src' or 'dst' only on a flow whose
* nx_match satisfies its prerequisites. For example, NXM_OF_IP_TOS may be
#define NXM_NX_COOKIE NXM_HEADER (0x0001, 30, 8)
#define NXM_NX_COOKIE_W NXM_HEADER_W(0x0001, 30, 8)
+/* The source or destination address in the outer IP header of a tunneled
+ * packet.
+ *
+ * For non-tunneled packets, the value is 0.
+ *
+ * Prereqs: None.
+ *
+ * Format: 32-bit integer in network byte order.
+ *
+ * Masking: Fully maskable. */
+#define NXM_NX_TUN_IPV4_SRC NXM_HEADER (0x0001, 31, 4)
+#define NXM_NX_TUN_IPV4_SRC_W NXM_HEADER_W(0x0001, 31, 4)
+#define NXM_NX_TUN_IPV4_DST NXM_HEADER (0x0001, 32, 4)
+#define NXM_NX_TUN_IPV4_DST_W NXM_HEADER_W(0x0001, 32, 4)
+
/* ## --------------------- ## */
/* ## Requests and replies. ## */
/* ## --------------------- ## */
lib/aes128.h \
lib/backtrace.c \
lib/backtrace.h \
+ lib/bfd.c \
+ lib/bfd.h \
lib/bitmap.c \
lib/bitmap.h \
lib/bond.c \
--- /dev/null
+/* Copyright (c) 2013 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. */
+
+#include <config.h>
+#include "bfd.h"
+
+#include <arpa/inet.h>
+
+#include "csum.h"
+#include "dpif.h"
+#include "dynamic-string.h"
+#include "flow.h"
+#include "hash.h"
+#include "hmap.h"
+#include "list.h"
+#include "netlink.h"
+#include "odp-util.h"
+#include "ofpbuf.h"
+#include "openvswitch/types.h"
+#include "packets.h"
+#include "poll-loop.h"
+#include "random.h"
+#include "smap.h"
+#include "timeval.h"
+#include "unixctl.h"
+#include "util.h"
+#include "vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(bfd);
+
+/* XXX Finish BFD.
+ *
+ * The goal of this module is to replace CFM with something both more flexible
+ * and standards compliant. In service of this goal, the following needs to be
+ * done.
+ *
+ * - Compliance
+ * * Implement Demand mode.
+ * * Go through the RFC line by line and verify we comply.
+ * * Test against a hardware implementation. Preferably a popular one.
+ * Delete BFD packets with nw_ttl != 255 in the datapath to prevent DoS
+ * attacks.
+ *
+ * - Unit tests.
+ *
+ * - BFD show into ovs-bugtool.
+ *
+ * - Set TOS/PCP on inner BFD frame, and outer tunnel header when encapped.
+ *
+ * - CFM "check_tnl_key" option equivalent.
+ *
+ * - CFM "fault override" equivalent.
+ *
+ * - Sending BFD messages should be in its own thread/process.
+ *
+ * - Scale testing. How does it operate when there are large number of bfd
+ * sessions? Do we ever have random flaps? What's the CPU utilization?
+ *
+ * - Rely on data traffic for liveness by using BFD demand mode.
+ * If we're receiving traffic on a port, we can safely assume it's up (modulo
+ * unidirectional failures). BFD has a demand mode in which it can stay quiet
+ * unless it feels the need to check the status of the port. Using this, we
+ * can implement a strategy in which BFD only sends control messages on dark
+ * interfaces.
+ *
+ * - Depending on how one interprets the spec, it appears that a BFD session
+ * can never change bfd.LocalDiag to "No Diagnostic". We should verify that
+ * this is what hardware implementations actually do. Seems like "No
+ * Diagnostic" should be set once a BFD session state goes UP. */
+
+#define BFD_VERSION 1
+
+enum flags {
+ FLAG_MULTIPOINT = 1 << 0,
+ FLAG_DEMAND = 1 << 1,
+ FLAG_AUTH = 1 << 2,
+ FLAG_CTL = 1 << 3,
+ FLAG_FINAL = 1 << 4,
+ FLAG_POLL = 1 << 5
+};
+
+enum state {
+ STATE_ADMIN_DOWN = 0 << 6,
+ STATE_DOWN = 1 << 6,
+ STATE_INIT = 2 << 6,
+ STATE_UP = 3 << 6
+};
+
+enum diag {
+ DIAG_NONE = 0, /* No Diagnostic. */
+ DIAG_EXPIRED = 1, /* Control Detection Time Expired. */
+ DIAG_ECHO_FAILED = 2, /* Echo Function Failed. */
+ DIAG_RMT_DOWN = 3, /* Neighbor Signaled Session Down. */
+ DIAG_FWD_RESET = 4, /* Forwarding Plane Reset. */
+ DIAG_PATH_DOWN = 5, /* Path Down. */
+ DIAG_CPATH_DOWN = 6, /* Concatenated Path Down. */
+ DIAG_ADMIN_DOWN = 7, /* Administratively Down. */
+ DIAG_RCPATH_DOWN = 8 /* Reverse Concatenated Path Down. */
+};
+
+/* RFC 5880 Section 4.1
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |Vers | Diag |Sta|P|F|C|A|D|M| Detect Mult | Length |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | My Discriminator |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Your Discriminator |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Desired Min TX Interval |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Required Min RX Interval |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Required Min Echo RX Interval |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */
+struct msg {
+ uint8_t vers_diag; /* Version and diagnostic. */
+ uint8_t flags; /* 2bit State field followed by flags. */
+ uint8_t mult; /* Fault detection multiplier. */
+ uint8_t length; /* Length of this BFD message. */
+ ovs_be32 my_disc; /* My discriminator. */
+ ovs_be32 your_disc; /* Your discriminator. */
+ ovs_be32 min_tx; /* Desired minimum tx interval. */
+ ovs_be32 min_rx; /* Required minimum rx interval. */
+ ovs_be32 min_rx_echo; /* Required minimum echo rx interval. */
+};
+BUILD_ASSERT_DECL(BFD_PACKET_LEN == sizeof(struct msg));
+
+#define DIAG_MASK 0x1f
+#define VERS_SHIFT 5
+#define STATE_MASK 0xC0
+#define FLAGS_MASK 0x3f
+
+struct bfd {
+ struct hmap_node node; /* In 'all_bfds'. */
+ uint32_t disc; /* bfd.LocalDiscr. Key in 'all_bfds' hmap. */
+
+ char *name; /* Name used for logging. */
+
+ bool cpath_down; /* Concatenated Path Down. */
+ uint8_t mult; /* bfd.DetectMult. */
+
+ enum state state; /* bfd.SessionState. */
+ enum state rmt_state; /* bfd.RemoteSessionState. */
+
+ enum diag diag; /* bfd.LocalDiag. */
+ enum diag rmt_diag; /* Remote diagnostic. */
+
+ enum flags flags; /* Flags sent on messages. */
+ enum flags rmt_flags; /* Flags last received. */
+
+ uint32_t rmt_disc; /* bfd.RemoteDiscr. */
+
+ uint16_t udp_src; /* UDP source port. */
+
+ /* All timers in milliseconds. */
+ long long int rmt_min_rx; /* bfd.RemoteMinRxInterval. */
+ long long int rmt_min_tx; /* Remote minimum TX interval. */
+
+ long long int cfg_min_tx; /* Configured minimum TX rate. */
+ long long int cfg_min_rx; /* Configured required minimum RX rate. */
+ long long int poll_min_tx; /* Min TX negotiating in a poll sequence. */
+ long long int poll_min_rx; /* Min RX negotiating in a poll sequence. */
+ long long int min_tx; /* bfd.DesiredMinTxInterval. */
+ long long int min_rx; /* bfd.RequiredMinRxInterval. */
+
+ long long int last_tx; /* Last TX time. */
+ long long int next_tx; /* Next TX time. */
+ long long int detect_time; /* RFC 5880 6.8.4 Detection time. */
+};
+
+static bool bfd_in_poll(const struct bfd *);
+static void bfd_poll(struct bfd *bfd);
+static const char *bfd_diag_str(enum diag);
+static const char *bfd_state_str(enum state);
+static long long int bfd_min_tx(const struct bfd *);
+static long long int bfd_tx_interval(const struct bfd *);
+static long long int bfd_rx_interval(const struct bfd *);
+static void bfd_set_next_tx(struct bfd *);
+static void bfd_set_state(struct bfd *, enum state, enum diag);
+static uint32_t generate_discriminator(void);
+static void bfd_put_details(struct ds *, const struct bfd *);
+static void bfd_unixctl_show(struct unixctl_conn *, int argc,
+ const char *argv[], void *aux OVS_UNUSED);
+static void log_msg(enum vlog_level, const struct msg *, const char *message,
+ const struct bfd *);
+
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(20, 20);
+static struct hmap all_bfds = HMAP_INITIALIZER(&all_bfds);
+
+/* Returns true if the interface on which 'bfd' is running may be used to
+ * forward traffic according to the BFD session state. */
+bool
+bfd_forwarding(const struct bfd *bfd)
+{
+ return bfd->state == STATE_UP
+ && bfd->rmt_diag != DIAG_PATH_DOWN
+ && bfd->rmt_diag != DIAG_CPATH_DOWN
+ && bfd->rmt_diag != DIAG_RCPATH_DOWN;
+}
+
+/* Returns a 'smap' of key value pairs representing the status of 'bfd'
+ * intended for the OVS database. */
+void
+bfd_get_status(const struct bfd *bfd, struct smap *smap)
+{
+ smap_add(smap, "forwarding", bfd_forwarding(bfd) ? "true" : "false");
+ smap_add(smap, "state", bfd_state_str(bfd->state));
+ smap_add(smap, "diagnostic", bfd_diag_str(bfd->diag));
+
+ if (bfd->state != STATE_DOWN) {
+ smap_add(smap, "remote_state", bfd_state_str(bfd->rmt_state));
+ smap_add(smap, "remote_diagnostic", bfd_diag_str(bfd->rmt_diag));
+ }
+}
+
+/* Initializes, destroys, or reconfigures the BFD session 'bfd' (named 'name'),
+ * according to the database configuration contained in 'cfg'. Takes ownership
+ * of 'bfd', which may be NULL. Returns a BFD object which may be used as a
+ * handle for the session, or NULL if BFD is not enabled according to 'cfg'. */
+struct bfd *
+bfd_configure(struct bfd *bfd, const char *name,
+ const struct smap *cfg)
+{
+ static uint16_t udp_src = 0;
+ static bool init = false;
+
+ long long int min_tx, min_rx;
+ bool cpath_down;
+
+ if (!init) {
+ unixctl_command_register("bfd/show", "[interface]", 0, 1,
+ bfd_unixctl_show, NULL);
+ init = true;
+ }
+
+ if (!smap_get_bool(cfg, "enable", false)) {
+ if (bfd) {
+ hmap_remove(&all_bfds, &bfd->node);
+ free(bfd->name);
+ free(bfd);
+ }
+ return NULL;
+ }
+
+ if (!bfd) {
+ bfd = xzalloc(sizeof *bfd);
+ bfd->name = xstrdup(name);
+ bfd->disc = generate_discriminator();
+ hmap_insert(&all_bfds, &bfd->node, bfd->disc);
+
+ bfd->diag = DIAG_NONE;
+ bfd->min_tx = 1000;
+ bfd->mult = 3;
+
+ /* RFC 5881 section 4
+ * The source port MUST be in the range 49152 through 65535. The same
+ * UDP source port number MUST be used for all BFD Control packets
+ * associated with a particular session. The source port number SHOULD
+ * be unique among all BFD sessions on the system. */
+ bfd->udp_src = (udp_src++ % 16384) + 49152;
+
+ bfd_set_state(bfd, STATE_DOWN, DIAG_NONE);
+ }
+
+ min_tx = smap_get_int(cfg, "min_tx", 100);
+ min_tx = MAX(min_tx, 100);
+ if (bfd->cfg_min_tx != min_tx) {
+ bfd->cfg_min_tx = min_tx;
+ if (bfd->state != STATE_UP
+ || (!bfd_in_poll(bfd) && bfd->cfg_min_tx < bfd->min_tx)) {
+ bfd->min_tx = bfd->cfg_min_tx;
+ }
+ bfd_poll(bfd);
+ }
+
+ min_rx = smap_get_int(cfg, "min_rx", 1000);
+ min_rx = MAX(min_rx, 100);
+ if (bfd->cfg_min_rx != min_rx) {
+ bfd->cfg_min_rx = min_rx;
+ if (bfd->state != STATE_UP
+ || (!bfd_in_poll(bfd) && bfd->cfg_min_rx > bfd->min_rx)) {
+ bfd->min_rx = bfd->cfg_min_rx;
+ }
+ bfd_poll(bfd);
+ }
+
+ cpath_down = smap_get_bool(cfg, "cpath_down", false);
+ if (bfd->cpath_down != cpath_down) {
+ bfd->cpath_down = cpath_down;
+ if (bfd->diag == DIAG_NONE || bfd->diag == DIAG_CPATH_DOWN) {
+ bfd_set_state(bfd, bfd->state, DIAG_NONE);
+ }
+ bfd_poll(bfd);
+ }
+ return bfd;
+}
+
+void
+bfd_wait(const struct bfd *bfd)
+{
+ if (bfd->flags & FLAG_FINAL) {
+ poll_immediate_wake();
+ }
+
+ poll_timer_wait_until(bfd->next_tx);
+ if (bfd->state > STATE_DOWN) {
+ poll_timer_wait_until(bfd->detect_time);
+ }
+}
+
+void
+bfd_run(struct bfd *bfd)
+{
+ if (bfd->state > STATE_DOWN && time_msec() >= bfd->detect_time) {
+ bfd_set_state(bfd, STATE_DOWN, DIAG_EXPIRED);
+ }
+
+ if (bfd->min_tx != bfd->cfg_min_tx || bfd->min_rx != bfd->cfg_min_rx) {
+ bfd_poll(bfd);
+ }
+}
+
+bool
+bfd_should_send_packet(const struct bfd *bfd)
+{
+ return bfd->flags & FLAG_FINAL || time_msec() >= bfd->next_tx;
+}
+
+void
+bfd_put_packet(struct bfd *bfd, struct ofpbuf *p,
+ uint8_t eth_src[ETH_ADDR_LEN])
+{
+ long long int min_tx, min_rx;
+ struct udp_header *udp;
+ struct eth_header *eth;
+ struct ip_header *ip;
+ struct msg *msg;
+
+ if (bfd->next_tx) {
+ long long int delay = time_msec() - bfd->next_tx;
+ long long int interval = bfd_tx_interval(bfd);
+ if (delay > interval * 3 / 2) {
+ VLOG_WARN("%s: long delay of %lldms (expected %lldms) sending BFD"
+ " control message", bfd->name, delay, interval);
+ }
+ }
+
+ /* RFC 5880 Section 6.5
+ * A BFD Control packet MUST NOT have both the Poll (P) and Final (F) bits
+ * set. */
+ ovs_assert(!(bfd->flags & FLAG_POLL) || !(bfd->flags & FLAG_FINAL));
+
+ ofpbuf_reserve(p, 2); /* Properly align after the ethernet header. */
+ eth = ofpbuf_put_uninit(p, sizeof *eth);
+ memcpy(eth->eth_dst, eth_addr_broadcast, ETH_ADDR_LEN);
+ memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN);
+ eth->eth_type = htons(ETH_TYPE_IP);
+
+ ip = ofpbuf_put_zeros(p, sizeof *ip);
+ ip->ip_ihl_ver = IP_IHL_VER(5, 4);
+ ip->ip_tot_len = htons(sizeof *ip + sizeof *udp + sizeof *msg);
+ ip->ip_ttl = 255;
+ ip->ip_proto = IPPROTO_UDP;
+ ip->ip_src = htonl(0xA9FE0100); /* 169.254.1.0 Link Local. */
+ ip->ip_dst = htonl(0xA9FE0101); /* 169.254.1.1 Link Local. */
+ ip->ip_csum = csum(ip, sizeof *ip);
+
+ udp = ofpbuf_put_zeros(p, sizeof *udp);
+ udp->udp_src = htons(bfd->udp_src);
+ udp->udp_dst = htons(BFD_DEST_PORT);
+ udp->udp_len = htons(sizeof *udp + sizeof *msg);
+
+ msg = ofpbuf_put_uninit(p, sizeof *msg);
+ msg->vers_diag = (BFD_VERSION << 5) | bfd->diag;
+ msg->flags = (bfd->state & STATE_MASK) | bfd->flags;
+
+ msg->mult = bfd->mult;
+ msg->length = BFD_PACKET_LEN;
+ msg->my_disc = htonl(bfd->disc);
+ msg->your_disc = htonl(bfd->rmt_disc);
+ msg->min_rx_echo = htonl(0);
+
+ if (bfd_in_poll(bfd)) {
+ min_tx = bfd->poll_min_tx;
+ min_rx = bfd->poll_min_rx;
+ } else {
+ min_tx = bfd_min_tx(bfd);
+ min_rx = bfd->min_rx;
+ }
+
+ msg->min_tx = htonl(min_tx * 1000);
+ msg->min_rx = htonl(min_rx * 1000);
+
+ bfd->flags &= ~FLAG_FINAL;
+
+ log_msg(VLL_DBG, msg, "Sending BFD Message", bfd);
+
+ bfd->last_tx = time_msec();
+ bfd_set_next_tx(bfd);
+}
+
+bool
+bfd_should_process_flow(const struct flow *flow)
+{
+ return (flow->dl_type == htons(ETH_TYPE_IP)
+ && flow->nw_proto == IPPROTO_UDP
+ && flow->tp_dst == htons(3784));
+}
+
+void
+bfd_process_packet(struct bfd *bfd, const struct flow *flow,
+ const struct ofpbuf *p)
+{
+ uint32_t rmt_min_rx, pkt_your_disc;
+ enum state rmt_state;
+ enum flags flags;
+ uint8_t version;
+ struct msg *msg;
+
+ /* This function is designed to follow section RFC 5880 6.8.6 closely. */
+
+ if (flow->nw_ttl != 255) {
+ /* XXX Should drop in the kernel to prevent DoS. */
+ return;
+ }
+
+ msg = ofpbuf_at(p, (uint8_t *)p->l7 - (uint8_t *)p->data, BFD_PACKET_LEN);
+ if (!msg) {
+ VLOG_INFO_RL(&rl, "%s: Received unparseable BFD control message.",
+ bfd->name);
+ return;
+ }
+
+ /* RFC 5880 Section 6.8.6
+ * If the Length field is greater than the payload of the encapsulating
+ * protocol, the packet MUST be discarded.
+ *
+ * Note that we make this check implicitly. Above we use ofpbuf_at() to
+ * ensure that there are at least BFD_PACKET_LEN bytes in the payload of
+ * the encapsulating protocol. Below we require msg->length to be exactly
+ * BFD_PACKET_LEN bytes. */
+
+ flags = msg->flags & FLAGS_MASK;
+ rmt_state = msg->flags & STATE_MASK;
+ version = msg->vers_diag >> VERS_SHIFT;
+
+ log_msg(VLL_DBG, msg, "Received BFD control message", bfd);
+
+ if (version != BFD_VERSION) {
+ log_msg(VLL_WARN, msg, "Incorrect version", bfd);
+ return;
+ }
+
+ /* Technically this should happen after the length check. We don't support
+ * authentication however, so it's simpler to do the check first. */
+ if (flags & FLAG_AUTH) {
+ log_msg(VLL_WARN, msg, "Authenticated control message with"
+ " authentication disabled", bfd);
+ return;
+ }
+
+ if (msg->length != BFD_PACKET_LEN) {
+ log_msg(VLL_WARN, msg, "Unexpected length", bfd);
+ if (msg->length < BFD_PACKET_LEN) {
+ return;
+ }
+ }
+
+ if (!msg->mult) {
+ log_msg(VLL_WARN, msg, "Zero multiplier", bfd);
+ return;
+ }
+
+ if (flags & FLAG_MULTIPOINT) {
+ log_msg(VLL_WARN, msg, "Unsupported multipoint flag", bfd);
+ return;
+ }
+
+ if (!msg->my_disc) {
+ log_msg(VLL_WARN, msg, "NULL my_disc", bfd);
+ return;
+ }
+
+ pkt_your_disc = ntohl(msg->your_disc);
+ if (pkt_your_disc) {
+ /* Technically, we should use the your discriminator field to figure
+ * out which 'struct bfd' this packet is destined towards. That way a
+ * bfd session could migrate from one interface to another
+ * transparently. This doesn't fit in with the OVS structure very
+ * well, so in this respect, we are not compliant. */
+ if (pkt_your_disc != bfd->disc) {
+ log_msg(VLL_WARN, msg, "Incorrect your_disc", bfd);
+ return;
+ }
+ } else if (rmt_state > STATE_DOWN) {
+ log_msg(VLL_WARN, msg, "Null your_disc", bfd);
+ return;
+ }
+
+ bfd->rmt_disc = ntohl(msg->my_disc);
+ bfd->rmt_state = rmt_state;
+ bfd->rmt_flags = flags;
+ bfd->rmt_diag = msg->vers_diag & DIAG_MASK;
+
+ if (flags & FLAG_FINAL && bfd_in_poll(bfd)) {
+ bfd->min_tx = bfd->poll_min_tx;
+ bfd->min_rx = bfd->poll_min_rx;
+ bfd->flags &= ~FLAG_POLL;
+ log_msg(VLL_INFO, msg, "Poll sequence terminated", bfd);
+ }
+
+ if (flags & FLAG_POLL) {
+ /* RFC 5880 Section 6.5
+ * When the other system receives a Poll, it immediately transmits a
+ * BFD Control packet with the Final (F) bit set, independent of any
+ * periodic BFD Control packets it may be sending
+ * (see section 6.8.7). */
+ bfd->flags &= ~FLAG_POLL;
+ bfd->flags |= FLAG_FINAL;
+ }
+
+ rmt_min_rx = MAX(ntohl(msg->min_rx) / 1000, 1);
+ if (bfd->rmt_min_rx != rmt_min_rx) {
+ bfd->rmt_min_rx = rmt_min_rx;
+ bfd_set_next_tx(bfd);
+ log_msg(VLL_INFO, msg, "New remote min_rx", bfd);
+ }
+
+ bfd->rmt_min_tx = MAX(ntohl(msg->min_tx) / 1000, 1);
+ bfd->detect_time = bfd_rx_interval(bfd) * bfd->mult + time_msec();
+
+ if (bfd->state == STATE_ADMIN_DOWN) {
+ VLOG_DBG_RL(&rl, "Administratively down, dropping control message.");
+ return;
+ }
+
+ if (rmt_state == STATE_ADMIN_DOWN) {
+ if (bfd->state != STATE_DOWN) {
+ bfd_set_state(bfd, STATE_DOWN, DIAG_RMT_DOWN);
+ }
+ } else {
+ switch (bfd->state) {
+ case STATE_DOWN:
+ if (rmt_state == STATE_DOWN) {
+ bfd_set_state(bfd, STATE_INIT, bfd->diag);
+ } else if (rmt_state == STATE_INIT) {
+ bfd_set_state(bfd, STATE_UP, bfd->diag);
+ }
+ break;
+ case STATE_INIT:
+ if (rmt_state > STATE_DOWN) {
+ bfd_set_state(bfd, STATE_UP, bfd->diag);
+ }
+ break;
+ case STATE_UP:
+ if (rmt_state <= STATE_DOWN) {
+ bfd_set_state(bfd, STATE_DOWN, DIAG_RMT_DOWN);
+ log_msg(VLL_INFO, msg, "Remote signaled STATE_DOWN", bfd);
+ }
+ break;
+ case STATE_ADMIN_DOWN:
+ default:
+ NOT_REACHED();
+ }
+ }
+ /* XXX: RFC 5880 Section 6.8.6 Demand mode related calculations here. */
+}
+\f
+/* Helpers. */
+static bool
+bfd_in_poll(const struct bfd *bfd)
+{
+ return (bfd->flags & FLAG_POLL) != 0;
+}
+
+static void
+bfd_poll(struct bfd *bfd)
+{
+ if (bfd->state > STATE_DOWN && !bfd_in_poll(bfd)
+ && !(bfd->flags & FLAG_FINAL)) {
+ bfd->poll_min_tx = bfd->cfg_min_tx;
+ bfd->poll_min_rx = bfd->cfg_min_rx;
+ bfd->flags |= FLAG_POLL;
+ bfd->next_tx = 0;
+ VLOG_INFO_RL(&rl, "%s: Initiating poll sequence", bfd->name);
+ }
+}
+
+static long long int
+bfd_min_tx(const struct bfd *bfd)
+{
+ /* RFC 5880 Section 6.8.3
+ * When bfd.SessionState is not Up, the system MUST set
+ * bfd.DesiredMinTxInterval to a value of not less than one second
+ * (1,000,000 microseconds). This is intended to ensure that the
+ * bandwidth consumed by BFD sessions that are not Up is negligible,
+ * particularly in the case where a neighbor may not be running BFD. */
+ return (bfd->state == STATE_UP ? bfd->min_tx : MAX(bfd->min_tx, 1000));
+}
+
+static long long int
+bfd_tx_interval(const struct bfd *bfd)
+{
+ long long int interval = bfd_min_tx(bfd);
+ return MAX(interval, bfd->rmt_min_rx);
+}
+
+static long long int
+bfd_rx_interval(const struct bfd *bfd)
+{
+ return MAX(bfd->min_rx, bfd->rmt_min_tx);
+}
+
+static void
+bfd_set_next_tx(struct bfd *bfd)
+{
+ long long int interval = bfd_tx_interval(bfd);
+ interval -= interval * random_range(26) / 100;
+ bfd->next_tx = bfd->last_tx + interval;
+}
+
+static const char *
+bfd_flag_str(enum flags flags)
+{
+ struct ds ds = DS_EMPTY_INITIALIZER;
+ static char flag_str[128];
+
+ if (!flags) {
+ return "none";
+ }
+
+ if (flags & FLAG_MULTIPOINT) {
+ ds_put_cstr(&ds, "multipoint ");
+ }
+
+ if (flags & FLAG_DEMAND) {
+ ds_put_cstr(&ds, "demand ");
+ }
+
+ if (flags & FLAG_AUTH) {
+ ds_put_cstr(&ds, "auth ");
+ }
+
+ if (flags & FLAG_CTL) {
+ ds_put_cstr(&ds, "ctl ");
+ }
+
+ if (flags & FLAG_FINAL) {
+ ds_put_cstr(&ds, "final ");
+ }
+
+ if (flags & FLAG_POLL) {
+ ds_put_cstr(&ds, "poll ");
+ }
+
+ ovs_strlcpy(flag_str, ds_cstr(&ds), sizeof flag_str);
+ ds_destroy(&ds);
+ return flag_str;
+}
+
+static const char *
+bfd_state_str(enum state state)
+{
+ switch (state) {
+ case STATE_ADMIN_DOWN: return "admin_down";
+ case STATE_DOWN: return "down";
+ case STATE_INIT: return "init";
+ case STATE_UP: return "up";
+ default: return "invalid";
+ }
+}
+
+static const char *
+bfd_diag_str(enum diag diag) {
+ switch (diag) {
+ case DIAG_NONE: return "No Diagnostic";
+ case DIAG_EXPIRED: return "Control Detection Time Expired";
+ case DIAG_ECHO_FAILED: return "Echo Function Failed";
+ case DIAG_RMT_DOWN: return "Neighbor Signaled Session Down";
+ case DIAG_FWD_RESET: return "Forwarding Plane Reset";
+ case DIAG_PATH_DOWN: return "Path Down";
+ case DIAG_CPATH_DOWN: return "Concatenated Path Down";
+ case DIAG_ADMIN_DOWN: return "Administratively Down";
+ case DIAG_RCPATH_DOWN: return "Reverse Concatenated Path Down";
+ default: return "Invalid Diagnostic";
+ }
+};
+
+static void
+log_msg(enum vlog_level level, const struct msg *p, const char *message,
+ const struct bfd *bfd)
+{
+ struct ds ds = DS_EMPTY_INITIALIZER;
+
+ if (vlog_should_drop(THIS_MODULE, level, &rl)) {
+ return;
+ }
+
+ ds_put_format(&ds,
+ "%s: %s."
+ "\n\tvers:%"PRIu8" diag:\"%s\" state:%s mult:%"PRIu8
+ " length:%"PRIu8
+ "\n\tflags: %s"
+ "\n\tmy_disc:0x%"PRIx32" your_disc:0x%"PRIx32
+ "\n\tmin_tx:%"PRIu32"us (%"PRIu32"ms)"
+ "\n\tmin_rx:%"PRIu32"us (%"PRIu32"ms)"
+ "\n\tmin_rx_echo:%"PRIu32"us (%"PRIu32"ms)",
+ bfd->name, message, p->vers_diag >> VERS_SHIFT,
+ bfd_diag_str(p->vers_diag & DIAG_MASK),
+ bfd_state_str(p->flags & STATE_MASK),
+ p->mult, p->length, bfd_flag_str(p->flags & FLAGS_MASK),
+ ntohl(p->my_disc), ntohl(p->your_disc),
+ ntohl(p->min_tx), ntohl(p->min_tx) / 1000,
+ ntohl(p->min_rx), ntohl(p->min_rx) / 1000,
+ ntohl(p->min_rx_echo), ntohl(p->min_rx_echo) / 1000);
+ bfd_put_details(&ds, bfd);
+ VLOG(level, "%s", ds_cstr(&ds));
+ ds_destroy(&ds);
+}
+
+static void
+bfd_set_state(struct bfd *bfd, enum state state, enum diag diag)
+{
+ if (diag == DIAG_NONE && bfd->cpath_down) {
+ diag = DIAG_CPATH_DOWN;
+ }
+
+ if (bfd->state != state || bfd->diag != diag) {
+ if (!VLOG_DROP_INFO(&rl)) {
+ struct ds ds = DS_EMPTY_INITIALIZER;
+
+ ds_put_format(&ds, "%s: BFD state change: %s->%s"
+ " \"%s\"->\"%s\".\n",
+ bfd->name, bfd_state_str(bfd->state),
+ bfd_state_str(state), bfd_diag_str(bfd->diag),
+ bfd_diag_str(diag));
+ bfd_put_details(&ds, bfd);
+ VLOG_INFO("%s", ds_cstr(&ds));
+ ds_destroy(&ds);
+ }
+
+ bfd->state = state;
+ bfd->diag = diag;
+
+ if (bfd->state <= STATE_DOWN) {
+ bfd->rmt_state = STATE_DOWN;
+ bfd->rmt_diag = DIAG_NONE;
+ bfd->rmt_min_rx = 1;
+ bfd->rmt_flags = 0;
+ bfd->rmt_disc = 0;
+ bfd->rmt_min_tx = 0;
+ }
+ }
+}
+
+static uint32_t
+generate_discriminator(void)
+{
+ uint32_t disc = 0;
+
+ /* RFC 5880 Section 6.8.1
+ * It SHOULD be set to a random (but still unique) value to improve
+ * security. The value is otherwise outside the scope of this
+ * specification. */
+
+ while (!disc) {
+ struct bfd *bfd;
+
+ /* 'disc' is by definition random, so there's no reason to waste time
+ * hashing it. */
+ disc = random_uint32();
+ HMAP_FOR_EACH_IN_BUCKET (bfd, node, disc, &all_bfds) {
+ if (bfd->disc == disc) {
+ disc = 0;
+ break;
+ }
+ }
+ }
+
+ return disc;
+}
+
+static struct bfd *
+bfd_find_by_name(const char *name)
+{
+ struct bfd *bfd;
+
+ HMAP_FOR_EACH (bfd, node, &all_bfds) {
+ if (!strcmp(bfd->name, name)) {
+ return bfd;
+ }
+ }
+ return NULL;
+}
+
+static void
+bfd_put_details(struct ds *ds, const struct bfd *bfd)
+{
+ ds_put_format(ds, "\tForwarding: %s\n",
+ bfd_forwarding(bfd) ? "true" : "false");
+ ds_put_format(ds, "\tDetect Multiplier: %d\n", bfd->mult);
+ ds_put_format(ds, "\tConcatenated Path Down: %s\n",
+ bfd->cpath_down ? "true" : "false");
+ ds_put_format(ds, "\tTX Interval: Approx %lldms\n", bfd_tx_interval(bfd));
+ ds_put_format(ds, "\tRX Interval: Approx %lldms\n", bfd_rx_interval(bfd));
+ ds_put_format(ds, "\tDetect Time: now %+lldms\n",
+ time_msec() - bfd->detect_time);
+ ds_put_format(ds, "\tNext TX Time: now %+lldms\n",
+ time_msec() - bfd->next_tx);
+ ds_put_format(ds, "\tLast TX Time: now %+lldms\n",
+ time_msec() - bfd->last_tx);
+
+ ds_put_cstr(ds, "\n");
+
+ ds_put_format(ds, "\tLocal Flags: %s\n", bfd_flag_str(bfd->flags));
+ ds_put_format(ds, "\tLocal Session State: %s\n",
+ bfd_state_str(bfd->state));
+ ds_put_format(ds, "\tLocal Diagnostic: %s\n", bfd_diag_str(bfd->diag));
+ ds_put_format(ds, "\tLocal Discriminator: 0x%"PRIx32"\n", bfd->disc);
+ ds_put_format(ds, "\tLocal Minimum TX Interval: %lldms\n",
+ bfd_min_tx(bfd));
+ ds_put_format(ds, "\tLocal Minimum RX Interval: %lldms\n", bfd->min_rx);
+
+ ds_put_cstr(ds, "\n");
+
+ ds_put_format(ds, "\tRemote Flags: %s\n", bfd_flag_str(bfd->rmt_flags));
+ ds_put_format(ds, "\tRemote Session State: %s\n",
+ bfd_state_str(bfd->rmt_state));
+ ds_put_format(ds, "\tRemote Diagnostic: %s\n",
+ bfd_diag_str(bfd->rmt_diag));
+ ds_put_format(ds, "\tRemote Discriminator: 0x%"PRIx32"\n", bfd->rmt_disc);
+ ds_put_format(ds, "\tRemote Minimum TX Interval: %lldms\n",
+ bfd->rmt_min_tx);
+ ds_put_format(ds, "\tRemote Minimum RX Interval: %lldms\n",
+ bfd->rmt_min_rx);
+}
+
+static void
+bfd_unixctl_show(struct unixctl_conn *conn, int argc, const char *argv[],
+ void *aux OVS_UNUSED)
+{
+ struct ds ds = DS_EMPTY_INITIALIZER;
+ struct bfd *bfd;
+
+ if (argc > 1) {
+ bfd = bfd_find_by_name(argv[1]);
+ if (!bfd) {
+ unixctl_command_reply_error(conn, "no such bfd object");
+ return;
+ }
+ bfd_put_details(&ds, bfd);
+ } else {
+ HMAP_FOR_EACH (bfd, node, &all_bfds) {
+ ds_put_format(&ds, "---- %s ----\n", bfd->name);
+ bfd_put_details(&ds, bfd);
+ }
+ }
+ unixctl_command_reply(conn, ds_cstr(&ds));
+ ds_destroy(&ds);
+}
--- /dev/null
+/* Copyright (c) 2012 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. */
+
+#ifndef BFD_H
+#define BFD_H 1
+
+#define BFD_PACKET_LEN 24
+#define BFD_DEST_PORT 3784
+
+#include <stdbool.h>
+#include <inttypes.h>
+
+struct bfd;
+struct flow;
+struct ofpbuf;
+struct smap;
+
+void bfd_wait(const struct bfd *);
+void bfd_run(struct bfd *);
+
+bool bfd_should_send_packet(const struct bfd *);
+void bfd_put_packet(struct bfd *bfd, struct ofpbuf *packet,
+ uint8_t eth_src[6]);
+
+bool bfd_should_process_flow(const struct flow *);
+void bfd_process_packet(struct bfd *, const struct flow *,
+ const struct ofpbuf *);
+
+struct bfd *bfd_configure(struct bfd *, const char *name,
+ const struct smap *smap);
+
+bool bfd_forwarding(const struct bfd *);
+void bfd_get_status(const struct bfd *, struct smap *);
+
+#endif /* bfd.h */
int port_no; /* Index into dp_netdev's 'ports'. */
struct list node; /* Element in dp_netdev's 'port_list'. */
struct netdev *netdev;
+ struct netdev_saved_flags *sf;
char *type; /* Port type as requested by user. */
};
do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
uint32_t port_no)
{
+ struct netdev_saved_flags *sf;
struct dp_netdev_port *port;
struct netdev *netdev;
const char *open_type;
return error;
}
- error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, false);
+ error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, &sf);
if (error) {
netdev_close(netdev);
return error;
port = xmalloc(sizeof *port);
port->port_no = port_no;
port->netdev = netdev;
+ port->sf = sf;
port->type = xstrdup(type);
error = netdev_get_mtu(netdev, &mtu);
dp->serial++;
netdev_close(port->netdev);
+ netdev_restore_flags(port->sf);
free(port->type);
free(port);
}
}
-/* Writes the current time to 'string' based on 'template'.
- * The current time is either local time or UTC based on 'utc'. */
+/* Writes time 'when' to 'string' based on 'template', in local time or UTC
+ * based on 'utc'. */
void
-ds_put_strftime(struct ds *ds, const char *template, bool utc)
+ds_put_strftime(struct ds *ds, const char *template, time_t when, bool utc)
{
struct tm tm;
- time_t now = time_wall();
if (utc) {
- gmtime_r(&now, &tm);
+ gmtime_r(&when, &tm);
} else {
- localtime_r(&now, &tm);
+ localtime_r(&when, &tm);
}
for (;;) {
}
}
+/* Returns a malloc()'d string for time 'when' based on 'template', in local
+ * time or UTC based on 'utc'. */
+char *
+xastrftime(const char *template, time_t when, bool utc)
+{
+ struct ds s;
+
+ ds_init(&s);
+ ds_put_strftime(&s, template, when, utc);
+ return s.string;
+}
+
int
ds_get_line(struct ds *ds, FILE *file)
{
/*
- * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc.
+ * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
+#include <time.h>
#include "compiler.h"
-struct tm;
-
/* A "dynamic string", that is, a buffer that can be used to construct a
* string across a series of operations that extend or modify it.
*
void ds_put_format_valist(struct ds *, const char *, va_list)
PRINTF_FORMAT(2, 0);
void ds_put_printable(struct ds *, const char *, size_t);
-void ds_put_strftime(struct ds *, const char *, bool utc)
- STRFTIME_FORMAT(2);
void ds_put_hex_dump(struct ds *ds, const void *buf_, size_t size,
uintptr_t ofs, bool ascii);
int ds_get_line(struct ds *, FILE *);
int ds_get_preprocessed_line(struct ds *, FILE *);
int ds_get_test_line(struct ds *, FILE *);
+void ds_put_strftime(struct ds *, const char *template, time_t when, bool utc)
+ STRFTIME_FORMAT(2);
+char *xastrftime(const char *template, time_t when, bool utc)
+ STRFTIME_FORMAT(1);
+
char *ds_cstr(struct ds *);
const char *ds_cstr_ro(const struct ds *);
char *ds_steal_cstr(struct ds *);
/* We only process the first fragment. */
if (frag_hdr->ip6f_offlg != htons(0)) {
- if ((frag_hdr->ip6f_offlg & IP6F_OFF_MASK) == htons(0)) {
- flow->nw_frag = FLOW_NW_FRAG_ANY;
- } else {
+ flow->nw_frag = FLOW_NW_FRAG_ANY;
+ if ((frag_hdr->ip6f_offlg & IP6F_OFF_MASK) != htons(0)) {
flow->nw_frag |= FLOW_NW_FRAG_LATER;
nexthdr = IPPROTO_FRAGMENT;
break;
BUILD_ASSERT_DECL(FLOW_WC_SEQ == 20);
fmd->tun_id = flow->tunnel.tun_id;
+ fmd->tun_src = flow->tunnel.ip_src;
+ fmd->tun_dst = flow->tunnel.ip_dst;
fmd->metadata = flow->metadata;
memcpy(fmd->regs, flow->regs, sizeof fmd->regs);
fmd->in_port = flow->in_port;
/* Represents the metadata fields of struct flow. */
struct flow_metadata {
ovs_be64 tun_id; /* Encapsulating tunnel ID. */
+    ovs_be32 tun_src;                /* Tunnel outer IPv4 src addr. */
+    ovs_be32 tun_dst;                /* Tunnel outer IPv4 dst addr. */
ovs_be64 metadata; /* OpenFlow 1.1+ metadata field. */
uint32_t regs[FLOW_N_REGS]; /* Registers. */
uint16_t in_port; /* OpenFlow port or zero. */
*
* Modifies 'arg'. */
void
-learn_parse(char *arg, const struct flow *flow, struct ofpbuf *ofpacts)
+learn_parse(char *arg, struct ofpbuf *ofpacts)
{
char *orig = xstrdup(arg);
char *name, *value;
struct ofpact_learn *learn;
struct match match;
- enum ofperr error;
learn = ofpact_put_LEARN(ofpacts);
learn->idle_timeout = OFP_FLOW_PERMANENT;
learn_parse_spec(orig, name, value, spec);
- /* Check prerequisites. */
- if (spec->src_type == NX_LEARN_SRC_FIELD
- && flow && !mf_are_prereqs_ok(spec->src.field, flow)) {
- ovs_fatal(0, "%s: cannot specify source field %s because "
- "prerequisites are not satisfied",
- orig, spec->src.field->name);
- }
- if ((spec->dst_type == NX_LEARN_DST_MATCH
- || spec->dst_type == NX_LEARN_DST_LOAD)
- && !mf_are_prereqs_ok(spec->dst.field, &match.flow)) {
- ovs_fatal(0, "%s: cannot specify destination field %s because "
- "prerequisites are not satisfied",
- orig, spec->dst.field->name);
- }
-
/* Update 'match' to allow for satisfying destination
* prerequisites. */
if (spec->src_type == NX_LEARN_SRC_IMMEDIATE
}
ofpact_update_len(ofpacts, &learn->ofpact);
- /* In theory the above should have caught any errors, but... */
- if (flow) {
- error = learn_check(learn, flow);
- if (error) {
- ovs_fatal(0, "%s: %s", orig, ofperr_to_string(error));
- }
- }
free(orig);
}
void learn_execute(const struct ofpact_learn *, const struct flow *,
struct ofputil_flow_mod *, struct ofpbuf *ofpacts);
-void learn_parse(char *, const struct flow *, struct ofpbuf *ofpacts);
+void learn_parse(char *, struct ofpbuf *ofpacts);
void learn_format(const struct ofpact_learn *, struct ds *);
#endif /* learn.h */
void
match_init_exact(struct match *match, const struct flow *flow)
{
- ovs_be64 tun_id = flow->tunnel.tun_id;
-
match->flow = *flow;
match->flow.skb_priority = 0;
match->flow.skb_mark = 0;
- memset(&match->flow.tunnel, 0, sizeof match->flow.tunnel);
- match->flow.tunnel.tun_id = tun_id;
flow_wildcards_init_exact(&match->wc);
}
}, {
MFF_TUN_SRC, "tun_src", NULL,
MF_FIELD_SIZES(be32),
- MFM_NONE,
+ MFM_FULLY,
MFS_IPV4,
MFP_NONE,
- false,
- 0, NULL,
- 0, NULL,
+ true,
+ NXM_NX_TUN_IPV4_SRC, "NXM_NX_TUN_IPV4_SRC",
+ NXM_NX_TUN_IPV4_SRC, "NXM_NX_TUN_IPV4_SRC",
}, {
MFF_TUN_DST, "tun_dst", NULL,
MF_FIELD_SIZES(be32),
- MFM_NONE,
+ MFM_FULLY,
MFS_IPV4,
MFP_NONE,
- false,
- 0, NULL,
- 0, NULL,
+ true,
+ NXM_NX_TUN_IPV4_DST, "NXM_NX_TUN_IPV4_DST",
+ NXM_NX_TUN_IPV4_DST, "NXM_NX_TUN_IPV4_DST",
}, {
MFF_TUN_FLAGS, "tun_flags", NULL,
MF_FIELD_SIZES(be16),
MFM_NONE,
MFS_OFP_PORT,
MFP_NONE,
- false,
+ true,
NXM_OF_IN_PORT, "NXM_OF_IN_PORT",
OXM_OF_IN_PORT, "OXM_OF_IN_PORT",
}, {
mf_is_all_wild(const struct mf_field *mf, const struct flow_wildcards *wc)
{
switch (mf->id) {
- case MFF_TUN_ID:
case MFF_TUN_SRC:
+ return !wc->masks.tunnel.ip_src;
case MFF_TUN_DST:
+ return !wc->masks.tunnel.ip_dst;
+ case MFF_TUN_ID:
case MFF_TUN_TOS:
case MFF_TUN_TTL:
case MFF_TUN_FLAGS:
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
-static int netdev_bsd_do_ioctl(const struct netdev *, struct ifreq *,
- unsigned long cmd, const char *cmd_name);
+static int netdev_bsd_do_ioctl(const char *, struct ifreq *, unsigned long cmd,
+ const char *cmd_name);
static void destroy_tap(int fd, const char *name);
-static int get_flags(const struct netdev *, int *flagsp);
-static int set_flags(struct netdev *, int flags);
+static int get_flags(const struct netdev_dev *, int *flagsp);
+static int set_flags(const char *, int flags);
static int do_set_addr(struct netdev *netdev,
int ioctl_nr, const char *ioctl_name,
struct in_addr addr);
struct ifreq ifr;
int error;
- error = netdev_bsd_do_ioctl(netdev_, &ifr, SIOCGIFMTU, "SIOCGIFMTU");
+ error = netdev_bsd_do_ioctl(netdev_get_name(netdev_), &ifr, SIOCGIFMTU,
+ "SIOCGIFMTU");
if (error) {
return error;
}
stats->rx_errors = ifmd.ifmd_data.ifi_ierrors;
stats->tx_errors = ifmd.ifmd_data.ifi_oerrors;
stats->rx_dropped = ifmd.ifmd_data.ifi_iqdrops;
- stats->tx_dropped = 0;
+ stats->tx_dropped = UINT64_MAX;
stats->multicast = ifmd.ifmd_data.ifi_imcasts;
stats->collisions = ifmd.ifmd_data.ifi_collisions;
- stats->rx_length_errors = 0;
- stats->rx_over_errors = 0;
- stats->rx_crc_errors = 0;
- stats->rx_frame_errors = 0;
- stats->rx_fifo_errors = 0;
- stats->rx_missed_errors = 0;
-
- stats->tx_aborted_errors = 0;
- stats->tx_carrier_errors = 0;
- stats->tx_fifo_errors = 0;
- stats->tx_heartbeat_errors = 0;
- stats->tx_window_errors = 0;
+ stats->rx_length_errors = UINT64_MAX;
+ stats->rx_over_errors = UINT64_MAX;
+ stats->rx_crc_errors = UINT64_MAX;
+ stats->rx_frame_errors = UINT64_MAX;
+ stats->rx_fifo_errors = UINT64_MAX;
+ stats->rx_missed_errors = UINT64_MAX;
+
+ stats->tx_aborted_errors = UINT64_MAX;
+ stats->tx_carrier_errors = UINT64_MAX;
+ stats->tx_fifo_errors = UINT64_MAX;
+ stats->tx_heartbeat_errors = UINT64_MAX;
+ stats->tx_window_errors = UINT64_MAX;
break;
}
}
int error;
ifr.ifr_addr.sa_family = AF_INET;
- error = netdev_bsd_do_ioctl(netdev_, &ifr,
+ error = netdev_bsd_do_ioctl(netdev_get_name(netdev_), &ifr,
SIOCGIFADDR, "SIOCGIFADDR");
if (error) {
return error;
sin = (struct sockaddr_in *) &ifr.ifr_addr;
netdev_dev->in4 = sin->sin_addr;
netdev_dev->cache_valid |= VALID_IN4;
- error = netdev_bsd_do_ioctl(netdev_, &ifr,
+ error = netdev_bsd_do_ioctl(netdev_get_name(netdev_), &ifr,
SIOCGIFNETMASK, "SIOCGIFNETMASK");
if (error) {
return error;
}
static int
-netdev_bsd_update_flags(struct netdev *netdev, enum netdev_flags off,
+netdev_bsd_update_flags(struct netdev_dev *dev_, enum netdev_flags off,
enum netdev_flags on, enum netdev_flags *old_flagsp)
{
+ struct netdev_dev_bsd *netdev_dev;
int old_flags, new_flags;
int error;
- error = get_flags(netdev, &old_flags);
+ netdev_dev = netdev_dev_bsd_cast(dev_);
+ error = get_flags(dev_, &old_flags);
if (!error) {
*old_flagsp = iff_to_nd_flags(old_flags);
new_flags = (old_flags & ~nd_to_iff_flags(off)) | nd_to_iff_flags(on);
if (new_flags != old_flags) {
- error = set_flags(netdev, new_flags);
- netdev_dev_bsd_changed(netdev_dev_bsd_cast(netdev_get_dev(netdev)));
+ error = set_flags(netdev_dev_get_name(dev_), new_flags);
+ netdev_dev_bsd_changed(netdev_dev);
}
}
return error;
}
static int
-get_flags(const struct netdev *netdev, int *flags)
+get_flags(const struct netdev_dev *dev, int *flags)
{
struct ifreq ifr;
int error;
- error = netdev_bsd_do_ioctl(netdev, &ifr, SIOCGIFFLAGS, "SIOCGIFFLAGS");
+ error = netdev_bsd_do_ioctl(dev->name, &ifr, SIOCGIFFLAGS, "SIOCGIFFLAGS");
*flags = 0xFFFF0000 & (ifr.ifr_flagshigh << 16);
*flags |= 0x0000FFFF & ifr.ifr_flags;
}
static int
-set_flags(struct netdev *netdev, int flags)
+set_flags(const char *name, int flags)
{
struct ifreq ifr;
ifr.ifr_flags = 0x0000FFFF & flags;
ifr.ifr_flagshigh = (0xFFFF0000 & flags) >> 16;
- return netdev_bsd_do_ioctl(netdev, &ifr, SIOCSIFFLAGS, "SIOCSIFFLAGS");
+ return netdev_bsd_do_ioctl(name, &ifr, SIOCSIFFLAGS, "SIOCSIFFLAGS");
}
static int
}
static int
-netdev_bsd_do_ioctl(const struct netdev *netdev, struct ifreq *ifr,
- unsigned long cmd, const char *cmd_name)
+netdev_bsd_do_ioctl(const char *name, struct ifreq *ifr, unsigned long cmd,
+ const char *cmd_name)
{
- strncpy(ifr->ifr_name, netdev_get_name(netdev), sizeof ifr->ifr_name);
+ strncpy(ifr->ifr_name, name, sizeof ifr->ifr_name);
if (ioctl(af_inet_sock, cmd, ifr) == -1) {
- VLOG_DBG_RL(&rl, "%s: ioctl(%s) failed: %s",
- netdev_get_name(netdev), cmd_name, strerror(errno));
+ VLOG_DBG_RL(&rl, "%s: ioctl(%s) failed: %s", name, cmd_name,
+ strerror(errno));
return errno;
}
return 0;
}
static int
-netdev_dummy_update_flags(struct netdev *netdev,
+netdev_dummy_update_flags(struct netdev_dev *dev_,
enum netdev_flags off, enum netdev_flags on,
enum netdev_flags *old_flagsp)
{
- struct netdev_dev_dummy *dev =
- netdev_dev_dummy_cast(netdev_get_dev(netdev));
+ struct netdev_dev_dummy *dev = netdev_dev_dummy_cast(dev_);
return netdev_dev_dummy_update_flags(dev, off, on, old_flagsp);
}
static int netdev_linux_get_ipv4(const struct netdev *, struct in_addr *,
int cmd, const char *cmd_name);
static int get_flags(const struct netdev_dev *, unsigned int *flags);
-static int set_flags(struct netdev *, unsigned int flags);
+static int set_flags(const char *, unsigned int flags);
static int do_get_ifindex(const char *netdev_name);
static int get_ifindex(const struct netdev *, int *ifindexp);
static int do_set_addr(struct netdev *netdev,
{
struct netdev_dev_linux *netdev_dev =
netdev_dev_linux_cast(netdev_get_dev(netdev_));
+ struct netdev_saved_flags *sf = NULL;
int error;
- bool up_again = false;
if (netdev_dev->cache_valid & VALID_ETHERADDR) {
if (netdev_dev->ether_addr_error) {
enum netdev_flags flags;
if (!netdev_get_flags(netdev_, &flags) && (flags & NETDEV_UP)) {
- netdev_turn_flags_off(netdev_, NETDEV_UP, false);
- up_again = true;
+ netdev_turn_flags_off(netdev_, NETDEV_UP, &sf);
}
}
error = set_etheraddr(netdev_get_name(netdev_), mac);
}
}
- if (up_again) {
- netdev_turn_flags_on(netdev_, NETDEV_UP, false);
- }
+ netdev_restore_flags(sf);
return error;
}
}
static int
-netdev_linux_update_flags(struct netdev *netdev, enum netdev_flags off,
+netdev_linux_update_flags(struct netdev_dev *dev_, enum netdev_flags off,
enum netdev_flags on, enum netdev_flags *old_flagsp)
{
struct netdev_dev_linux *netdev_dev;
int old_flags, new_flags;
int error = 0;
- netdev_dev = netdev_dev_linux_cast(netdev_get_dev(netdev));
+ netdev_dev = netdev_dev_linux_cast(dev_);
old_flags = netdev_dev->ifi_flags;
*old_flagsp = iff_to_nd_flags(old_flags);
new_flags = (old_flags & ~nd_to_iff_flags(off)) | nd_to_iff_flags(on);
if (new_flags != old_flags) {
- error = set_flags(netdev, new_flags);
+ error = set_flags(netdev_dev_get_name(dev_), new_flags);
get_flags(&netdev_dev->netdev_dev, &netdev_dev->ifi_flags);
}
return error;
}
static int
-set_flags(struct netdev *netdev, unsigned int flags)
+set_flags(const char *name, unsigned int flags)
{
struct ifreq ifr;
ifr.ifr_flags = flags;
- return netdev_linux_do_ioctl(netdev_get_name(netdev), &ifr, SIOCSIFFLAGS,
- "SIOCSIFFLAGS");
+ return netdev_linux_do_ioctl(name, &ifr, SIOCSIFFLAGS, "SIOCSIFFLAGS");
}
static int
this device. */
int ref_cnt; /* Times this devices was opened. */
struct shash_node *node; /* Pointer to element in global map. */
+ struct list saved_flags_list; /* Contains "struct netdev_saved_flags". */
};
void netdev_dev_init(struct netdev_dev *, const char *name,
struct netdev {
struct netdev_dev *netdev_dev; /* Parent netdev_dev. */
struct list node; /* Element in global list. */
-
- enum netdev_flags save_flags; /* Initial device flags. */
- enum netdev_flags changed_flags; /* Flags that we changed. */
};
void netdev_init(struct netdev *, struct netdev_dev *);
int (*arp_lookup)(const struct netdev *netdev, ovs_be32 ip,
uint8_t mac[6]);
- /* Retrieves the current set of flags on 'netdev' into '*old_flags'.
- * Then, turns off the flags that are set to 1 in 'off' and turns on the
- * flags that are set to 1 in 'on'. (No bit will be set to 1 in both 'off'
- * and 'on'; that is, off & on == 0.)
+ /* Retrieves the current set of flags on 'dev' into '*old_flags'. Then,
+ * turns off the flags that are set to 1 in 'off' and turns on the flags
+ * that are set to 1 in 'on'. (No bit will be set to 1 in both 'off' and
+ * 'on'; that is, off & on == 0.)
*
* This function may be invoked from a signal handler. Therefore, it
* should not do anything that is not signal-safe (such as logging). */
- int (*update_flags)(struct netdev *netdev, enum netdev_flags off,
+ int (*update_flags)(struct netdev_dev *dev, enum netdev_flags off,
enum netdev_flags on, enum netdev_flags *old_flags);
/* Returns a sequence number which indicates changes in one of 'netdev''s
}
static int
-netdev_vport_update_flags(struct netdev *netdev OVS_UNUSED,
+netdev_vport_update_flags(struct netdev_dev *netdev_dev OVS_UNUSED,
enum netdev_flags off, enum netdev_flags on OVS_UNUSED,
enum netdev_flags *old_flagsp)
{
SMAP_FOR_EACH (node, args) {
if (!strcmp(node->key, "remote_ip")) {
struct in_addr in_addr;
- if (lookup_ip(node->value, &in_addr)) {
+ if (!strcmp(node->value, "flow")) {
+ tnl_cfg.ip_dst_flow = true;
+ tnl_cfg.ip_dst = htonl(0);
+ } else if (lookup_ip(node->value, &in_addr)) {
VLOG_WARN("%s: bad %s 'remote_ip'", name, type);
} else if (ip_is_multicast(in_addr.s_addr)) {
VLOG_WARN("%s: multicast remote_ip="IP_FMT" not allowed",
}
} else if (!strcmp(node->key, "local_ip")) {
struct in_addr in_addr;
- if (lookup_ip(node->value, &in_addr)) {
+ if (!strcmp(node->value, "flow")) {
+ tnl_cfg.ip_src_flow = true;
+ tnl_cfg.ip_src = htonl(0);
+ } else if (lookup_ip(node->value, &in_addr)) {
VLOG_WARN("%s: bad %s 'local_ip'", name, type);
} else {
tnl_cfg.ip_src = in_addr.s_addr;
}
}
- if (!tnl_cfg.ip_dst) {
+ if (!tnl_cfg.ip_dst && !tnl_cfg.ip_dst_flow) {
VLOG_ERR("%s: %s type requires valid 'remote_ip' argument",
name, type);
return EINVAL;
}
+ if (tnl_cfg.ip_src_flow && !tnl_cfg.ip_dst_flow) {
+ VLOG_ERR("%s: %s type requires 'remote_ip=flow' with 'local_ip=flow'",
+ name, type);
+ return EINVAL;
+ }
if (!tnl_cfg.ttl) {
tnl_cfg.ttl = DEFAULT_TTL;
}
if (tnl_cfg->ip_dst) {
smap_add_format(args, "remote_ip", IP_FMT, IP_ARGS(tnl_cfg->ip_dst));
+ } else if (tnl_cfg->ip_dst_flow) {
+ smap_add(args, "remote_ip", "flow");
}
if (tnl_cfg->ip_src) {
smap_add_format(args, "local_ip", IP_FMT, IP_ARGS(tnl_cfg->ip_src));
+ } else if (tnl_cfg->ip_src_flow) {
+ smap_add(args, "local_ip", "flow");
}
if (tnl_cfg->in_key_flow && tnl_cfg->out_key_flow) {
COVERAGE_DEFINE(netdev_add_router);
COVERAGE_DEFINE(netdev_get_stats);
+struct netdev_saved_flags {
+ struct netdev_dev *dev;
+ struct list node; /* In struct netdev_dev's saved_flags_list. */
+ enum netdev_flags saved_flags;
+ enum netdev_flags saved_values;
+};
+
static struct shash netdev_classes = SHASH_INITIALIZER(&netdev_classes);
/* All created network devices. */
* additional log messages. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
-static void close_all_netdevs(void *aux OVS_UNUSED);
-static int restore_flags(struct netdev *netdev);
+static void restore_all_flags(void *aux OVS_UNUSED);
void update_device_args(struct netdev_dev *, const struct shash *args);
static void
if (!inited) {
inited = true;
- fatal_signal_add_hook(close_all_netdevs, NULL, NULL, true);
+ fatal_signal_add_hook(restore_all_flags, NULL, NULL, true);
netdev_vport_patch_register();
#ifdef LINUX_DATAPATH
}
}
+static void
+netdev_dev_unref(struct netdev_dev *dev)
+{
+ ovs_assert(dev->ref_cnt);
+ if (!--dev->ref_cnt) {
+ netdev_dev_uninit(dev, true);
+ }
+}
+
/* Closes and destroys 'netdev'. */
void
netdev_close(struct netdev *netdev)
{
if (netdev) {
- struct netdev_dev *netdev_dev = netdev_get_dev(netdev);
+ struct netdev_dev *dev = netdev_get_dev(netdev);
- ovs_assert(netdev_dev->ref_cnt);
- netdev_dev->ref_cnt--;
netdev_uninit(netdev, true);
-
- /* If the reference count for the netdev device is zero, destroy it. */
- if (!netdev_dev->ref_cnt) {
- netdev_dev_uninit(netdev_dev, true);
- }
+ netdev_dev_unref(dev);
}
}
}
/* On 'netdev', turns off the flags in 'off' and then turns on the flags in
- * 'on'. If 'permanent' is true, the changes will persist; otherwise, they
- * will be reverted when 'netdev' is closed or the program exits. Returns 0 if
- * successful, otherwise a positive errno value. */
+ * 'on'. Returns 0 if successful, otherwise a positive errno value. */
static int
do_update_flags(struct netdev *netdev, enum netdev_flags off,
enum netdev_flags on, enum netdev_flags *old_flagsp,
- bool permanent)
+ struct netdev_saved_flags **sfp)
{
+ struct netdev_dev *dev = netdev_get_dev(netdev);
+ struct netdev_saved_flags *sf = NULL;
enum netdev_flags old_flags;
int error;
- error = netdev_get_dev(netdev)->netdev_class->update_flags(netdev,
- off & ~on, on, &old_flags);
+ error = dev->netdev_class->update_flags(dev, off & ~on, on, &old_flags);
if (error) {
VLOG_WARN_RL(&rl, "failed to %s flags for network device %s: %s",
off || on ? "set" : "get", netdev_get_name(netdev),
strerror(error));
old_flags = 0;
- } else if ((off || on) && !permanent) {
+ } else if ((off || on) && sfp) {
enum netdev_flags new_flags = (old_flags & ~off) | on;
enum netdev_flags changed_flags = old_flags ^ new_flags;
if (changed_flags) {
- if (!netdev->changed_flags) {
- netdev->save_flags = old_flags;
- }
- netdev->changed_flags |= changed_flags;
+ *sfp = sf = xmalloc(sizeof *sf);
+ sf->dev = dev;
+ list_push_front(&dev->saved_flags_list, &sf->node);
+ sf->saved_flags = changed_flags;
+ sf->saved_values = changed_flags & new_flags;
+
+ dev->ref_cnt++;
}
}
+
if (old_flagsp) {
*old_flagsp = old_flags;
}
+ if (sfp) {
+ *sfp = sf;
+ }
+
return error;
}
netdev_get_flags(const struct netdev *netdev_, enum netdev_flags *flagsp)
{
struct netdev *netdev = CONST_CAST(struct netdev *, netdev_);
- return do_update_flags(netdev, 0, 0, flagsp, false);
+ return do_update_flags(netdev, 0, 0, flagsp, NULL);
}
/* Sets the flags for 'netdev' to 'flags'.
- * If 'permanent' is true, the changes will persist; otherwise, they
- * will be reverted when 'netdev' is closed or the program exits.
* Returns 0 if successful, otherwise a positive errno value. */
int
netdev_set_flags(struct netdev *netdev, enum netdev_flags flags,
- bool permanent)
+ struct netdev_saved_flags **sfp)
{
- return do_update_flags(netdev, -1, flags, NULL, permanent);
+ return do_update_flags(netdev, -1, flags, NULL, sfp);
}
-/* Turns on the specified 'flags' on 'netdev'.
- * If 'permanent' is true, the changes will persist; otherwise, they
- * will be reverted when 'netdev' is closed or the program exits.
- * Returns 0 if successful, otherwise a positive errno value. */
+/* Turns on the specified 'flags' on 'netdev':
+ *
+ * - On success, returns 0. If 'sfp' is nonnull, sets '*sfp' to a newly
+ * allocated 'struct netdev_saved_flags *' that may be passed to
+ * netdev_restore_flags() to restore the original values of 'flags' on
+ * 'netdev' (this will happen automatically at program termination if
+ *       netdev_restore_flags() is never called), or to NULL if no flags were
+ * actually changed.
+ *
+ * - On failure, returns a positive errno value. If 'sfp' is nonnull, sets
+ * '*sfp' to NULL. */
int
netdev_turn_flags_on(struct netdev *netdev, enum netdev_flags flags,
- bool permanent)
+ struct netdev_saved_flags **sfp)
{
- return do_update_flags(netdev, 0, flags, NULL, permanent);
+ return do_update_flags(netdev, 0, flags, NULL, sfp);
}
-/* Turns off the specified 'flags' on 'netdev'.
- * If 'permanent' is true, the changes will persist; otherwise, they
- * will be reverted when 'netdev' is closed or the program exits.
- * Returns 0 if successful, otherwise a positive errno value. */
+/* Turns off the specified 'flags' on 'netdev'. See netdev_turn_flags_on() for
+ * details of the interface. */
int
netdev_turn_flags_off(struct netdev *netdev, enum netdev_flags flags,
- bool permanent)
+ struct netdev_saved_flags **sfp)
+{
+ return do_update_flags(netdev, flags, 0, NULL, sfp);
+}
+
+/* Restores the flags that were saved in 'sf', and destroys 'sf'.
+ * Does nothing if 'sf' is NULL. */
+void
+netdev_restore_flags(struct netdev_saved_flags *sf)
{
- return do_update_flags(netdev, flags, 0, NULL, permanent);
+ if (sf) {
+ struct netdev_dev *dev = sf->dev;
+ enum netdev_flags old_flags;
+
+ dev->netdev_class->update_flags(dev,
+ sf->saved_flags & sf->saved_values,
+ sf->saved_flags & ~sf->saved_values,
+ &old_flags);
+ list_remove(&sf->node);
+ free(sf);
+
+ netdev_dev_unref(dev);
+ }
}
/* Looks up the ARP table entry for 'ip' on 'netdev'. If one exists and can be
if (class->set_qos) {
if (!details) {
- static struct smap empty = SMAP_INITIALIZER(&empty);
+ static const struct smap empty = SMAP_INITIALIZER(&empty);
details = ∅
}
return class->set_qos(netdev, type, details);
netdev_dev->netdev_class = netdev_class;
netdev_dev->name = xstrdup(name);
netdev_dev->node = shash_add(&netdev_dev_shash, name, netdev_dev);
+ list_init(&netdev_dev->saved_flags_list);
}
/* Undoes the results of initialization.
char *name = netdev_dev->name;
ovs_assert(!netdev_dev->ref_cnt);
+ ovs_assert(list_is_empty(&netdev_dev->saved_flags_list));
shash_delete(&netdev_dev_shash, netdev_dev->node);
void
netdev_uninit(struct netdev *netdev, bool close)
{
- /* Restore flags that we changed, if any. */
- int error = restore_flags(netdev);
list_remove(&netdev->node);
- if (error) {
- VLOG_WARN("failed to restore network device flags on %s: %s",
- netdev_get_name(netdev), strerror(error));
- }
-
if (close) {
netdev_get_dev(netdev)->netdev_class->close(netdev);
}
}
-
/* Returns the class type of 'netdev'.
*
* The caller must not free the returned value. */
return netdev_get_dev(netdev)->netdev_class->type;
}
-
const char *
netdev_get_type_from_name(const char *name)
{
return netdev->netdev_dev;
}
\f
-/* Restore the network device flags on 'netdev' to those that were active
- * before we changed them. Returns 0 if successful, otherwise a positive
- * errno value.
- *
- * To avoid reentry, the caller must ensure that fatal signals are blocked. */
-static int
-restore_flags(struct netdev *netdev)
-{
- if (netdev->changed_flags) {
- enum netdev_flags restore = netdev->save_flags & netdev->changed_flags;
- enum netdev_flags old_flags;
- return netdev_get_dev(netdev)->netdev_class->update_flags(netdev,
- netdev->changed_flags & ~restore,
- restore, &old_flags);
- }
- return 0;
-}
-
-/* Close all netdevs on shutdown so they can do any needed cleanup such as
- * destroying devices, restoring flags, etc. */
+/* Restores all flags that have been saved (by netdev_set_flags(),
+ * netdev_turn_flags_on(), or netdev_turn_flags_off()) and not yet restored
+ * with netdev_restore_flags(). */
static void
-close_all_netdevs(void *aux OVS_UNUSED)
+restore_all_flags(void *aux OVS_UNUSED)
{
- struct netdev *netdev, *next;
- LIST_FOR_EACH_SAFE(netdev, next, node, &netdev_list) {
- netdev_close(netdev);
+ struct shash_node *node;
+
+ SHASH_FOR_EACH (node, &netdev_dev_shash) {
+ struct netdev_dev *dev = node->data;
+ const struct netdev_saved_flags *sf;
+ enum netdev_flags saved_values;
+ enum netdev_flags saved_flags;
+
+ saved_values = saved_flags = 0;
+ LIST_FOR_EACH (sf, node, &dev->saved_flags_list) {
+ saved_flags |= sf->saved_flags;
+ saved_values &= ~sf->saved_flags;
+ saved_values |= sf->saved_flags & sf->saved_values;
+ }
+ if (saved_flags) {
+ enum netdev_flags old_flags;
+
+ dev->netdev_class->update_flags(dev,
+ saved_flags & saved_values,
+ saved_flags & ~saved_values,
+ &old_flags);
+ }
}
}
* The PORTING file at the top of the source tree has more information in the
* "Writing a netdev Provider" section. */
+struct netdev_saved_flags;
struct ofpbuf;
struct in_addr;
struct in6_addr;
struct smap;
struct sset;
-enum netdev_flags {
- NETDEV_UP = 0x0001, /* Device enabled? */
- NETDEV_PROMISC = 0x0002, /* Promiscuous mode? */
- NETDEV_LOOPBACK = 0x0004 /* This is a loopback device. */
-};
-
/* Network device statistics.
*
* Values of unsupported statistics are set to all-1-bits (UINT64_MAX). */
ovs_be16 dst_port;
+ bool ip_src_flow;
+ bool ip_dst_flow;
ovs_be32 ip_src;
ovs_be32 ip_dst;
bool netdev_features_is_full_duplex(enum netdev_features features);
int netdev_set_advertisements(struct netdev *, enum netdev_features advertise);
+/* Flags. */
+enum netdev_flags {
+ NETDEV_UP = 0x0001, /* Device enabled? */
+ NETDEV_PROMISC = 0x0002, /* Promiscuous mode? */
+ NETDEV_LOOPBACK = 0x0004 /* This is a loopback device. */
+};
+
+int netdev_get_flags(const struct netdev *, enum netdev_flags *);
+int netdev_set_flags(struct netdev *, enum netdev_flags,
+ struct netdev_saved_flags **);
+int netdev_turn_flags_on(struct netdev *, enum netdev_flags,
+ struct netdev_saved_flags **);
+int netdev_turn_flags_off(struct netdev *, enum netdev_flags,
+ struct netdev_saved_flags **);
+
+void netdev_restore_flags(struct netdev_saved_flags *);
+
/* TCP/IP stack interface. */
int netdev_get_in4(const struct netdev *, struct in_addr *address,
struct in_addr *netmask);
int netdev_get_status(const struct netdev *, struct smap *);
int netdev_arp_lookup(const struct netdev *, ovs_be32 ip, uint8_t mac[6]);
-int netdev_get_flags(const struct netdev *, enum netdev_flags *);
-int netdev_set_flags(struct netdev *, enum netdev_flags, bool permanent);
-int netdev_turn_flags_on(struct netdev *, enum netdev_flags, bool permanent);
-int netdev_turn_flags_off(struct netdev *, enum netdev_flags, bool permanent);
struct netdev *netdev_find_dev_by_in4(const struct in_addr *);
/* Statistics. */
/* Tunnel ID. */
nxm_put_64m(b, oxm ? OXM_OF_TUNNEL_ID : NXM_NX_TUN_ID,
- flow->tunnel.tun_id, match->wc.masks.tunnel.tun_id);
+ flow->tunnel.tun_id, match->wc.masks.tunnel.tun_id);
+
+ /* Other tunnel metadata. */
+ nxm_put_32m(b, NXM_NX_TUN_IPV4_SRC,
+ flow->tunnel.ip_src, match->wc.masks.tunnel.ip_src);
+ nxm_put_32m(b, NXM_NX_TUN_IPV4_DST,
+ flow->tunnel.ip_dst, match->wc.masks.tunnel.ip_dst);
/* Registers. */
for (i = 0; i < FLOW_N_REGS; i++) {
return "stp";
case SLOW_IN_BAND:
return "in_band";
+ case SLOW_BFD:
+ return "bfd";
case SLOW_CONTROLLER:
return "controller";
case SLOW_MATCH:
SLOW_LACP = 1 << 1, /* LACP packets need per-packet processing. */
SLOW_STP = 1 << 2, /* STP packets need per-packet processing. */
SLOW_IN_BAND = 1 << 3, /* In-band control needs every packet. */
+ SLOW_BFD = 1 << 4, /* BFD packets need per-packet processing. */
- /* Mutually exclusive with SLOW_CFM, SLOW_LACP, SLOW_STP.
+ /* Mutually exclusive with SLOW_BFD, SLOW_CFM, SLOW_LACP, SLOW_STP.
* Could possibly appear with SLOW_IN_BAND. */
- SLOW_CONTROLLER = 1 << 4, /* Packets must go to OpenFlow controller. */
+ SLOW_CONTROLLER = 1 << 5, /* Packets must go to OpenFlow controller. */
/* This can appear on its own, or, theoretically at least, along with any
* other combination of reasons. */
- SLOW_MATCH = 1 << 5, /* Datapath can't match specifically enough. */
+ SLOW_MATCH = 1 << 6, /* Datapath can't match specifically enough. */
};
#endif /* odp-util.h */
return nxm_reg_move_check(ofpact_get_REG_MOVE(a), flow);
case OFPACT_REG_LOAD:
- if (*dl_type != flow->dl_type) {
- struct flow updated_flow = *flow;
- updated_flow.dl_type = *dl_type;
- return nxm_reg_load_check(ofpact_get_REG_LOAD(a), &updated_flow);
- } else {
- return nxm_reg_load_check(ofpact_get_REG_LOAD(a), flow);
- }
+ return nxm_reg_load_check(ofpact_get_REG_LOAD(a), flow);
case OFPACT_STACK_PUSH:
return nxm_stack_push_check(ofpact_get_STACK_PUSH(a), flow);
{
const struct ofpact *a;
ovs_be16 dl_type = flow->dl_type;
+ struct flow updated_flow;
OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) {
- enum ofperr error = ofpact_check__(a, flow, max_ports, &dl_type);
+ enum ofperr error;
+
+ /* If the dl_type was changed by an action then its new value
+ * should be present in the flow passed to ofpact_check__(). */
+ if (flow->dl_type != dl_type) {
+ /* Only copy flow at most once */
+ if (flow != &updated_flow) {
+ updated_flow = *flow;
+ flow = &updated_flow;
+ }
+ updated_flow.dl_type = dl_type;
+ }
+
+ error = ofpact_check__(a, flow, max_ports, &dl_type);
if (error) {
return error;
}
}
static void
-parse_named_action(enum ofputil_action_code code, const struct flow *flow,
+parse_named_action(enum ofputil_action_code code,
char *arg, struct ofpbuf *ofpacts)
{
struct ofpact_tunnel *tunnel;
NOT_REACHED();
case OFPUTIL_NXAST_LEARN:
- learn_parse(arg, flow, ofpacts);
+ learn_parse(arg, ofpacts);
break;
case OFPUTIL_NXAST_EXIT:
}
static bool
-str_to_ofpact__(const struct flow *flow, char *pos, char *act, char *arg,
+str_to_ofpact__(char *pos, char *act, char *arg,
struct ofpbuf *ofpacts, int n_actions)
{
int code = ofputil_action_code_from_name(act);
if (code >= 0) {
- parse_named_action(code, flow, arg, ofpacts);
+ parse_named_action(code, arg, ofpacts);
} else if (!strcasecmp(act, "drop")) {
if (n_actions) {
ovs_fatal(0, "Drop actions must not be preceded by other "
}
static void
-str_to_ofpacts(const struct flow *flow, char *str, struct ofpbuf *ofpacts)
+str_to_ofpacts(char *str, struct ofpbuf *ofpacts)
{
char *pos, *act, *arg;
enum ofperr error;
pos = str;
n_actions = 0;
while (ofputil_parse_key_value(&pos, &act, &arg)) {
- if (!str_to_ofpact__(flow, pos, act, arg, ofpacts, n_actions)) {
+ if (!str_to_ofpact__(pos, act, arg, ofpacts, n_actions)) {
break;
}
n_actions++;
}
static void
-str_to_inst_ofpacts(const struct flow *flow, char *str, struct ofpbuf *ofpacts)
+str_to_inst_ofpacts(char *str, struct ofpbuf *ofpacts)
{
char *pos, *inst, *arg;
int type;
while (ofputil_parse_key_value(&pos, &inst, &arg)) {
type = ofpact_instruction_type_from_name(inst);
if (type < 0) {
- if (!str_to_ofpact__(flow, pos, inst, arg, ofpacts, n_actions)) {
+ if (!str_to_ofpact__(pos, inst, arg, ofpacts, n_actions)) {
break;
}
}
if (fields & F_ACTIONS) {
struct ofpbuf ofpacts;
+ enum ofperr err;
ofpbuf_init(&ofpacts, 32);
- str_to_inst_ofpacts(&fm->match.flow, act_str, &ofpacts);
+ str_to_inst_ofpacts(act_str, &ofpacts);
fm->ofpacts_len = ofpacts.size;
fm->ofpacts = ofpbuf_steal_data(&ofpacts);
+
+ err = ofpacts_check(fm->ofpacts, fm->ofpacts_len, &fm->match.flow,
+ OFPP_MAX);
+ if (err) {
+ exit(EXIT_FAILURE);
+ }
+
} else {
fm->ofpacts_len = 0;
fm->ofpacts = NULL;
parse_ofpacts(const char *s_, struct ofpbuf *ofpacts)
{
char *s = xstrdup(s_);
- str_to_ofpacts(NULL, s, ofpacts);
+ str_to_ofpacts(s, ofpacts);
free(s);
}
ds_put_format(string, " tun_id=0x%"PRIx64, ntohll(pin.fmd.tun_id));
}
+ if (pin.fmd.tun_src != htonl(0)) {
+ ds_put_format(string, " tun_src="IP_FMT, IP_ARGS(pin.fmd.tun_src));
+ }
+
+ if (pin.fmd.tun_dst != htonl(0)) {
+ ds_put_format(string, " tun_dst="IP_FMT, IP_ARGS(pin.fmd.tun_dst));
+ }
+
if (pin.fmd.metadata != htonll(0)) {
ds_put_format(string, " metadata=0x%"PRIx64, ntohll(pin.fmd.metadata));
}
return true;
}
-static bool
-tun_parms_fully_wildcarded(const struct flow_wildcards *wc)
-{
- return (!wc->masks.tunnel.ip_src &&
- !wc->masks.tunnel.ip_dst &&
- !wc->masks.tunnel.ip_ttl &&
- !wc->masks.tunnel.ip_tos &&
- !wc->masks.tunnel.flags);
-}
-
/* Returns a bit-mask of ofputil_protocols that can be used for sending 'match'
* to a switch (e.g. to add or remove a flow). Only NXM can handle tunnel IDs,
* registers, or fixing the Ethernet multicast bit. Otherwise, it's better to
BUILD_ASSERT_DECL(FLOW_WC_SEQ == 20);
- /* tunnel params other than tun_id can't be sent in a flow_mod */
- if (!tun_parms_fully_wildcarded(wc)) {
+ /* These tunnel params can't be sent in a flow_mod */
+ if (wc->masks.tunnel.ip_ttl
+ || wc->masks.tunnel.ip_tos || wc->masks.tunnel.flags) {
return OFPUTIL_P_NONE;
}
| OFPUTIL_P_OF13_OXM;
}
- /* NXM and OXM support matching tun_id. */
- if (wc->masks.tunnel.tun_id != htonll(0)) {
+ /* NXM and OXM support matching tun_id, tun_src, and tun_dst. */
+ if (wc->masks.tunnel.tun_id != htonll(0)
+ || wc->masks.tunnel.ip_src != htonl(0)
+ || wc->masks.tunnel.ip_dst != htonl(0)) {
return OFPUTIL_P_OF10_NXM_ANY | OFPUTIL_P_OF12_OXM
| OFPUTIL_P_OF13_OXM;
}
pin->fmd.in_port = match->flow.in_port;
pin->fmd.tun_id = match->flow.tunnel.tun_id;
+ pin->fmd.tun_src = match->flow.tunnel.ip_src;
+ pin->fmd.tun_dst = match->flow.tunnel.ip_dst;
pin->fmd.metadata = match->flow.metadata;
memcpy(pin->fmd.regs, match->flow.regs, sizeof pin->fmd.regs);
}
if (pin->fmd.tun_id != htonll(0)) {
match_set_tun_id(match, pin->fmd.tun_id);
}
+ if (pin->fmd.tun_src != htonl(0)) {
+ match_set_tun_src(match, pin->fmd.tun_src);
+ }
+ if (pin->fmd.tun_dst != htonl(0)) {
+ match_set_tun_dst(match, pin->fmd.tun_dst);
+ }
if (pin->fmd.metadata != htonll(0)) {
match_set_metadata(match, pin->fmd.metadata);
}
int
ofputil_action_code_from_name(const char *name)
{
- static const char *names[OFPUTIL_N_ACTIONS] = {
+ static const char *const names[OFPUTIL_N_ACTIONS] = {
NULL,
#define OFPAT10_ACTION(ENUM, STRUCT, NAME) NAME,
#define OFPAT11_ACTION(ENUM, STRUCT, EXTENSIBLE, NAME) NAME,
#include "ofp-util.def"
};
- const char **p;
+ const char *const *p;
for (p = names; p < &names[ARRAY_SIZE(names)]; p++) {
if (*p && !strcasecmp(name, *p)) {
* will be for Open Flow version 'ofp_version'. Returns message
* as a struct ofpbuf. Returns encoded message on success, NULL on error */
struct ofpbuf *
-ofputil_encode_dump_ports_request(enum ofp_version ofp_version, int16_t port)
+ofputil_encode_dump_ports_request(enum ofp_version ofp_version, uint16_t port)
{
struct ofpbuf *request;
};
struct ofpbuf *ofputil_encode_dump_ports_request(enum ofp_version ofp_version,
- int16_t port);
+ uint16_t port);
void ofputil_append_port_stat(struct list *replies,
const struct ofputil_port_stats *ops);
size_t ofputil_count_port_stats(const struct ofp_header *);
/* Initializes 'b' as an empty ofpbuf that contains the 'allocated' bytes of
* memory starting at 'base'. 'base' should point to a buffer on the stack.
* (Nothing actually relies on 'base' being allocated on the stack. It could
- * be static or malloc()'d memory. But stack space is the most common usen
+ * be static or malloc()'d memory. But stack space is the most common use
* case.)
*
* 'base' should be appropriately aligned. Using an array of uint32_t or
/*
- * Copyright (c) 2008, 2009, 2011 Nicira, Inc.
+ * Copyright (c) 2008, 2009, 2011, 2013 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
int process_run(char **argv,
const int *keep_fds, size_t n_keep_fds,
- const int *null_fds, size_t n_null_fds,
+ const int *null_fds, size_t n_null_fds,
int *status);
pid_t process_pid(const struct process *);
static void report_error(struct rconn *, int error);
static void disconnect(struct rconn *, int error);
static void flush_queue(struct rconn *);
+static void close_monitor(struct rconn *, size_t idx, int retval);
static void copy_to_monitor(struct rconn *, const struct ofpbuf *);
static bool is_connected_state(enum state);
static bool is_admitted_msg(const struct ofpbuf *);
if (rc->vconn) {
vconn_run(rc->vconn);
}
- for (i = 0; i < rc->n_monitors; i++) {
+ for (i = 0; i < rc->n_monitors; ) {
+ struct ofpbuf *msg;
+ int retval;
+
vconn_run(rc->monitors[i]);
+
+ /* Drain any stray message that came in on the monitor connection. */
+ retval = vconn_recv(rc->monitors[i], &msg);
+ if (!retval) {
+ ofpbuf_delete(msg);
+ } else if (retval != EAGAIN) {
+ close_monitor(rc, i, retval);
+ continue;
+ }
+ i++;
}
do {
}
for (i = 0; i < rc->n_monitors; i++) {
vconn_run_wait(rc->monitors[i]);
+ vconn_recv_wait(rc->monitors[i]);
}
timeo = timeout(rc);
rc->state_entered = time_now();
}
+/* Logs and removes the monitor connection at index 'idx' in 'rc', which
+ * failed with error 'retval'.  The last monitor is swapped into slot 'idx'
+ * and 'n_monitors' is decremented, so a caller iterating over the monitor
+ * array must not advance its index after calling this. */
+static void
+close_monitor(struct rconn *rc, size_t idx, int retval)
+{
+ VLOG_DBG("%s: closing monitor connection to %s: %s",
+ rconn_get_name(rc), vconn_get_name(rc->monitors[idx]),
+ ovs_retval_to_string(retval));
+ rc->monitors[idx] = rc->monitors[--rc->n_monitors];
+}
+
static void
copy_to_monitor(struct rconn *rc, const struct ofpbuf *b)
{
if (!retval) {
clone = NULL;
} else if (retval != EAGAIN) {
- VLOG_DBG("%s: closing monitor connection to %s: %s",
- rconn_get_name(rc), vconn_get_name(vconn),
- strerror(retval));
- rc->monitors[i] = rc->monitors[--rc->n_monitors];
+ close_monitor(rc, i, retval);
continue;
}
i++;
freeaddrinfo(result);
return 0;
+#ifdef EAI_ADDRFAMILY
case EAI_ADDRFAMILY:
+#endif
case EAI_NONAME:
case EAI_SERVICE:
return ENOENT;
case EAI_MEMORY:
return ENOMEM;
+#ifdef EAI_NODATA
case EAI_NODATA:
return ENXIO;
+#endif
case EAI_SYSTEM:
return errno;
char *unlink_path;
};
-static struct pstream_class fd_pstream_class;
+static const struct pstream_class fd_pstream_class;
static struct fd_pstream *
fd_pstream_cast(struct pstream *pstream)
return 0;
}
-static struct pstream_class fd_pstream_class = {
+static const struct pstream_class fd_pstream_class = {
"pstream",
false,
NULL,
ds_clear(line);
}
-static void
-table_format_timestamp__(char *s, size_t size)
+static char *
+table_format_timestamp__(void)
{
- time_t now = time_wall();
- struct tm tm;
- strftime(s, size, "%Y-%m-%d %H:%M:%S", gmtime_r(&now, &tm));
+ return xastrftime("%Y-%m-%d %H:%M:%S", time_wall(), true);
}
static void
table_print_timestamp__(const struct table *table)
{
if (table->timestamp) {
- char s[32];
-
- table_format_timestamp__(s, sizeof s);
+ char *s = table_format_timestamp__();
puts(s);
+ free(s);
}
}
json_object_put_string(json, "caption", table->caption);
}
if (table->timestamp) {
- char s[32];
-
- table_format_timestamp__(s, sizeof s);
+ char *s = table_format_timestamp__();
json_object_put_string(json, "time", s);
+ free(s);
}
headings = json_array_create_empty();
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
#include "compiler.h"
*
* This structure should be treated as opaque by vconn implementations. */
struct vconn {
- struct vconn_class *class;
+ const struct vconn_class *class;
int state;
int error;
char *name;
};
-void vconn_init(struct vconn *, struct vconn_class *, int connect_status,
+void vconn_init(struct vconn *, const struct vconn_class *, int connect_status,
const char *name, uint32_t allowed_versions);
void vconn_free_data(struct vconn *vconn);
void vconn_set_remote_ip(struct vconn *, ovs_be32 remote_ip);
*
* This structure should be treated as opaque by vconn implementations. */
struct pvconn {
- struct pvconn_class *class;
+ const struct pvconn_class *class;
char *name;
uint32_t allowed_versions;
};
-void pvconn_init(struct pvconn *pvconn, struct pvconn_class *class,
+void pvconn_init(struct pvconn *pvconn, const struct pvconn_class *class,
const char *name, uint32_t allowed_versions);
static inline void pvconn_assert_class(const struct pvconn *pvconn,
const struct pvconn_class *class)
};
/* Active and passive vconn classes. */
-extern struct vconn_class tcp_vconn_class;
-extern struct pvconn_class ptcp_pvconn_class;
-extern struct vconn_class unix_vconn_class;
-extern struct pvconn_class punix_pvconn_class;
+extern const struct vconn_class tcp_vconn_class;
+extern const struct pvconn_class ptcp_pvconn_class;
+extern const struct vconn_class unix_vconn_class;
+extern const struct pvconn_class punix_pvconn_class;
#ifdef HAVE_OPENSSL
-extern struct vconn_class ssl_vconn_class;
-extern struct pvconn_class pssl_pvconn_class;
+extern const struct vconn_class ssl_vconn_class;
+extern const struct pvconn_class pssl_pvconn_class;
#endif
#endif /* vconn-provider.h */
int n_packets;
};
-static struct vconn_class stream_vconn_class;
+static const struct vconn_class stream_vconn_class;
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 25);
struct pstream *pstream;
};
-static struct pvconn_class pstream_pvconn_class;
+static const struct pvconn_class pstream_pvconn_class;
static struct pvconn_pstream *
pvconn_pstream_cast(struct pvconn *pvconn)
pvconn_pstream_wait \
}
-static struct vconn_class stream_vconn_class = STREAM_INIT("stream");
-static struct pvconn_class pstream_pvconn_class = PSTREAM_INIT("pstream");
+static const struct vconn_class stream_vconn_class = STREAM_INIT("stream");
+static const struct pvconn_class pstream_pvconn_class = PSTREAM_INIT("pstream");
-struct vconn_class tcp_vconn_class = STREAM_INIT("tcp");
-struct pvconn_class ptcp_pvconn_class = PSTREAM_INIT("ptcp");
+const struct vconn_class tcp_vconn_class = STREAM_INIT("tcp");
+const struct pvconn_class ptcp_pvconn_class = PSTREAM_INIT("ptcp");
-struct vconn_class unix_vconn_class = STREAM_INIT("unix");
-struct pvconn_class punix_pvconn_class = PSTREAM_INIT("punix");
+const struct vconn_class unix_vconn_class = STREAM_INIT("unix");
+const struct pvconn_class punix_pvconn_class = PSTREAM_INIT("punix");
#ifdef HAVE_OPENSSL
-struct vconn_class ssl_vconn_class = STREAM_INIT("ssl");
-struct pvconn_class pssl_pvconn_class = PSTREAM_INIT("pssl");
+const struct vconn_class ssl_vconn_class = STREAM_INIT("ssl");
+const struct pvconn_class pssl_pvconn_class = PSTREAM_INIT("pssl");
#endif
VCS_DISCONNECTED /* Connection failed or connection closed. */
};
-static struct vconn_class *vconn_classes[] = {
+static const struct vconn_class *vconn_classes[] = {
&tcp_vconn_class,
&unix_vconn_class,
#ifdef HAVE_OPENSSL
#endif
};
-static struct pvconn_class *pvconn_classes[] = {
+static const struct pvconn_class *pvconn_classes[] = {
&ptcp_pvconn_class,
&punix_pvconn_class,
#ifdef HAVE_OPENSSL
size_t i;
for (i = 0; i < ARRAY_SIZE(vconn_classes); i++) {
- struct vconn_class *class = vconn_classes[i];
+ const struct vconn_class *class = vconn_classes[i];
ovs_assert(class->name != NULL);
ovs_assert(class->open != NULL);
if (class->close || class->recv || class->send
}
for (i = 0; i < ARRAY_SIZE(pvconn_classes); i++) {
- struct pvconn_class *class = pvconn_classes[i];
+ const struct pvconn_class *class = pvconn_classes[i];
ovs_assert(class->name != NULL);
ovs_assert(class->listen != NULL);
if (class->close || class->accept || class->wait) {
* a null pointer into '*classp' if 'name' is in the wrong form or if no such
* class exists. */
static int
-vconn_lookup_class(const char *name, struct vconn_class **classp)
+vconn_lookup_class(const char *name, const struct vconn_class **classp)
{
size_t prefix_len;
size_t i;
for (i = 0; i < ARRAY_SIZE(vconn_classes); i++) {
- struct vconn_class *class = vconn_classes[i];
+ const struct vconn_class *class = vconn_classes[i];
if (strlen(class->name) == prefix_len
&& !memcmp(class->name, name, prefix_len)) {
*classp = class;
int
vconn_verify_name(const char *name)
{
- struct vconn_class *class;
+ const struct vconn_class *class;
return vconn_lookup_class(name, &class);
}
vconn_open(const char *name, uint32_t allowed_versions, uint8_t dscp,
struct vconn **vconnp)
{
- struct vconn_class *class;
+ const struct vconn_class *class;
struct vconn *vconn;
char *suffix_copy;
int error;
* a null pointer into '*classp' if 'name' is in the wrong form or if no such
* class exists. */
static int
-pvconn_lookup_class(const char *name, struct pvconn_class **classp)
+pvconn_lookup_class(const char *name, const struct pvconn_class **classp)
{
size_t prefix_len;
size_t i;
for (i = 0; i < ARRAY_SIZE(pvconn_classes); i++) {
- struct pvconn_class *class = pvconn_classes[i];
+ const struct pvconn_class *class = pvconn_classes[i];
if (strlen(class->name) == prefix_len
&& !memcmp(class->name, name, prefix_len)) {
*classp = class;
int
pvconn_verify_name(const char *name)
{
- struct pvconn_class *class;
+ const struct pvconn_class *class;
return pvconn_lookup_class(name, &class);
}
pvconn_open(const char *name, uint32_t allowed_versions, uint8_t dscp,
struct pvconn **pvconnp)
{
- struct pvconn_class *class;
+ const struct pvconn_class *class;
struct pvconn *pvconn;
char *suffix_copy;
int error;
*
* The caller retains ownership of 'name'. */
void
-vconn_init(struct vconn *vconn, struct vconn_class *class, int connect_status,
- const char *name, uint32_t allowed_versions)
+vconn_init(struct vconn *vconn, const struct vconn_class *class,
+ int connect_status, const char *name, uint32_t allowed_versions)
{
memset(vconn, 0, sizeof *vconn);
vconn->class = class;
}
void
-pvconn_init(struct pvconn *pvconn, struct pvconn_class *class,
+pvconn_init(struct pvconn *pvconn, const struct pvconn_class *class,
const char *name, uint32_t allowed_versions)
{
pvconn->class = class;
#define ovs_assert use_assert_instead_of_ovs_assert_in_this_module
/* Name for each logging level. */
-static const char *level_names[VLL_N_LEVELS] = {
+static const char *const level_names[VLL_N_LEVELS] = {
#define VLOG_LEVEL(NAME, SYSLOG_LEVEL) #NAME,
VLOG_LEVELS
#undef VLOG_LEVEL
};
/* Syslog value for each logging level. */
-static int syslog_levels[VLL_N_LEVELS] = {
+static const int syslog_levels[VLL_N_LEVELS] = {
#define VLOG_LEVEL(NAME, SYSLOG_LEVEL) SYSLOG_LEVEL,
VLOG_LEVELS
#undef VLOG_LEVEL
/* Searches the 'n_names' in 'names'. Returns the index of a match for
* 'target', or 'n_names' if no name matches. */
static size_t
-search_name_array(const char *target, const char **names, size_t n_names)
+search_name_array(const char *target, const char *const *names, size_t n_names)
{
size_t i;
now = time_wall();
if (now < 0) {
- struct tm tm;
- char s[128];
-
- gmtime_r(&now, &tm);
- strftime(s, sizeof s, "%a, %d %b %Y %H:%M:%S", &tm);
+ char *s = xastrftime("%a, %d %b %Y %H:%M:%S", now, true);
VLOG_ERR("current time is negative: %s (%ld)", s, (long int) now);
+ free(s);
}
unixctl_command_register(
break;
case 'd':
p = fetch_braces(p, "%Y-%m-%d %H:%M:%S", tmp, sizeof tmp);
- ds_put_strftime(s, tmp, false);
+ ds_put_strftime(s, tmp, time_wall(), false);
break;
case 'D':
p = fetch_braces(p, "%Y-%m-%d %H:%M:%S", tmp, sizeof tmp);
- ds_put_strftime(s, tmp, true);
+ ds_put_strftime(s, tmp, time_wall(), true);
break;
case 'm':
/* Format user-supplied log message and trim trailing new-lines. */
#include <errno.h>
+#include "bfd.h"
#include "bond.h"
#include "bundle.h"
#include "byte-order.h"
* this flow when actions change header fields. */
struct flow flow;
+ /* Flow at the last commit. */
+ struct flow base_flow;
+
+ /* Tunnel IP destination address as received. This is stored separately
+ * as the base_flow.tunnel is cleared on init to reflect the datapath
+ * behavior. Used to make sure not to send tunneled output to ourselves,
+ * which might lead to an infinite loop. This could happen easily
+ * if a tunnel is marked as 'remote_ip=flow', and the flow does not
+ * actually set the tun_dst field. */
+ ovs_be32 orig_tunnel_ip_dst;
+
/* stack for the push and pop actions.
* Each stack element is of the type "union mf_subvalue". */
struct ofpbuf stack;
int recurse; /* Recursion level, via xlate_table_action. */
bool max_resubmit_trigger; /* Recursed too deeply during translation. */
- struct flow base_flow; /* Flow at the last commit. */
uint32_t orig_skb_priority; /* Priority when packet arrived. */
uint8_t table_id; /* OpenFlow table ID where flow was found. */
uint32_t sflow_n_outputs; /* Number of output ports. */
* This member should be removed when the VLAN splinters feature is no
* longer needed. */
ovs_be16 vlan_tci;
-
- /* If received on a tunnel, the IP TOS value of the tunnel. */
- uint8_t tunnel_ip_tos;
};
static void action_xlate_ctx_init(struct action_xlate_ctx *,
struct ofbundle *bundle; /* Bundle that contains this port, if any. */
struct list bundle_node; /* In struct ofbundle's "ports" list. */
struct cfm *cfm; /* Connectivity Fault Management, if any. */
+ struct bfd *bfd; /* BFD, if any. */
tag_type tag; /* Tag associated with this port. */
bool may_enable; /* May be enabled in bonds. */
long long int carrier_seq; /* Carrier status changes. */
static struct ofport_dpif *
ofport_dpif_cast(const struct ofport *ofport)
{
- ovs_assert(ofport->ofproto->ofproto_class == &ofproto_dpif_class);
return ofport ? CONTAINER_OF(ofport, struct ofport_dpif, up) : NULL;
}
ofproto->backer->need_revalidate = REV_RECONFIGURE;
port->bundle = NULL;
port->cfm = NULL;
+ port->bfd = NULL;
port->tag = tag_create_random();
port->may_enable = true;
port->stp_port = NULL;
return false;
}
}
+
+/* Configures BFD on 'ofport_' from the key-value pairs in 'cfg'.  If the
+ * configuration yields a different BFD session object than before, marks
+ * the backer for revalidation (REV_RECONFIGURE) so datapath flows are
+ * regenerated.  Always returns 0. */
+static int
+set_bfd(struct ofport *ofport_, const struct smap *cfg)
+{
+ struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport_->ofproto);
+ struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
+ struct bfd *old;
+
+ old = ofport->bfd;
+ ofport->bfd = bfd_configure(old, netdev_get_name(ofport->up.netdev), cfg);
+ if (ofport->bfd != old) {
+ ofproto->backer->need_revalidate = REV_RECONFIGURE;
+ }
+
+ return 0;
+}
+
+/* Populates 'smap' with the BFD status of 'ofport_'.  Returns 0 on
+ * success, or ENOENT if BFD is not configured on this port. */
+static int
+get_bfd_status(struct ofport *ofport_, struct smap *smap)
+{
+ struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
+
+ if (ofport->bfd) {
+ bfd_get_status(ofport->bfd, smap);
+ return 0;
+ } else {
+ return ENOENT;
+ }
+}
\f
/* Spanning Tree. */
send_packet(ofport, &packet);
ofpbuf_uninit(&packet);
}
+
+ if (ofport->bfd && bfd_should_send_packet(ofport->bfd)) {
+ struct ofpbuf packet;
+
+ ofpbuf_init(&packet, 0);
+ bfd_put_packet(ofport->bfd, &packet, ofport->up.pp.hw_addr);
+ send_packet(ofport, &packet);
+ ofpbuf_uninit(&packet);
+ }
}
static void
}
}
+ if (ofport->bfd) {
+ bfd_run(ofport->bfd);
+ enable = enable && bfd_forwarding(ofport->bfd);
+ }
+
if (ofport->bundle) {
enable = enable && lacp_slave_may_enable(ofport->bundle->lacp, ofport);
if (carrier_changed) {
if (ofport->cfm) {
cfm_wait(ofport->cfm);
}
+
+ if (ofport->bfd) {
+ bfd_wait(ofport->bfd);
+ }
}
static int
return error;
}
-/* Account packets for LOCAL port. */
-static void
-ofproto_update_local_port_stats(const struct ofproto *ofproto_,
- size_t tx_size, size_t rx_size)
-{
- struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
-
- if (rx_size) {
- ofproto->stats.rx_packets++;
- ofproto->stats.rx_bytes += rx_size;
- }
- if (tx_size) {
- ofproto->stats.tx_packets++;
- ofproto->stats.tx_bytes += tx_size;
- }
-}
-
struct port_dump_state {
uint32_t bucket;
uint32_t offset;
cfm_process_heartbeat(ofport->cfm, packet);
}
return SLOW_CFM;
+ } else if (ofport->bfd && bfd_should_process_flow(flow)) {
+ if (packet) {
+ bfd_process_packet(ofport->bfd, flow, packet);
+ }
+ return SLOW_BFD;
} else if (ofport->bundle && ofport->bundle->lacp
&& flow->dl_type == htons(ETH_TYPE_LACP)) {
if (packet) {
* to the VLAN TCI with which the packet was really received, that is, the
* actual VLAN TCI extracted by odp_flow_key_to_flow(). (This differs from
* the value returned in flow->vlan_tci only for packets received on
- * VLAN splinters.) Also, if received on an IP tunnel, sets
- * 'initial_vals->tunnel_ip_tos' to the tunnel's IP TOS.
+ * VLAN splinters.)
*
* Similarly, this function also includes some logic to help with tunnels. It
* may modify 'flow' as necessary to make the tunneling implementation
if (initial_vals) {
initial_vals->vlan_tci = flow->vlan_tci;
- initial_vals->tunnel_ip_tos = flow->tunnel.ip_tos;
}
if (odp_in_port) {
*odp_in_port = flow->in_port;
}
- if (tnl_port_should_receive(flow)) {
- const struct ofport *ofport = tnl_port_receive(flow);
- if (!ofport) {
- flow->in_port = OFPP_NONE;
- goto exit;
- }
- port = ofport_dpif_cast(ofport);
-
- /* We can't reproduce 'key' from 'flow'. */
- fitness = fitness == ODP_FIT_PERFECT ? ODP_FIT_TOO_MUCH : fitness;
+ port = (tnl_port_should_receive(flow)
+ ? ofport_dpif_cast(tnl_port_receive(flow))
+ : odp_port_to_ofport(backer, flow->in_port));
+ flow->in_port = port ? port->up.ofp_port : OFPP_NONE;
+ if (!port) {
+ goto exit;
+ }
- /* XXX: Since the tunnel module is not scoped per backer, it's
- * theoretically possible that we'll receive an ofport belonging to an
- * entirely different datapath. In practice, this can't happen because
- * no platforms has two separate datapaths which each support
- * tunneling. */
- ovs_assert(ofproto_dpif_cast(port->up.ofproto)->backer == backer);
- } else {
- port = odp_port_to_ofport(backer, flow->in_port);
- if (!port) {
- flow->in_port = OFPP_NONE;
- goto exit;
- }
+ /* XXX: Since the tunnel module is not scoped per backer, for a tunnel port
+ * it's theoretically possible that we'll receive an ofport belonging to an
+ * entirely different datapath. In practice, this can't happen because no
+ * platform has two separate datapaths which each support tunneling. */
+ ovs_assert(ofproto_dpif_cast(port->up.ofproto)->backer == backer);
- flow->in_port = port->up.ofp_port;
- if (vsp_adjust_flow(ofproto_dpif_cast(port->up.ofproto), flow)) {
- if (packet) {
- /* Make the packet resemble the flow, so that it gets sent to
- * an OpenFlow controller properly, so that it looks correct
- * for sFlow, and so that flow_extract() will get the correct
- * vlan_tci if it is called on 'packet'.
- *
- * The allocated space inside 'packet' probably also contains
- * 'key', that is, both 'packet' and 'key' are probably part of
- * a struct dpif_upcall (see the large comment on that
- * structure definition), so pushing data on 'packet' is in
- * general not a good idea since it could overwrite 'key' or
- * free it as a side effect. However, it's OK in this special
- * case because we know that 'packet' is inside a Netlink
- * attribute: pushing 4 bytes will just overwrite the 4-byte
- * "struct nlattr", which is fine since we don't need that
- * header anymore. */
- eth_push_vlan(packet, flow->vlan_tci);
- }
- /* We can't reproduce 'key' from 'flow'. */
- fitness = fitness == ODP_FIT_PERFECT ? ODP_FIT_TOO_MUCH : fitness;
+ if (vsp_adjust_flow(ofproto_dpif_cast(port->up.ofproto), flow)) {
+ if (packet) {
+ /* Make the packet resemble the flow, so that it gets sent to
+ * an OpenFlow controller properly, so that it looks correct
+ * for sFlow, and so that flow_extract() will get the correct
+ * vlan_tci if it is called on 'packet'.
+ *
+ * The allocated space inside 'packet' probably also contains
+ * 'key', that is, both 'packet' and 'key' are probably part of
+ * a struct dpif_upcall (see the large comment on that
+ * structure definition), so pushing data on 'packet' is in
+ * general not a good idea since it could overwrite 'key' or
+ * free it as a side effect. However, it's OK in this special
+ * case because we know that 'packet' is inside a Netlink
+ * attribute: pushing 4 bytes will just overwrite the 4-byte
+ * "struct nlattr", which is fine since we don't need that
+ * header anymore. */
+ eth_push_vlan(packet, flow->vlan_tci);
}
+ /* We can't reproduce 'key' from 'flow'. */
+ fitness = fitness == ODP_FIT_PERFECT ? ODP_FIT_TOO_MUCH : fitness;
}
error = 0;
&ofproto->subfacets) {
long long int cutoff;
- cutoff = (subfacet->slow & (SLOW_CFM | SLOW_LACP | SLOW_STP)
+ cutoff = (subfacet->slow & (SLOW_CFM | SLOW_BFD | SLOW_LACP | SLOW_STP)
? special_cutoff
: normal_cutoff);
if (subfacet->used < cutoff) {
netflow_expire(ofproto->netflow, &facet->nf_flow, &expired);
}
- facet->rule->packet_count += facet->packet_count;
- facet->rule->byte_count += facet->byte_count;
-
/* Reset counters to prevent double counting if 'facet' ever gets
* reinstalled. */
facet_reset_counters(facet);
facet->prev_byte_count = facet->byte_count;
facet->prev_used = facet->used;
+ rule_credit_stats(facet->rule, &stats);
flow_push_stats(facet, &stats);
update_mirror_stats(ofproto_dpif_cast(facet->rule->up.ofproto),
rule_get_stats(struct rule *rule_, uint64_t *packets, uint64_t *bytes)
{
struct rule_dpif *rule = rule_dpif_cast(rule_);
- struct facet *facet;
/* push_all_stats() can handle flow misses which, when using the learn
* action, can cause rules to be added and deleted. This can corrupt our
* in facets. This counts, for example, facets that have expired. */
*packets = rule->packet_count;
*bytes = rule->byte_count;
-
- /* Add any statistics that are tracked by facets. This includes
- * statistical data recently updated by ofproto_update_stats() as well as
- * stats for packets that were executed "by hand" via dpif_execute(). */
- LIST_FOR_EACH (facet, list_node, &rule->facets) {
- *packets += facet->packet_count;
- *bytes += facet->byte_count;
- }
}
static void
rule_credit_stats(rule, &stats);
initial_vals.vlan_tci = flow->vlan_tci;
- initial_vals.tunnel_ip_tos = flow->tunnel.ip_tos;
ofpbuf_use_stub(&odp_actions, odp_actions_stub, sizeof odp_actions_stub);
action_xlate_ctx_init(&ctx, ofproto, flow, &initial_vals,
rule, stats.tcp_flags, packet);
static int
send_packet(const struct ofport_dpif *ofport, struct ofpbuf *packet)
{
- const struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
+ struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
uint64_t odp_actions_stub[1024 / 8];
struct ofpbuf key, odp_actions;
+ struct dpif_flow_stats stats;
struct odputil_keybuf keybuf;
- uint32_t odp_port;
+ struct ofpact_output output;
+ struct action_xlate_ctx ctx;
struct flow flow;
int error;
- flow_extract(packet, 0, 0, NULL, OFPP_LOCAL, &flow);
- if (netdev_vport_is_patch(ofport->up.netdev)) {
- struct ofproto_dpif *peer_ofproto;
- struct dpif_flow_stats stats;
- struct ofport_dpif *peer;
- struct rule_dpif *rule;
-
- peer = ofport_get_peer(ofport);
- if (!peer) {
- return ENODEV;
- }
-
- dpif_flow_stats_extract(&flow, packet, time_msec(), &stats);
- netdev_vport_inc_tx(ofport->up.netdev, &stats);
- netdev_vport_inc_rx(peer->up.netdev, &stats);
-
- flow.in_port = peer->up.ofp_port;
- peer_ofproto = ofproto_dpif_cast(peer->up.ofproto);
- rule = rule_dpif_lookup(peer_ofproto, &flow);
- rule_dpif_execute(rule, &flow, packet);
-
- return 0;
- }
-
ofpbuf_use_stub(&odp_actions, odp_actions_stub, sizeof odp_actions_stub);
+ ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
- if (ofport->tnl_port) {
- struct dpif_flow_stats stats;
-
- odp_port = tnl_port_send(ofport->tnl_port, &flow);
- if (odp_port == OVSP_NONE) {
- return ENODEV;
- }
+ /* Use OFPP_NONE as the in_port to avoid special packet processing. */
+ flow_extract(packet, 0, 0, NULL, OFPP_NONE, &flow);
+ odp_flow_key_from_flow(&key, &flow, ofp_port_to_odp_port(ofproto,
+ OFPP_LOCAL));
+ dpif_flow_stats_extract(&flow, packet, time_msec(), &stats);
- dpif_flow_stats_extract(&flow, packet, time_msec(), &stats);
- netdev_vport_inc_tx(ofport->up.netdev, &stats);
- odp_put_tunnel_action(&flow.tunnel, &odp_actions);
- odp_put_skb_mark_action(flow.skb_mark, &odp_actions);
- } else {
- odp_port = vsp_realdev_to_vlandev(ofproto, ofport->odp_port,
- flow.vlan_tci);
- if (odp_port != ofport->odp_port) {
- eth_pop_vlan(packet);
- flow.vlan_tci = htons(0);
- }
- }
+ ofpact_init(&output.ofpact, OFPACT_OUTPUT, sizeof output);
+ output.port = ofport->up.ofp_port;
+ output.max_len = 0;
- ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
- odp_flow_key_from_flow(&key, &flow,
- ofp_port_to_odp_port(ofproto, flow.in_port));
-
- compose_sflow_action(ofproto, &odp_actions, &flow, odp_port);
- compose_ipfix_action(ofproto, &odp_actions, &flow);
+ action_xlate_ctx_init(&ctx, ofproto, &flow, NULL, NULL, 0, packet);
+ ctx.resubmit_stats = &stats;
+ xlate_actions(&ctx, &output.ofpact, sizeof output, &odp_actions);
- nl_msg_put_u32(&odp_actions, OVS_ACTION_ATTR_OUTPUT, odp_port);
error = dpif_execute(ofproto->backer->dpif,
key.data, key.size,
odp_actions.data, odp_actions.size,
ofpbuf_uninit(&odp_actions);
if (error) {
- VLOG_WARN_RL(&rl, "%s: failed to send packet on port %"PRIu32" (%s)",
- ofproto->up.name, odp_port, strerror(error));
+ VLOG_WARN_RL(&rl, "%s: failed to send packet on port %s (%s)",
+ ofproto->up.name, netdev_get_name(ofport->up.netdev),
+ strerror(error));
}
- ofproto_update_local_port_stats(ofport->up.ofproto, packet->size, 0);
+
+ ofproto->stats.tx_packets++;
+ ofproto->stats.tx_bytes += packet->size;
return error;
}
\f
cookie.slow_path.reason = slow;
ofpbuf_use_stack(&buf, stub, stub_size);
- if (slow & (SLOW_CFM | SLOW_LACP | SLOW_STP)) {
+ if (slow & (SLOW_CFM | SLOW_BFD | SLOW_LACP | SLOW_STP)) {
uint32_t pid = dpif_port_get_pid(ofproto->backer->dpif, UINT32_MAX);
odp_put_userspace_action(pid, &cookie, sizeof cookie.slow_path, &buf);
} else {
xlate_report(ctx, "Tunneling decided against output");
goto out; /* restore flow_nw_tos */
}
-
+ if (ctx->flow.tunnel.ip_dst == ctx->orig_tunnel_ip_dst) {
+ xlate_report(ctx, "Not tunneling to our own address");
+ goto out; /* restore flow_nw_tos */
+ }
if (ctx->resubmit_stats) {
netdev_vport_inc_tx(ofport->up.netdev, ctx->resubmit_stats);
}
tunnel_ecn_ok(struct action_xlate_ctx *ctx)
{
if (is_ip_any(&ctx->base_flow)
- && (ctx->base_flow.tunnel.ip_tos & IP_ECN_MASK) == IP_ECN_CE) {
+ && (ctx->flow.tunnel.ip_tos & IP_ECN_MASK) == IP_ECN_CE) {
if ((ctx->base_flow.nw_tos & IP_ECN_MASK) == IP_ECN_NOT_ECT) {
VLOG_WARN_RL(&rl, "dropping tunnel packet marked ECN CE"
" but is not ECN capable");
struct rule_dpif *rule,
uint8_t tcp_flags, const struct ofpbuf *packet)
{
- ovs_be64 initial_tun_id = flow->tunnel.tun_id;
-
/* Flow initialization rules:
* - 'base_flow' must match the kernel's view of the packet at the
* time that action processing starts. 'flow' represents any
* to another device without any modifications this will cause us to
* insert a new tag since the original one was stripped off by the
* VLAN device.
- * - Tunnel 'flow' is largely cleared when transitioning between
- * the input and output stages since it does not make sense to output
- * a packet with the exact headers that it was received with (i.e.
- * the destination IP is us). The one exception is the tun_id, which
- * is preserved to allow use in later resubmit lookups and loads into
- * registers.
+ * - Tunnel metadata as received is retained in 'flow'. This allows
+ * tunnel metadata matching also in later tables.
+ * Since a kernel action for setting the tunnel metadata will only be
+ * generated with actual tunnel output, changing the tunnel metadata
+ * values in 'flow' (such as tun_id) will only have effect with a later
+ * tunnel output action.
* - Tunnel 'base_flow' is completely cleared since that is what the
* kernel does. If we wish to maintain the original values an action
* needs to be generated. */
ctx->ofproto = ofproto;
ctx->flow = *flow;
- memset(&ctx->flow.tunnel, 0, sizeof ctx->flow.tunnel);
ctx->base_flow = ctx->flow;
- ctx->base_flow.vlan_tci = initial_vals->vlan_tci;
- ctx->base_flow.tunnel.ip_tos = initial_vals->tunnel_ip_tos;
- ctx->flow.tunnel.tun_id = initial_tun_id;
+ memset(&ctx->base_flow.tunnel, 0, sizeof ctx->base_flow.tunnel);
+ ctx->orig_tunnel_ip_dst = flow->tunnel.ip_dst;
ctx->rule = rule;
ctx->packet = packet;
ctx->may_learn = packet != NULL;
ctx->resubmit_hook = NULL;
ctx->report_hook = NULL;
ctx->resubmit_stats = NULL;
+
+ if (initial_vals) {
+ ctx->base_flow.vlan_tci = initial_vals->vlan_tci;
+ }
}
/* Translates the 'ofpacts_len' bytes of "struct ofpacts" starting at 'ofpacts'
} else {
static struct vlog_rate_limit trace_rl = VLOG_RATE_LIMIT_INIT(1, 1);
struct initial_vals initial_vals;
+ size_t sample_actions_len;
uint32_t local_odp_port;
initial_vals.vlan_tci = ctx->base_flow.vlan_tci;
- initial_vals.tunnel_ip_tos = ctx->base_flow.tunnel.ip_tos;
add_sflow_action(ctx);
add_ipfix_action(ctx);
+ sample_actions_len = ctx->odp_actions->size;
if (tunnel_ecn_ok(ctx) && (!in_port || may_receive(in_port, ctx))) {
do_xlate_actions(ofpacts, ofpacts_len, ctx);
/* We've let OFPP_NORMAL and the learning action look at the
* packet, so drop it now if forwarding is disabled. */
if (in_port && !stp_forward_in_state(in_port->stp_state)) {
- ofpbuf_clear(ctx->odp_actions);
- add_sflow_action(ctx);
- add_ipfix_action(ctx);
+ ctx->odp_actions->size = sample_actions_len;
}
}
dpif_flow_stats_extract(flow, packet, time_msec(), &stats);
initial_vals.vlan_tci = flow->vlan_tci;
- initial_vals.tunnel_ip_tos = 0;
action_xlate_ctx_init(&ctx, ofproto, flow, &initial_vals, NULL,
packet_get_tcp_flags(packet, flow), packet);
ctx.resubmit_stats = &stats;
}
initial_vals.vlan_tci = flow.vlan_tci;
- initial_vals.tunnel_ip_tos = flow.tunnel.ip_tos;
}
/* Generate a packet, if requested. */
flow_extract(packet, priority, mark, NULL, in_port, &flow);
flow.tunnel.tun_id = tun_id;
initial_vals.vlan_tci = flow.vlan_tci;
- initial_vals.tunnel_ip_tos = flow.tunnel.ip_tos;
} else {
unixctl_command_reply_error(conn, "Bad command syntax");
goto exit;
case SLOW_STP:
ds_put_cstr(ds, "\n\t- Consists of STP packets.");
break;
+ case SLOW_BFD:
+ ds_put_cstr(ds, "\n\t- Consists of BFD packets.");
+ break;
case SLOW_IN_BAND:
ds_put_cstr(ds, "\n\t- Needs in-band special case "
"processing.");
set_ipfix,
set_cfm,
get_cfm_status,
+ set_bfd,
+ get_bfd_status,
set_stp,
get_stp_status,
set_stp_port,
struct match;
struct ofpact;
struct ofputil_flow_mod;
+struct bfd_cfg;
/* An OpenFlow switch.
*
bool (*get_cfm_status)(const struct ofport *ofport,
struct ofproto_cfm_status *status);
+ /* Configures BFD on 'ofport'.
+ *
+ * If 'cfg' is NULL, or 'cfg' does not contain the key value pair
+ * "enable=true", removes BFD from 'ofport'. Otherwise, configures BFD
+ * according to 'cfg'.
+ *
+ * EOPNOTSUPP as a return value indicates that this ofproto_class does not
+ * support BFD, as does a null pointer. */
+ int (*set_bfd)(struct ofport *ofport, const struct smap *cfg);
+
+ /* Populates 'smap' with the status of BFD on 'ofport'. Returns 0 on
+ * success, or a positive errno. EOPNOTSUPP as a return value indicates
+ * that this ofproto_class does not support BFD, as does a null pointer. */
+ int (*get_bfd_status)(struct ofport *ofport, struct smap *smap);
+
/* Configures spanning tree protocol (STP) on 'ofproto' using the
* settings defined in 's'.
*
}
}
+/* Configures BFD on 'ofp_port' in 'ofproto'. This function has no effect if
+ * 'ofproto' does not have a port 'ofp_port'. */
+void
+ofproto_port_set_bfd(struct ofproto *ofproto, uint16_t ofp_port,
+ const struct smap *cfg)
+{
+ struct ofport *ofport;
+ int error;
+
+    ofport = ofproto_get_port(ofproto, ofp_port);
+    if (!ofport) {
+        VLOG_WARN("%s: cannot configure bfd on nonexistent port %"PRIu16,
+                  ofproto->name, ofp_port);
+        return;
+    }
+
+ error = (ofproto->ofproto_class->set_bfd
+ ? ofproto->ofproto_class->set_bfd(ofport, cfg)
+ : EOPNOTSUPP);
+ if (error) {
+ VLOG_WARN("%s: bfd configuration on port %"PRIu16" (%s) failed (%s)",
+ ofproto->name, ofp_port, netdev_get_name(ofport->netdev),
+ strerror(error));
+ }
+}
+
+/* Populates 'status' with key value pairs indicating the status of the BFD
+ * session on 'ofp_port'. This information is intended to be populated in the
+ * OVS database. Has no effect if 'ofp_port' is not an OpenFlow port in
+ * 'ofproto'. */
+int
+ofproto_port_get_bfd_status(struct ofproto *ofproto, uint16_t ofp_port,
+ struct smap *status)
+{
+ struct ofport *ofport = ofproto_get_port(ofproto, ofp_port);
+ return (ofport && ofproto->ofproto_class->get_bfd_status
+ ? ofproto->ofproto_class->get_bfd_status(ofport, status)
+ : EOPNOTSUPP);
+}
+
/* Checks the status of LACP negotiation for 'ofp_port' within ofproto.
* Returns 1 if LACP partner information for 'ofp_port' is up-to-date,
* 0 if LACP partner information is not current (generally indicating a
toggle = (config ^ port->pp.config) & mask;
if (toggle & OFPUTIL_PC_PORT_DOWN) {
if (config & OFPUTIL_PC_PORT_DOWN) {
- netdev_turn_flags_off(port->netdev, NETDEV_UP, true);
+ netdev_turn_flags_off(port->netdev, NETDEV_UP, NULL);
} else {
- netdev_turn_flags_on(port->netdev, NETDEV_UP, true);
+ netdev_turn_flags_on(port->netdev, NETDEV_UP, NULL);
}
toggle &= ~OFPUTIL_PC_PORT_DOWN;
}
extern "C" {
#endif
+struct bfd_cfg;
+struct cfm_settings;
struct cls_rule;
struct netdev;
struct ofproto;
struct ofport;
struct shash;
struct simap;
+struct smap;
struct netdev_stats;
struct ofproto_controller_info {
void ofproto_port_clear_cfm(struct ofproto *, uint16_t ofp_port);
void ofproto_port_set_cfm(struct ofproto *, uint16_t ofp_port,
const struct cfm_settings *);
+void ofproto_port_set_bfd(struct ofproto *, uint16_t ofp_port,
+ const struct smap *cfg);
+int ofproto_port_get_bfd_status(struct ofproto *, uint16_t ofp_port,
+ struct smap *);
int ofproto_port_is_lacp_current(struct ofproto *, uint16_t ofp_port);
int ofproto_port_set_stp(struct ofproto *, uint16_t ofp_port,
const struct ofproto_port_stp_settings *);
uint32_t odp_port;
uint32_t skb_mark;
bool in_key_flow;
+ bool ip_src_flow;
+ bool ip_dst_flow;
};
struct tnl_port {
tnl_port->match.in_key = cfg->in_key;
tnl_port->match.ip_src = cfg->ip_src;
tnl_port->match.ip_dst = cfg->ip_dst;
+ tnl_port->match.ip_src_flow = cfg->ip_src_flow;
+ tnl_port->match.ip_dst_flow = cfg->ip_dst_flow;
tnl_port->match.skb_mark = cfg->ipsec ? IPSEC_MARK : 0;
tnl_port->match.in_key_flow = cfg->in_key_flow;
tnl_port->match.odp_port = odp_port;
}
}
-/* Transforms 'flow' so that it appears to have been received by a tunnel
- * OpenFlow port controlled by this module instead of the datapath port it
- * actually came in on. Sets 'flow''s in_port to the appropriate OpenFlow port
- * number. Returns the 'ofport' corresponding to the new in_port.
+/* Looks in the table of tunnels for a tunnel matching the metadata in 'flow'.
+ * Returns the 'ofport' corresponding to the new in_port, or a null pointer if
+ * none is found.
*
* Callers should verify that 'flow' needs to be received by calling
- * tnl_port_should_receive() before this function.
- *
- * Leaves 'flow' untouched and returns null if unsuccessful. */
+ * tnl_port_should_receive() before this function. */
const struct ofport *
-tnl_port_receive(struct flow *flow)
+tnl_port_receive(const struct flow *flow)
{
char *pre_flow_str = NULL;
struct tnl_port *tnl_port;
pre_flow_str = flow_to_string(flow);
}
- flow->in_port = tnl_port->ofport->ofp_port;
- memset(&flow->tunnel, 0, sizeof flow->tunnel);
- flow->tunnel.tun_id = match.in_key;
-
if (pre_flow_str) {
char *post_flow_str = flow_to_string(flow);
char *tnl_str = tnl_port_fmt(tnl_port);
pre_flow_str = flow_to_string(flow);
}
- flow->tunnel.ip_src = tnl_port->match.ip_src;
- flow->tunnel.ip_dst = tnl_port->match.ip_dst;
+ if (!cfg->ip_src_flow) {
+ flow->tunnel.ip_src = tnl_port->match.ip_src;
+ }
+ if (!cfg->ip_dst_flow) {
+ flow->tunnel.ip_dst = tnl_port->match.ip_dst;
+ }
flow->skb_mark = tnl_port->match.skb_mark;
if (!cfg->out_key_flow) {
return tnl_port;
}
+ /* Flow-based remote */
+ match.ip_dst = 0;
+ match.ip_dst_flow = true;
+ tnl_port = tnl_find_exact(&match);
+ if (tnl_port) {
+ return tnl_port;
+ }
+
+ /* Flow-based everything */
+ match.ip_src = 0;
+ match.ip_src_flow = true;
+ tnl_port = tnl_find_exact(&match);
+ if (tnl_port) {
+ return tnl_port;
+ }
+
return NULL;
}
static void
tnl_match_fmt(const struct tnl_match *match, struct ds *ds)
{
- ds_put_format(ds, IP_FMT"->"IP_FMT, IP_ARGS(match->ip_src),
- IP_ARGS(match->ip_dst));
+ if (!match->ip_dst_flow) {
+ ds_put_format(ds, IP_FMT"->"IP_FMT, IP_ARGS(match->ip_src),
+ IP_ARGS(match->ip_dst));
+ } else if (!match->ip_src_flow) {
+ ds_put_format(ds, IP_FMT"->flow", IP_ARGS(match->ip_src));
+ } else {
+ ds_put_cstr(ds, "flow->flow");
+ }
if (match->in_key_flow) {
ds_put_cstr(ds, ", key=flow");
struct tnl_port *tnl_port_add(const struct ofport *, uint32_t odp_port);
void tnl_port_del(struct tnl_port *);
-const struct ofport *tnl_port_receive(struct flow *);
+const struct ofport *tnl_port_receive(const struct flow *);
uint32_t tnl_port_send(const struct tnl_port *, struct flow *);
/* Returns true if 'flow' should be submitted to tnl_port_receive(). */
svec_add(dbs, name->u.string);
}
jsonrpc_msg_destroy(reply);
+ svec_sort(dbs);
}
\f
static void
svec_init(&dbs);
fetch_dbs(rpc, &dbs);
- svec_sort(&dbs);
SVEC_FOR_EACH (i, db_name, &dbs) {
puts(db_name);
}
date = shash_find_data(json_object(json), "_date");
if (date && date->type == JSON_INTEGER) {
time_t t = json_integer(date);
- struct tm tm;
- char s[128];
-
- strftime(s, sizeof s, "%Y-%m-%d %H:%M:%S", gmtime_r(&t, &tm));
- printf(" %s", s);
+ char *s = xastrftime(" %Y-%m-%d %H:%M:%S", t, true);
+ fputs(s, stdout);
+ free(s);
}
comment = shash_find_data(json_object(json), "_comment");
AT_SETUP([learning action - invalid prerequisites])
AT_CHECK([[ovs-ofctl parse-flow 'actions=learn(load:5->NXM_OF_IP_DST[])']],
- [1], [],
- [[ovs-ofctl: load:5->NXM_OF_IP_DST[]: cannot specify destination field ip_dst because prerequisites are not satisfied
-]])
+ [1], [], [stderr])
+AT_CHECK([sed -e 's/.*|meta_flow|WARN|//' < stderr], [0],
+ [[destination field ip_dst lacks correct prerequisites
+]], [[]])
AT_CHECK([[ovs-ofctl parse-flow 'actions=learn(load:NXM_OF_IP_DST[]->NXM_NX_REG1[])']],
- [1], [],
- [[ovs-ofctl: load:NXM_OF_IP_DST[]->NXM_NX_REG1[]: cannot specify source field ip_dst because prerequisites are not satisfied
+ [1], [], [stderr])
+AT_CHECK([sed -e 's/.*|meta_flow|WARN|//' < stderr], [0],
+ [[source field ip_dst lacks correct prerequisites
]])
AT_CLEANUP
OVS_VSWITCHD_START
ADD_OF_PORTS([br0], [1], [9], [10], [11], [55], [66], [77], [88])
AT_DATA([flows.txt], [dnl
-in_port=1 actions=resubmit:2,resubmit:3,resubmit:4,resubmit:5,resubmit:6,resubmit:7
+in_port=1 actions=resubmit:2,resubmit:3,resubmit:4,resubmit:5,resubmit:6,resubmit:7,resubmit:8
in_port=2 actions=output:9
in_port=3 actions=load:55->NXM_NX_REG0[[]],output:NXM_NX_REG0[[]],load:66->NXM_NX_REG1[[]]
in_port=4 actions=output:10,output:NXM_NX_REG0[[]],output:NXM_NX_REG1[[]],output:11
in_port=5 actions=load:77->NXM_NX_REG0[[0..15]],load:88->NXM_NX_REG0[[16..31]]
in_port=6 actions=output:NXM_NX_REG0[[0..15]],output:NXM_NX_REG0[[16..31]]
in_port=7 actions=load:0x110000ff->NXM_NX_REG0[[]],output:NXM_NX_REG0[[]]
+in_port=8 actions=1,9,load:9->NXM_OF_IN_PORT[[]],1,9
])
AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
AT_CHECK([ovs-appctl ofproto/trace br0 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout])
AT_CHECK([tail -1 stdout], [0],
- [Datapath actions: 9,55,10,55,66,11,77,88
+ [Datapath actions: 9,55,10,55,66,11,77,88,9,1
])
OVS_VSWITCHD_STOP
AT_CLEANUP
OVS_VSWITCHD_STOP
AT_CLEANUP
+dnl This test checks that tunnel metadata is encoded in packet_in structures.
+AT_SETUP([ofproto - packet-out with tunnel metadata (OpenFlow 1.2)])
+OVS_VSWITCHD_START
+
+# Start a monitor listening for packet-ins.
+AT_CHECK([ovs-ofctl -O OpenFlow12 monitor br0 --detach --no-chdir --pidfile])
+ovs-appctl -t ovs-ofctl ofctl/send 0309000c0123456700000080
+ovs-appctl -t ovs-ofctl ofctl/barrier
+ovs-appctl -t ovs-ofctl ofctl/set-output-file monitor.log
+AT_CAPTURE_FILE([monitor.log])
+
+# Send a packet-out with set field actions to set some tunnel metadata, and forward to controller
+AT_CHECK([ovs-ofctl -O OpenFlow12 packet-out br0 none 'set_field:127.0.0.1->tun_src,set_field:0x01020304->tun_id,set_field:192.168.0.1->tun_dst, controller' '0001020304050010203040501234'])
+
+# Stop the monitor and check its output.
+ovs-appctl -t ovs-ofctl ofctl/barrier
+ovs-appctl -t ovs-ofctl exit
+
+AT_CHECK([sed 's/ (xid=0x[[0-9a-fA-F]]*)//' monitor.log], [0], [dnl
+OFPT_PACKET_IN (OF1.2): total_len=14 in_port=ANY tun_id=0x1020304 tun_src=127.0.0.1 tun_dst=192.168.0.1 (via action) data_len=14 (unbuffered)
+metadata=0,in_port=0,vlan_tci=0x0000,dl_src=00:10:20:30:40:50,dl_dst=00:01:02:03:04:05,dl_type=0x1234
+OFPT_BARRIER_REPLY (OF1.2):
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
AT_SETUP([ofproto - flow monitoring])
AT_KEYWORDS([monitor])
OVS_VSWITCHD_START
udp,nw_src=192.168.0.3,tp_dst=53 actions=pop_queue,output:1
cookie=0x123456789abcdef hard_timeout=10 priority=60000 actions=controller
actions=note:41.42.43,note:00.01.02.03.04.05.06.07,note
-actions=set_field:fe80:0123:4567:890a:a6ba:dbff:fefe:59fa->ipv6_src
+ip,actions=set_field:10.4.3.77->ip_src
in_port=0 actions=resubmit:0
actions=sample(probability=12345,collector_set_id=23456,obs_domain_id=34567,obs_point_id=45678)
]])
OFPT_FLOW_MOD: ADD udp,nw_src=192.168.0.3,tp_dst=53 actions=pop_queue,output:1
OFPT_FLOW_MOD: ADD priority=60000 cookie:0x123456789abcdef hard:10 actions=CONTROLLER:65535
OFPT_FLOW_MOD: ADD actions=note:41.42.43.00.00.00,note:00.01.02.03.04.05.06.07.00.00.00.00.00.00,note:00.00.00.00.00.00
-OFPT_FLOW_MOD: ADD actions=load:0xa6badbfffefe59fa->NXM_NX_IPV6_SRC[0..63],load:0xfe8001234567890a->NXM_NX_IPV6_SRC[64..127]
+OFPT_FLOW_MOD: ADD ip actions=load:0xa04034d->NXM_OF_IP_SRC[]
OFPT_FLOW_MOD: ADD in_port=0 actions=resubmit:0
OFPT_FLOW_MOD: ADD actions=sample(probability=12345,collector_set_id=23456,obs_domain_id=34567,obs_point_id=45678)
]])
udp,nw_src=192.168.0.3,tp_dst=53 actions=pop_queue,output:1
cookie=0x123456789abcdef hard_timeout=10 priority=60000 actions=controller
actions=note:41.42.43,note:00.01.02.03.04.05.06.07,note
-actions=set_field:fe80:0123:4567:890a:a6ba:dbff:fefe:59fa->ipv6_src
+ipv6,actions=set_field:fe80:0123:4567:890a:a6ba:dbff:fefe:59fa->ipv6_src
in_port=0 actions=resubmit:0
actions=sample(probability=12345,collector_set_id=23456,obs_domain_id=34567,obs_point_id=45678)
]])
AT_CHECK([ovs-ofctl --protocols OpenFlow12 parse-flows flows.txt
], [0], [stdout])
AT_CHECK([[sed 's/ (xid=0x[0-9a-fA-F]*)//' stdout]], [0],
-[[usable protocols: any
+[[usable protocols: NXM,OXM
chosen protocol: OXM-OpenFlow12
OFPT_FLOW_MOD (OF1.2): ADD table:255 tcp,tp_src=123 actions=FLOOD
OFPT_FLOW_MOD (OF1.2): ADD table:255 in_port=LOCAL,dl_vlan=9,dl_src=00:0a:e4:25:6b:b0 actions=drop
OFPT_FLOW_MOD (OF1.2): ADD table:255 udp,nw_src=192.168.0.3,tp_dst=53 actions=pop_queue,output:1
OFPT_FLOW_MOD (OF1.2): ADD table:255 priority=60000 cookie:0x123456789abcdef hard:10 actions=CONTROLLER:65535
OFPT_FLOW_MOD (OF1.2): ADD table:255 actions=note:41.42.43.00.00.00,note:00.01.02.03.04.05.06.07.00.00.00.00.00.00,note:00.00.00.00.00.00
-OFPT_FLOW_MOD (OF1.2): ADD table:255 actions=set_field:fe80:123:4567:890a:a6ba:dbff:fefe:59fa->ipv6_src
+OFPT_FLOW_MOD (OF1.2): ADD table:255 ipv6 actions=set_field:fe80:123:4567:890a:a6ba:dbff:fefe:59fa->ipv6_src
OFPT_FLOW_MOD (OF1.2): ADD table:255 in_port=0 actions=resubmit:0
OFPT_FLOW_MOD (OF1.2): ADD table:255 actions=sample(probability=12345,collector_set_id=23456,obs_domain_id=34567,obs_point_id=45678)
]])
br0 65534/100: (dummy)
p1 1/1: (vxlan: remote_ip=1.1.1.1)
])
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+AT_SETUP([ofproto-dpif - set_field - tun_src/tun_dst/tun_id])
+OVS_VSWITCHD_START([dnl
+ add-port br0 p1 -- set Interface p1 type=gre options:key=flow \
+ options:remote_ip=1.1.1.1 ofport_request=1 \
+ -- add-port br0 p2 -- set Interface p2 type=gre options:key=flow \
+ options:remote_ip=flow ofport_request=2 \
+ -- add-port br0 p3 -- set Interface p3 type=gre options:key=flow \
+ options:remote_ip=flow options:local_ip=flow ofport_request=3 \
+ -- add-port br0 p4 -- set Interface p4 type=gre options:key=3 \
+ options:remote_ip=flow ofport_request=4 \
+ -- add-port br0 p5 -- set Interface p5 type=gre options:key=flow \
+ options:remote_ip=5.5.5.5 ofport_request=5])
+ADD_OF_PORTS([br0], [90])
+AT_DATA([flows.txt], [dnl
+in_port=90 actions=resubmit:1,resubmit:2,resubmit:3,resubmit:4,resubmit:5
+in_port=1 actions=set_field:42->tun_id,output:1
+in_port=2 actions=set_field:3.3.3.3->tun_dst,output:2
+in_port=3 actions=set_field:1.1.1.1->tun_src,set_field:4.4.4.4->tun_dst,output:3
+in_port=4 actions=set_field:2.2.2.2->tun_dst,output:4
+in_port=5 actions=set_field:5->tun_id
+])
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+AT_CHECK([ovs-appctl ofproto/trace br0 'in_port(90),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+ [Datapath actions: set(tunnel(tun_id=0x2a,src=0.0.0.0,dst=1.1.1.1,tos=0x0,ttl=64,flags(df,key))),1,set(tunnel(tun_id=0x2a,src=0.0.0.0,dst=3.3.3.3,tos=0x0,ttl=64,flags(df,key))),1,set(tunnel(tun_id=0x2a,src=1.1.1.1,dst=4.4.4.4,tos=0x0,ttl=64,flags(df,key))),1,set(tunnel(tun_id=0x3,src=0.0.0.0,dst=2.2.2.2,tos=0x0,ttl=64,flags(df,key))),1
+])
OVS_VSWITCHD_STOP
AT_CLEANUP
/*
- * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc.
+ * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
retval = netdev_open(netdev_name, "system", &netdev);
if (!retval) {
- retval = netdev_turn_flags_on(netdev, NETDEV_UP, true);
+ retval = netdev_turn_flags_on(netdev, NETDEV_UP, NULL);
netdev_close(netdev);
}
return retval;
corresponding bit in \fItunnel-id\fR must match exactly, and a 0-bit
wildcards that bit.
.
+.IP \fBtun_src=\fIip\fR[\fB/\fInetmask\fR]
+.IQ \fBtun_dst=\fIip\fR[\fB/\fInetmask\fR]
+Matches tunnel IPv4 source (or destination) address \fIip\fR. Only packets
+that arrive over a tunnel will have nonzero tunnel addresses.
+The address may be specified as an IP address or host name
+(e.g. \fB192.168.1.1\fR or \fBwww.example.com\fR). The optional
+\fInetmask\fR allows restricting a match to a masked IPv4 address.
+The netmask may be specified as a dotted quad
+(e.g. \fB192.168.1.0/255.255.255.0\fR) or as a CIDR block
+(e.g. \fB192.168.1.0/24\fR).
+.
.IP "\fBreg\fIidx\fB=\fIvalue\fR[\fB/\fImask\fR]"
Matches \fIvalue\fR either exactly or with optional \fImask\fR in
register number \fIidx\fR. The valid range of \fIidx\fR depends on
vconn_close(vconn);
}
-/* Sends 'request', which should be a request that only has a reply if an error
- * occurs, and waits for it to succeed or fail. If an error does occur, prints
- * it and exits with an error.
+/* Sends all of the 'requests', which should be requests that only have replies
+ * if an error occurs, and waits for them to succeed or fail. If an error does
+ * occur, prints it and exits with an error.
*
* Destroys all of the 'requests'. */
static void
}
}
+/* Prints to stdout all of the messages received on 'vconn'.
+ *
+ * Iff 'reply_to_echo_requests' is true, sends a reply to any echo request
+ * received on 'vconn'. */
static void
-monitor_vconn(struct vconn *vconn)
+monitor_vconn(struct vconn *vconn, bool reply_to_echo_requests)
{
struct barrier_aux barrier_aux = { vconn, NULL };
struct unixctl_server *server;
run(retval, "vconn_recv");
if (timestamp) {
- time_t now = time_wall();
- struct tm tm;
- char s[32];
-
- strftime(s, sizeof s, "%Y-%m-%d %H:%M:%S: ",
- gmtime_r(&now, &tm));
+ char *s = xastrftime("%Y-%m-%d %H:%M:%S: ", time_wall(), true);
fputs(s, stderr);
+ free(s);
}
ofptype_decode(&type, b->data);
ofp_print(stderr, b->data, b->size, verbosity + 2);
- ofpbuf_delete(b);
- if (barrier_aux.conn && type == OFPTYPE_BARRIER_REPLY) {
- unixctl_command_reply(barrier_aux.conn, NULL);
- barrier_aux.conn = NULL;
+ switch ((int) type) {
+ case OFPTYPE_BARRIER_REPLY:
+ if (barrier_aux.conn) {
+ unixctl_command_reply(barrier_aux.conn, NULL);
+ barrier_aux.conn = NULL;
+ }
+ break;
+
+ case OFPTYPE_ECHO_REQUEST:
+ if (reply_to_echo_requests) {
+ struct ofpbuf *reply;
+
+ reply = make_echo_reply(b->data);
+ retval = vconn_send_block(vconn, reply);
+ if (retval) {
+ ovs_fatal(retval, "failed to send echo reply");
+ }
+ }
+ break;
}
+ ofpbuf_delete(b);
}
if (exiting) {
}
}
- monitor_vconn(vconn);
+ monitor_vconn(vconn, true);
}
static void
struct vconn *vconn;
open_vconn__(argv[1], SNOOP, &vconn);
- monitor_vconn(vconn);
+ monitor_vconn(vconn, false);
}
static void
#! /bin/sh
-# Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc.
+# Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
serial = $dir/serial # serial no file
private_key = $dir/private/cakey.pem# CA private key
RANDFILE = $dir/private/.rand # random number file
-default_days = 36525 # how long to certify for
+default_days = 3650 # how long to certify for
default_crl_days= 30 # how long before next CRL
default_md = md5 # md to use
policy = policy # default policy
-newkey $newkey -keyout private/cakey.pem -out careq.pem \
1>&3 2>&3
openssl ca -config ca.cnf -create_serial -out cacert.pem \
- -days 36525 -batch -keyfile private/cakey.pem -selfsign \
+ -days 3650 -batch -keyfile private/cakey.pem -selfsign \
-infiles careq.pem 1>&3 2>&3
chmod 0700 private/cakey.pem
# Create both the private key and certificate with restricted permissions.
(umask 077 && \
openssl x509 -in "$arg1-req.pem" -out "$arg1-cert.pem.tmp" \
- -signkey "$arg1-privkey.pem" -req -days 36525 -text) 2>&3 || exit $?
+ -signkey "$arg1-privkey.pem" -req -days 3650 -text) 2>&3 || exit $?
# Reset the permissions on the certificate to the user's default.
cat "$arg1-cert.pem.tmp" > "$arg1-cert.pem"
#include <errno.h>
#include <inttypes.h>
#include <stdlib.h>
+#include "bfd.h"
#include "bitmap.h"
#include "bond.h"
#include "cfm.h"
ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_cfm_remote_mpids);
ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_cfm_health);
ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_cfm_remote_opstate);
+ ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_bfd_status);
ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_lacp_current);
ovsdb_idl_omit(idl, &ovsrec_interface_col_external_ids);
iface_configure_cfm(iface);
iface_configure_qos(iface, port->cfg->qos);
iface_set_mac(iface);
+ ofproto_port_set_bfd(br->ofproto, iface->ofp_port,
+ &iface->cfg->bfd);
}
}
bridge_configure_mirrors(br);
if ((port_cfg->vlan_mode && !strcmp(port_cfg->vlan_mode, "splinter"))
|| iface_is_internal(iface_cfg, br->cfg)) {
- netdev_turn_flags_on(netdev, NETDEV_UP, true);
+ netdev_turn_flags_on(netdev, NETDEV_UP, NULL);
}
*netdevp = netdev;
IFACE_STAT(rx_crc_errors, "rx_crc_err") \
IFACE_STAT(collisions, "collisions")
-#define IFACE_STAT(MEMBER, NAME) NAME,
- static char *keys[] = { IFACE_STATS };
+#define IFACE_STAT(MEMBER, NAME) + 1
+ enum { N_IFACE_STATS = IFACE_STATS };
#undef IFACE_STAT
- int64_t values[ARRAY_SIZE(keys)];
- int i;
+ int64_t values[N_IFACE_STATS];
+ char *keys[N_IFACE_STATS];
+ int n;
struct netdev_stats stats;
* all-1s, and we will deal with that correctly below. */
netdev_get_stats(iface->netdev, &stats);
- /* Copy statistics into values[] array. */
- i = 0;
-#define IFACE_STAT(MEMBER, NAME) values[i++] = stats.MEMBER;
+ /* Copy statistics into keys[] and values[]. */
+ n = 0;
+#define IFACE_STAT(MEMBER, NAME) \
+ if (stats.MEMBER != UINT64_MAX) { \
+ keys[n] = NAME; \
+ values[n] = stats.MEMBER; \
+ n++; \
+ }
IFACE_STATS;
#undef IFACE_STAT
- ovs_assert(i == ARRAY_SIZE(keys));
+ ovs_assert(n <= N_IFACE_STATS);
- ovsrec_interface_set_statistics(iface->cfg, keys, values,
- ARRAY_SIZE(keys));
+ ovsrec_interface_set_statistics(iface->cfg, keys, values, n);
#undef IFACE_STATS
}
HMAP_FOR_EACH (iface, name_node, &br->iface_by_name) {
enum netdev_flags flags;
+ struct smap smap;
const char *link_state;
int64_t link_resets;
int current, error;
ovsrec_interface_set_link_resets(iface->cfg, &link_resets, 1);
iface_refresh_cfm_stats(iface);
+
+ smap_init(&smap);
+ if (!ofproto_port_get_bfd_status(br->ofproto, iface->ofp_port,
+ &smap)) {
+ ovsrec_interface_set_bfd_status(iface->cfg, &smap);
+ smap_destroy(&smap);
+ }
}
}
}
/* Bring up the local interface. */
netdev = local_iface->netdev;
- netdev_turn_flags_on(netdev, NETDEV_UP, true);
+ netdev_turn_flags_on(netdev, NETDEV_UP, NULL);
/* Configure the IP address and netmask. */
if (!c->local_netmask
{"name": "Open_vSwitch",
- "version": "7.1.0",
- "cksum": "432130924 19191",
+ "version": "7.2.0",
+ "cksum": "543912409 19436",
"tables": {
"Open_vSwitch": {
"columns": {
"maxInteger": 65279},
"min": 0,
"max": 1}},
+ "bfd": {
+ "type": {"key": "string", "value": "string",
+ "min": 0, "max": "unlimited"}},
+ "bfd_status": {
+ "type": {"key": "string", "value": "string",
+ "min": 0, "max": "unlimited"}},
"cfm_mpid": {
"type": {
"key": {"type": "integer"},
</p>
<column name="options" key="remote_ip">
- Required. The tunnel endpoint. Only unicast endpoints are supported.
+ <p>Required. The remote tunnel endpoint, one of:</p>
+
+ <ul>
+ <li>
+ An IPv4 address (not a DNS name), e.g. <code>192.168.0.123</code>.
+ Only unicast endpoints are supported.
+ </li>
+ <li>
+ The word <code>flow</code>. The tunnel accepts packets from any
+ remote tunnel endpoint. To process only packets from a specific
+ remote tunnel endpoint, the flow entries may match on the
+ <code>tun_src</code> field. When sending packets to a
+ <code>remote_ip=flow</code> tunnel, the flow actions must
+ explicitly set the <code>tun_dst</code> field to the IP address of
+ the desired remote tunnel endpoint, e.g. with a
+ <code>set_field</code> action.
+ </li>
+ </ul>
+
+ <p>
+ The remote tunnel endpoint for any packet received from a tunnel
+ is available in the <code>tun_src</code> field for matching in the
+ flow table.
+ </p>
</column>
<column name="options" key="local_ip">
- Optional. The destination IP that received packets must match.
- Default is to match all addresses.
+ <p>
+ Optional. The tunnel destination IP that received packets must
+ match. Default is to match all addresses. If specified, may be one
+ of:
+ </p>
+
+ <ul>
+ <li>
+ An IPv4 address (not a DNS name), e.g. <code>192.168.12.3</code>.
+ </li>
+ <li>
+ The word <code>flow</code>. The tunnel accepts packets sent to any
+ of the local IP addresses of the system running OVS. To process
+ only packets sent to a specific IP address, the flow entries may
+ match on the <code>tun_dst</code> field. When sending packets to a
+ <code>local_ip=flow</code> tunnel, the flow actions may
+ explicitly set the <code>tun_src</code> field to the desired IP
+ address, e.g. with a <code>set_field</code> action. However, while
+ routing the tunneled packet out, the local system may override the
+ specified address with the local IP address configured for the
+ outgoing system interface.
+
+ <p>
+ This option is valid only for tunnels also configured with the
+ <code>remote_ip=flow</code> option.
+ </p>
+ </li>
+ </ul>
+
+ <p>
+ The tunnel destination IP address for any packet received from a
+ tunnel is available in the <code>tun_dst</code> field for matching in
+ the flow table.
+ </p>
</column>
<column name="options" key="in_key">
</column>
</group>
+ <group title="Bidirectional Forwarding Detection (BFD)">
+ <p>
+ BFD, defined in RFC 5880 and RFC 5881, allows point to point
+ detection of connectivity failures by occasional transmission of
+ BFD control messages. It is implemented in Open vSwitch to serve
+ as a more popular and standards compliant alternative to CFM.
+ </p>
+
+ <p>
+ BFD operates by regularly transmitting BFD control messages at a
+ rate negotiated independently in each direction. Each endpoint
+ specifies the rate at which it expects to receive control messages,
+ and the rate at which it's willing to transmit them. Open vSwitch
+ uses a detection multiplier of three, meaning that an endpoint
+ which fails to receive BFD control messages for a period of three
+ times the expected reception rate, will signal a connectivity
+ fault. In the case of a unidirectional connectivity issue, the
+ system not receiving BFD control messages will signal the problem
+ to its peer in the messages it transmits.
+ </p>
+
+ <p>
+ The Open vSwitch implementation of BFD aims to comply faithfully
+ with the requirements put forth in RFC 5880. Currently, the only
+ known omission is ``Demand Mode'', which we hope to include in
+ the future. Open vSwitch does not implement the optional
+ Authentication or ``Echo Mode'' features.
+ </p>
+
+ <column name="bfd" key="enable">
+ When <code>true</code> BFD is enabled on this
+ <ref table="Interface"/>, otherwise it's disabled. Defaults to
+ <code>false</code>.
+ </column>
+
+ <column name="bfd" key="min_rx"
+ type='{"type": "integer", "minInteger": 1}'>
+ The fastest rate, in milliseconds, at which this BFD session is
+ willing to receive BFD control messages. The actual rate may be
+ slower if the remote endpoint isn't willing to transmit as quickly as
+ specified. Defaults to <code>1000</code>.
+ </column>
+
+ <column name="bfd" key="min_tx"
+ type='{"type": "integer", "minInteger": 1}'>
+ The fastest rate, in milliseconds, at which this BFD session is
+ willing to transmit BFD control messages. The actual rate may be
+ slower if the remote endpoint isn't willing to receive as quickly as
+ specified. Defaults to <code>100</code>.
+ </column>
+
+ <column name="bfd" key="cpath_down" type='{"type": "boolean"}'>
+ Concatenated path down may be used when the local system should not
+ have traffic forwarded to it for some reason other than a connectivity
+ failure on the interface being monitored. When a controller thinks
+ this may be the case, it may set <code>cpath_down</code> to
+ <code>true</code> which may cause the remote BFD session not to
+ forward traffic to this <ref table="Interface"/>. Defaults to
+ <code>false</code>.
+ </column>
+
+ <column name="bfd_status" key="state"
+ type='{"type": "string",
+ "enum": ["set", ["admin_down", "down", "init", "up"]]}'>
+ State of the BFD session. The BFD session is fully healthy and
+ negotiated if <code>UP</code>.
+ </column>
+
+ <column name="bfd_status" key="forwarding" type='{"type": "boolean"}'>
+ True if the BFD session believes this <ref table="Interface"/> may be
+ used to forward traffic. Typically this means the local session is
+ signaling <code>UP</code>, and the remote system isn't signaling a
+ problem such as concatenated path down.
+ </column>
+
+ <column name="bfd_status" key="diagnostic">
+ A short message indicating what the BFD session thinks is wrong in
+ case of a problem.
+ </column>
+
+ <column name="bfd_status" key="remote_state"
+ type='{"type": "string",
+ "enum": ["set", ["admin_down", "down", "init", "up"]]}'>
+ State of the remote endpoint's BFD session.
+ </column>
+
+ <column name="bfd_status" key="remote_diagnostic">
+ A short message indicating what the remote endpoint's BFD session
+ thinks is wrong in case of a problem.
+ </column>
+ </group>
+
<group title="Connectivity Fault Management">
<p>
802.1ag Connectivity Fault Management (CFM) allows a group of