signed off on commits in the Open vSwitch version control repository.
Aaron Rosen arosen@clemson.edu
+Alexandru Copot alex.mihai.c@gmail.com
Alexey I. Froloff raorn@altlinux.org
Alex Wang alexw@nicira.com
Andrew Evans aevans@nicira.com
Isaku Yamahata yamahata@valinux.co.jp
James P. roampune@gmail.com
James Page james.page@ubuntu.com
-Jarno Rajahalme jarno.rajahalme@nsn.com
+Jarno Rajahalme jrajahalme@nicira.com
Jean Tourrilhes jt@hpl.hp.com
Jeremy Stribling strib@nicira.com
Jesse Gross jesse@nicira.com
ovs-ofctl add-flow br0 priority=65535,actions=drop
+Q: I added a flow to send packets out the ingress port, like this:
+
+ ovs-ofctl add-flow br0 in_port=2,actions=2
+
+ but OVS drops the packets instead.
+
+A: Yes, OpenFlow requires a switch to ignore attempts to send a packet
+ out its ingress port. The rationale is that dropping these packets
+ makes it harder to loop the network. Sometimes this behavior can
+ even be convenient, e.g. it is often the desired behavior in a flow
+ that forwards a packet to several ports ("floods" the packet).
+
+ Sometimes one really needs to send a packet out its ingress port.
+ In this case, output to OFPP_IN_PORT, which in ovs-ofctl syntax is
+ expressed as just "in_port", e.g.:
+
+ ovs-ofctl add-flow br0 in_port=2,actions=in_port
+
+ This also works in some circumstances where the flow doesn't match
+ on the input port. For example, if you know that your switch has
+ five ports numbered 2 through 6, then the following will send every
+ received packet out every port, even its ingress port:
+
+ ovs-ofctl add-flow br0 actions=2,3,4,5,6,in_port
+
+ or, equivalently:
+
+ ovs-ofctl add-flow br0 actions=all,in_port
+
+ Sometimes, in complicated flow tables with multiple levels of
+ "resubmit" actions, a flow needs to output to a particular port
+ that may or may not be the ingress port. It's difficult to take
+ advantage of OFPP_IN_PORT in this situation. To help, Open vSwitch
+ provides, as an OpenFlow extension, the ability to modify the
+ in_port field. Whatever value is currently in the in_port field is
+ the port to which outputs will be dropped, as well as the
+ destination for OFPP_IN_PORT. This means that the following will
+ reliably output to port 2 or to ports 2 through 6, respectively:
+
+ ovs-ofctl add-flow br0 in_port=2,actions=load:0->NXM_OF_IN_PORT[],2
+ ovs-ofctl add-flow br0 actions=load:0->NXM_OF_IN_PORT[],2,3,4,5,6
+
+ If the input port is important, then one may save and restore it on
+ the stack:
+
+ ovs-ofctl add-flow br0 actions=push:NXM_OF_IN_PORT[],\
+ load:0->NXM_OF_IN_PORT[],\
+ 2,3,4,5,6,\
+ pop:NXM_OF_IN_PORT[]
+
Contact
-------
#include "vport-netdev.h"
#define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
-static void rehash_flow_table(struct work_struct *work);
-static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);
int ovs_net_id __read_mostly;
/* Check if this is a duplicate flow */
flow = ovs_flow_lookup(table, &key);
if (!flow) {
+ struct flow_table *new_table = NULL;
struct sw_flow_mask *mask_p;
+
/* Bail out if we're not allowed to create a new flow. */
error = -ENOENT;
if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
goto err_unlock_ovs;
/* Expand table, if necessary, to make room. */
- if (ovs_flow_tbl_need_to_expand(table)) {
- struct flow_table *new_table;
-
+ if (ovs_flow_tbl_need_to_expand(table))
new_table = ovs_flow_tbl_expand(table);
- if (!IS_ERR(new_table)) {
- rcu_assign_pointer(dp->table, new_table);
- ovs_flow_tbl_destroy(table, true);
- table = ovsl_dereference(dp->table);
- }
+ else if (time_after(jiffies, dp->last_rehash + REHASH_FLOW_INTERVAL))
+ new_table = ovs_flow_tbl_rehash(table);
+
+ if (new_table && !IS_ERR(new_table)) {
+ rcu_assign_pointer(dp->table, new_table);
+ ovs_flow_tbl_destroy(table, true);
+ table = ovsl_dereference(dp->table);
+ dp->last_rehash = jiffies;
}
/* Allocate flow. */
return err;
}
-static void rehash_flow_table(struct work_struct *work)
-{
- struct datapath *dp;
- struct net *net;
-
- ovs_lock();
- rtnl_lock();
- for_each_net(net) {
- struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
-
- list_for_each_entry(dp, &ovs_net->dps, list_node) {
- struct flow_table *old_table = ovsl_dereference(dp->table);
- struct flow_table *new_table;
-
- new_table = ovs_flow_tbl_rehash(old_table);
- if (!IS_ERR(new_table)) {
- rcu_assign_pointer(dp->table, new_table);
- ovs_flow_tbl_destroy(old_table, true);
- }
- }
- }
- rtnl_unlock();
- ovs_unlock();
- schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
-}
-
static int __net_init ovs_init_net(struct net *net)
{
struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
pr_info("Open vSwitch switching datapath %s, built "__DATE__" "__TIME__"\n",
VERSION);
- err = ovs_workqueues_init();
- if (err)
- goto error;
-
err = ovs_flow_init();
if (err)
- goto error_wq;
+ goto error;
err = ovs_vport_init();
if (err)
if (err < 0)
goto error_unreg_notifier;
- schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
-
return 0;
error_unreg_notifier:
ovs_vport_exit();
error_flow_exit:
ovs_flow_exit();
-error_wq:
- ovs_workqueues_exit();
error:
return err;
}
static void dp_cleanup(void)
{
- cancel_delayed_work_sync(&rehash_flow_wq);
dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
unregister_netdevice_notifier(&ovs_dp_device_notifier);
unregister_pernet_device(&ovs_net_ops);
rcu_barrier();
ovs_vport_exit();
ovs_flow_exit();
- ovs_workqueues_exit();
}
module_init(dp_init);
* ovs_mutex and RCU.
* @stats_percpu: Per-CPU datapath statistics.
* @net: Reference to net namespace.
+ * @last_rehash: Timestamp of last rehash.
*
* Context: See the comment on locking at the top of datapath.c for additional
* locking information.
/* Network namespace ref. */
struct net *net;
#endif
+ unsigned long last_rehash;
};
/**
if (event == NETDEV_UNREGISTER) {
ovs_net = net_generic(dev_net(dev), ovs_net_id);
- queue_work(&ovs_net->dp_notify_work);
+ queue_work(system_wq, &ovs_net->dp_notify_work);
}
return NOTIFY_DONE;
if (type > OVS_KEY_ATTR_MAX) {
OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n",
type, OVS_KEY_ATTR_MAX);
+ return -EINVAL;
}
if (attrs & (1ULL << type)) {
linux/compat/reciprocal_div.c \
linux/compat/skbuff-openvswitch.c \
linux/compat/vxlan.c \
- linux/compat/workqueue.c \
linux/compat/utils.c
openvswitch_headers += \
linux/compat/gso.h \
#ifndef __LINUX_WORKQUEUE_WRAPPER_H
#define __LINUX_WORKQUEUE_WRAPPER_H 1
-#include <linux/timer.h>
+#include_next <linux/workqueue.h>
-int __init ovs_workqueues_init(void);
-void ovs_workqueues_exit(void);
-
-/* Older kernels have an implementation of work queues with some very bad
- * characteristics when trying to cancel work (potential deadlocks, use after
- * free, etc. Therefore we implement simple ovs specific work queue using
- * single worker thread. work-queue API are kept similar for compatibility.
- * It seems it is useful even on newer kernel. As it can avoid system wide
- * freeze in event of softlockup due to workq blocked on genl_lock.
- */
-
-struct work_struct;
-
-typedef void (*work_func_t)(struct work_struct *work);
-
-#define work_data_bits(work) ((unsigned long *)(&(work)->data))
-
-struct work_struct {
-#define WORK_STRUCT_PENDING 0 /* T if work item pending execution */
- atomic_long_t data;
- struct list_head entry;
- work_func_t func;
-#ifdef CONFIG_LOCKDEP
- struct lockdep_map lockdep_map;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)
+#define queue_work(wq, dw) schedule_work(dw);
#endif
-};
-
-#define WORK_DATA_INIT() ATOMIC_LONG_INIT(0)
-
-#define work_clear_pending(work) \
- clear_bit(WORK_STRUCT_PENDING, work_data_bits(work))
-
-struct delayed_work {
- struct work_struct work;
- struct timer_list timer;
-};
-
-#define __WORK_INITIALIZER(n, f) { \
- .data = WORK_DATA_INIT(), \
- .entry = { &(n).entry, &(n).entry }, \
- .func = (f), \
-}
-
-#define __DELAYED_WORK_INITIALIZER(n, f) { \
- .work = __WORK_INITIALIZER((n).work, (f)), \
- .timer = TIMER_INITIALIZER(NULL, 0, 0), \
-}
-
-#define DECLARE_DELAYED_WORK(n, f) \
- struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f)
-
-#define schedule_delayed_work rpl_schedule_delayed_work
-int schedule_delayed_work(struct delayed_work *dwork, unsigned long delay);
-
-#define cancel_delayed_work_sync rpl_cancel_delayed_work_sync
-int cancel_delayed_work_sync(struct delayed_work *dwork);
-
-#define INIT_WORK(_work, _func) \
- do { \
- (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \
- INIT_LIST_HEAD(&(_work)->entry); \
- (_work)->func = (_func); \
- } while (0)
-
-extern void flush_scheduled_work(void);
-extern void queue_work(struct work_struct *work);
-extern bool cancel_work_sync(struct work_struct *work);
#endif
#define PACKET_RCVD 0
#define PACKET_REJECT 1
-int iptunnel_xmit(struct net *net, struct rtable *rt,
+int iptunnel_xmit(struct rtable *rt,
struct sk_buff *skb,
__be32 src, __be32 dst, __u8 proto,
__u8 tos, __u8 ttl, __be16 df);
void vxlan_sock_release(struct vxlan_sock *vs);
-int vxlan_xmit_skb(struct net *net, struct vxlan_sock *vs,
+int vxlan_xmit_skb(struct vxlan_sock *vs,
struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
__be16 src_port, __be16 dst_port, __be32 vni);
#include "compat.h"
#include "gso.h"
-int iptunnel_xmit(struct net *net, struct rtable *rt,
+int iptunnel_xmit(struct rtable *rt,
struct sk_buff *skb,
__be32 src, __be32 dst, __u8 proto,
__u8 tos, __u8 ttl, __be16 df)
return 0;
}
-int vxlan_xmit_skb(struct net *net, struct vxlan_sock *vs,
+int vxlan_xmit_skb(struct vxlan_sock *vs,
struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
__be16 src_port, __be16 dst_port, __be32 vni)
if (err)
return err;
- return iptunnel_xmit(net, rt, skb, src, dst,
- IPPROTO_UDP, tos, ttl, df);
+ return iptunnel_xmit(rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df);
}
static void rcu_free_vs(struct rcu_head *rcu)
hlist_del_rcu(&vs->hlist);
spin_unlock(&vn->sock_lock);
- queue_work(&vs->del_work);
+ queue_work(system_wq, &vs->del_work);
}
static int vxlan_init_net(struct net *net)
+++ /dev/null
-/*
- * Derived from the kernel/workqueue.c
- *
- * This is the generic async execution mechanism. Work items as are
- * executed in process context.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/signal.h>
-#include <linux/completion.h>
-#include <linux/workqueue.h>
-#include <linux/slab.h>
-#include <linux/cpu.h>
-#include <linux/notifier.h>
-#include <linux/kthread.h>
-#include <linux/hardirq.h>
-#include <linux/mempolicy.h>
-#include <linux/kallsyms.h>
-#include <linux/debug_locks.h>
-#include <linux/lockdep.h>
-#include <linux/idr.h>
-
-static spinlock_t wq_lock;
-static struct list_head workq;
-static wait_queue_head_t more_work;
-static struct task_struct *workq_thread;
-static struct work_struct *current_work;
-
-static void add_work_to_ovs_wq(struct work_struct *work)
-{
- list_add_tail(&work->entry, &workq);
- wake_up(&more_work);
-}
-static void __queue_work(struct work_struct *work)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&wq_lock, flags);
- add_work_to_ovs_wq(work);
- spin_unlock_irqrestore(&wq_lock, flags);
-}
-
-void queue_work(struct work_struct *work)
-{
- if (test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work)))
- return;
- __queue_work(work);
-}
-
-static void _delayed_work_timer_fn(unsigned long __data)
-{
- struct delayed_work *dwork = (struct delayed_work *)__data;
- __queue_work(&dwork->work);
-}
-
-static void __queue_delayed_work(struct delayed_work *dwork,
- unsigned long delay)
-{
- struct timer_list *timer = &dwork->timer;
- struct work_struct *work = &dwork->work;
-
- BUG_ON(timer_pending(timer));
- BUG_ON(!list_empty(&work->entry));
-
- timer->expires = jiffies + delay;
- timer->data = (unsigned long)dwork;
- timer->function = _delayed_work_timer_fn;
-
- add_timer(timer);
-}
-
-int schedule_delayed_work(struct delayed_work *dwork, unsigned long delay)
-{
- if (test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(&dwork->work)))
- return 0;
-
- if (delay == 0)
- __queue_work(&dwork->work);
- else
- __queue_delayed_work(dwork, delay);
-
- return 1;
-}
-
-struct wq_barrier {
- struct work_struct work;
- struct completion done;
-};
-
-static void wq_barrier_func(struct work_struct *work)
-{
- struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
- complete(&barr->done);
-}
-
-static void workqueue_barrier(struct work_struct *work)
-{
- bool need_barrier;
- struct wq_barrier barr;
-
- spin_lock_irq(&wq_lock);
- if (current_work != work)
- need_barrier = false;
- else {
- INIT_WORK(&barr.work, wq_barrier_func);
- init_completion(&barr.done);
- add_work_to_ovs_wq(&barr.work);
- need_barrier = true;
- }
- spin_unlock_irq(&wq_lock);
-
- if (need_barrier)
- wait_for_completion(&barr.done);
-}
-
-static int try_to_grab_pending(struct work_struct *work)
-{
- int ret;
-
- BUG_ON(in_interrupt());
-
- if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work)))
- return 0;
-
- spin_lock_irq(&wq_lock);
- if (!list_empty(&work->entry)) {
- list_del_init(&work->entry);
- ret = 0;
- } else
- /* Already executed, retry. */
- ret = -1;
- spin_unlock_irq(&wq_lock);
-
- return ret;
-}
-
-static int __cancel_work_timer(struct work_struct *work,
- struct timer_list *timer)
-{
- int ret;
-
- for (;;) {
- ret = (timer && likely(del_timer(timer)));
- if (ret) /* Was active timer, return true. */
- break;
-
- /* Inactive timer case */
- ret = try_to_grab_pending(work);
- if (!ret)
- break;
- }
- workqueue_barrier(work);
- work_clear_pending(work);
- return ret;
-}
-
-int cancel_delayed_work_sync(struct delayed_work *dwork)
-{
- return __cancel_work_timer(&dwork->work, &dwork->timer);
-}
-
-bool cancel_work_sync(struct work_struct *work)
-{
- return __cancel_work_timer(work, NULL);
-}
-
-static void run_workqueue(void)
-{
- spin_lock_irq(&wq_lock);
- while (!list_empty(&workq)) {
- struct work_struct *work = list_entry(workq.next,
- struct work_struct, entry);
-
- work_func_t f = work->func;
- list_del_init(workq.next);
- current_work = work;
- spin_unlock_irq(&wq_lock);
-
- work_clear_pending(work);
- f(work);
-
- BUG_ON(in_interrupt());
- spin_lock_irq(&wq_lock);
- current_work = NULL;
- }
- spin_unlock_irq(&wq_lock);
-}
-
-static int worker_thread(void *dummy)
-{
- for (;;) {
- wait_event_interruptible(more_work,
- (kthread_should_stop() || !list_empty(&workq)));
-
- if (kthread_should_stop())
- break;
-
- run_workqueue();
- }
-
- return 0;
-}
-
-int __init ovs_workqueues_init(void)
-{
- spin_lock_init(&wq_lock);
- INIT_LIST_HEAD(&workq);
- init_waitqueue_head(&more_work);
-
- workq_thread = kthread_create(worker_thread, NULL, "ovs_workq");
- if (IS_ERR(workq_thread))
- return PTR_ERR(workq_thread);
-
- wake_up_process(workq_thread);
- return 0;
-}
-
-void ovs_workqueues_exit(void)
-{
- BUG_ON(!list_empty(&workq));
- kthread_stop(workq_thread);
-}
int tunnel_hlen,
__be32 seq, __be16 gre64_flag)
{
- struct net *net = ovs_dp_get_net(vport->dp);
struct rtable *rt;
int min_headroom;
__be16 df;
skb->local_df = 1;
- return iptunnel_xmit(net, rt, skb, saddr,
+ return iptunnel_xmit(rt, skb, saddr,
OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE,
OVS_CB(skb)->tun_key->ipv4_tos,
OVS_CB(skb)->tun_key->ipv4_ttl, df);
#include <net/xfrm.h>
#include "datapath.h"
+#include "gso.h"
#include "vport.h"
/*
/* Compute source UDP port for outgoing packet.
* Currently we use the flow hash.
*/
-static u16 ovs_tnl_get_src_port(struct sk_buff *skb)
+static u16 get_src_port(struct sk_buff *skb)
{
int low;
int high;
}
static void lisp_build_header(const struct vport *vport,
- struct sk_buff *skb,
- int tunnel_hlen)
+ struct sk_buff *skb)
{
struct lisp_port *lisp_port = lisp_vport(vport);
struct udphdr *udph = udp_hdr(skb);
const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
udph->dest = lisp_port->dst_port;
- udph->source = htons(ovs_tnl_get_src_port(skb));
+ udph->source = htons(get_src_port(skb));
udph->check = 0;
udph->len = htons(skb->len - skb_transport_offset(skb));
lisph->u2.word2.locator_status_bits = 1;
}
-/**
- * ovs_tnl_rcv - ingress point for generic tunnel code
- *
- * @vport: port this packet was received on
- * @skb: received packet
- * @tun_key: tunnel that carried packet
- *
- * Must be called with rcu_read_lock.
- *
- * Packets received by this function are in the following state:
- * - skb->data points to the inner Ethernet header.
- * - The inner Ethernet header is in the linear data area.
- * - The layer pointers are undefined.
- */
-static void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb,
- struct ovs_key_ipv4_tunnel *tun_key)
-{
- struct ethhdr *eh;
-
- skb_reset_mac_header(skb);
- eh = eth_hdr(skb);
-
- if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN))
- skb->protocol = eh->h_proto;
- else
- skb->protocol = htons(ETH_P_802_2);
-
- skb_dst_drop(skb);
- nf_reset(skb);
- skb_clear_rxhash(skb);
- secpath_reset(skb);
- vlan_set_tci(skb, 0);
-
- ovs_vport_receive(vport, skb, tun_key);
-}
-
/* Called with rcu_read_lock and BH disabled. */
static int lisp_rcv(struct sock *sk, struct sk_buff *skb)
{
if (unlikely(!lisp_port))
goto error;
- if (unlikely(!pskb_may_pull(skb, LISP_HLEN)))
+ if (iptunnel_pull_header(skb, LISP_HLEN, 0))
goto error;
lisph = lisp_hdr(skb);
- skb_pull_rcsum(skb, LISP_HLEN);
-
if (lisph->instance_id_present != 1)
key = 0;
else
default:
goto error;
}
+ skb->protocol = protocol;
/* Add Ethernet header */
ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN);
ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
- ovs_tnl_rcv(vport_from_priv(lisp_port), skb, &tun_key);
+ ovs_vport_receive(vport_from_priv(lisp_port), skb, &tun_key);
goto out;
error:
return ERR_PTR(err);
}
-static bool need_linearize(const struct sk_buff *skb)
+static void lisp_fix_segment(struct sk_buff *skb)
{
- int i;
-
- if (unlikely(skb_shinfo(skb)->frag_list))
- return true;
-
- /*
- * Generally speaking we should linearize if there are paged frags.
- * However, if all of the refcounts are 1 we know nobody else can
- * change them from underneath us and we can skip the linearization.
- */
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- if (unlikely(page_count(skb_frag_page(&skb_shinfo(skb)->frags[i])) > 1))
- return true;
+ struct udphdr *udph = udp_hdr(skb);
- return false;
+ udph->len = htons(skb->len - skb_transport_offset(skb));
}
-static struct sk_buff *handle_offloads(struct sk_buff *skb)
+static void handle_offloads(struct sk_buff *skb)
{
- int err;
-
- if (skb_is_gso(skb)) {
- struct sk_buff *nskb;
- char cb[sizeof(skb->cb)];
-
- memcpy(cb, skb->cb, sizeof(cb));
-
- nskb = __skb_gso_segment(skb, 0, false);
- if (IS_ERR(nskb)) {
- err = PTR_ERR(nskb);
- goto error;
- }
-
- consume_skb(skb);
- skb = nskb;
- while (nskb) {
- memcpy(nskb->cb, cb, sizeof(cb));
- nskb = nskb->next;
- }
- } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
- /* Pages aren't locked and could change at any time.
- * If this happens after we compute the checksum, the
- * checksum will be wrong. We linearize now to avoid
- * this problem.
- */
- if (unlikely(need_linearize(skb))) {
- err = __skb_linearize(skb);
- if (unlikely(err))
- goto error;
- }
-
- err = skb_checksum_help(skb);
- if (unlikely(err))
- goto error;
- }
-
- skb->ip_summed = CHECKSUM_NONE;
- return skb;
-
-error:
- return ERR_PTR(err);
+ if (skb_is_gso(skb))
+ OVS_GSO_CB(skb)->fix_segment = lisp_fix_segment;
+ else if (skb->ip_summed != CHECKSUM_PARTIAL)
+ skb->ip_summed = CHECKSUM_NONE;
}
-static int ovs_tnl_send(struct vport *vport, struct sk_buff *skb,
- u8 ipproto, int tunnel_hlen,
- void (*build_header)(const struct vport *,
- struct sk_buff *,
- int tunnel_hlen))
+static int lisp_send(struct vport *vport, struct sk_buff *skb)
{
- int min_headroom;
+ int network_offset = skb_network_offset(skb);
struct rtable *rt;
+ int min_headroom;
__be32 saddr;
- int sent_len = 0;
+ __be16 df;
+ int sent_len;
int err;
- struct sk_buff *nskb;
+
+ if (unlikely(!OVS_CB(skb)->tun_key))
+ return -EINVAL;
+
+ if (skb->protocol != htons(ETH_P_IP) &&
+ skb->protocol != htons(ETH_P_IPV6)) {
+ kfree_skb(skb);
+ return 0;
+ }
/* Route lookup */
saddr = OVS_CB(skb)->tun_key->ipv4_src;
rt = find_route(ovs_dp_get_net(vport->dp),
&saddr,
OVS_CB(skb)->tun_key->ipv4_dst,
- ipproto,
+ IPPROTO_UDP,
OVS_CB(skb)->tun_key->ipv4_tos,
skb->mark);
if (IS_ERR(rt)) {
goto error;
}
- tunnel_hlen += sizeof(struct iphdr);
-
min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
- + tunnel_hlen
- + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
+ + sizeof(struct iphdr) + LISP_HLEN;
if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
int head_delta = SKB_DATA_ALIGN(min_headroom -
goto err_free_rt;
}
+ /* Reset l2 headers. */
+ skb_pull(skb, network_offset);
+ skb_reset_mac_header(skb);
+ vlan_set_tci(skb, 0);
+
+ skb_reset_inner_headers(skb);
+
+ __skb_push(skb, LISP_HLEN);
+ skb_reset_transport_header(skb);
+
+ lisp_build_header(vport, skb);
+
/* Offloading */
- nskb = handle_offloads(skb);
- if (IS_ERR(nskb)) {
- err = PTR_ERR(nskb);
- goto err_free_rt;
- }
- skb = nskb;
-
- /* Reset SKB */
- nf_reset(skb);
- secpath_reset(skb);
- skb_dst_drop(skb);
- skb_clear_rxhash(skb);
-
- while (skb) {
- struct sk_buff *next_skb = skb->next;
- struct iphdr *iph;
- int frag_len;
-
- skb->next = NULL;
-
- if (vlan_tx_tag_present(skb)) {
- if (unlikely(!__vlan_put_tag(skb,
- skb->vlan_proto,
- vlan_tx_tag_get(skb))))
- goto next;
-
- vlan_set_tci(skb, 0);
- }
-
- frag_len = skb->len;
- skb_push(skb, tunnel_hlen);
- skb_reset_network_header(skb);
- skb_set_transport_header(skb, sizeof(struct iphdr));
-
- if (next_skb)
- skb_dst_set(skb, dst_clone(&rt_dst(rt)));
- else
- skb_dst_set(skb, &rt_dst(rt));
-
- /* Push Tunnel header. */
- build_header(vport, skb, tunnel_hlen);
-
- /* Push IP header. */
- iph = ip_hdr(skb);
- iph->version = 4;
- iph->ihl = sizeof(struct iphdr) >> 2;
- iph->protocol = ipproto;
- iph->daddr = OVS_CB(skb)->tun_key->ipv4_dst;
- iph->saddr = saddr;
- iph->tos = OVS_CB(skb)->tun_key->ipv4_tos;
- iph->ttl = OVS_CB(skb)->tun_key->ipv4_ttl;
- iph->frag_off = OVS_CB(skb)->tun_key->tun_flags &
+ handle_offloads(skb);
+ skb->local_df = 1;
+
+ df = OVS_CB(skb)->tun_key->tun_flags &
TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
- /*
- * Allow our local IP stack to fragment the outer packet even
- * if the DF bit is set as a last resort. We also need to
- * force selection of an IP ID here with __ip_select_ident(),
- * as ip_select_ident() assumes a proper ID is not needed when
- * when the DF bit is set.
- */
- skb->local_df = 1;
- __ip_select_ident(iph, skb_dst(skb), 0);
-
- memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
-
- err = ip_local_out(skb);
- if (unlikely(net_xmit_eval(err)))
- goto next;
-
- sent_len += frag_len;
-
-next:
- skb = next_skb;
- }
+ sent_len = iptunnel_xmit(rt, skb,
+ saddr, OVS_CB(skb)->tun_key->ipv4_dst,
+ IPPROTO_UDP, OVS_CB(skb)->tun_key->ipv4_tos,
+ OVS_CB(skb)->tun_key->ipv4_ttl, df);
- return sent_len;
+ return sent_len > 0 ? sent_len + network_offset : sent_len;
err_free_rt:
ip_rt_put(rt);
return err;
}
-static int lisp_tnl_send(struct vport *vport, struct sk_buff *skb)
-{
- int tnl_len;
- int network_offset = skb_network_offset(skb);
-
- if (unlikely(!OVS_CB(skb)->tun_key))
- return -EINVAL;
-
- /* We only encapsulate IPv4 and IPv6 packets */
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- case htons(ETH_P_IPV6):
- /* Pop off "inner" Ethernet header */
- skb_pull(skb, network_offset);
- tnl_len = ovs_tnl_send(vport, skb, IPPROTO_UDP,
- LISP_HLEN, lisp_build_header);
- return tnl_len > 0 ? tnl_len + network_offset : tnl_len;
- default:
- kfree_skb(skb);
- return 0;
- }
-}
-
static const char *lisp_get_name(const struct vport *vport)
{
struct lisp_port *lisp_port = lisp_vport(vport);
.destroy = lisp_tnl_destroy,
.get_name = lisp_get_name,
.get_options = lisp_get_options,
- .send = lisp_tnl_send,
+ .send = lisp_send,
};
{
struct vxlan_port *vxlan_port = vxlan_vport(vport);
__be16 dst_port = inet_sport(vxlan_port->vs->sock->sk);
- struct net *net = ovs_dp_get_net(vport->dp);
struct rtable *rt;
__be16 src_port;
__be32 saddr;
inet_get_local_port_range(&port_min, &port_max);
src_port = vxlan_src_port(port_min, port_max, skb);
- err = vxlan_xmit_skb(net, vxlan_port->vs, rt, skb,
+ err = vxlan_xmit_skb(vxlan_port->vs, rt, skb,
saddr, OVS_CB(skb)->tun_key->ipv4_dst,
OVS_CB(skb)->tun_key->ipv4_tos,
OVS_CB(skb)->tun_key->ipv4_ttl, df,
These flags are used in ofp_table_stats messages to describe the current
configuration and in ofp_table_mod messages to configure table behavior. */
enum ofp11_table_config {
- OFPTC11_TABLE_MISS_CONTROLLER = 0, /* Send to controller. */
+ OFPTC11_TABLE_MISS_CONTROLLER = 0 << 0, /* Send to controller. */
OFPTC11_TABLE_MISS_CONTINUE = 1 << 0, /* Continue to the next table in the
pipeline (OpenFlow 1.0
behavior). */
- OFPTC11_TABLE_MISS_DROP = 1 << 1, /* Drop the packet. */
- OFPTC11_TABLE_MISS_MASK = 3
+ OFPTC11_TABLE_MISS_DROP = 2 << 0, /* Drop the packet. */
+#define OFPTC11_TABLE_MISS_MASK (3 << 0)
};
/* Flow setup and teardown (controller -> datapath). */
struct dp_netdev_port *port;
struct netdev *netdev;
struct netdev_rx *rx;
+ enum netdev_flags flags;
const char *open_type;
int mtu;
int error;
if (error) {
return error;
}
- /* XXX reject loopback devices */
/* XXX reject non-Ethernet devices */
+ netdev_get_flags(netdev, &flags);
+ if (flags & NETDEV_LOOPBACK) {
+ VLOG_ERR("%s: cannot add a loopback device", devname);
+ netdev_close(netdev);
+ return EINVAL;
+ }
+
error = netdev_rx_open(netdev, &rx);
if (error
&& !(error == EOPNOTSUPP && dpif_netdev_class_is_dummy(dp->class))) {
case OFPTYPE_FLOW_MOD:
case OFPTYPE_GROUP_MOD:
case OFPTYPE_PORT_MOD:
+ case OFPTYPE_TABLE_MOD:
case OFPTYPE_BARRIER_REQUEST:
case OFPTYPE_BARRIER_REPLY:
case OFPTYPE_QUEUE_GET_CONFIG_REQUEST:
iff |= IFF_PPROMISC;
#endif
}
+ if (nd & NETDEV_LOOPBACK) {
+ iff |= IFF_LOOPBACK;
+ }
return iff;
}
if (iff & IFF_PROMISC) {
nd |= NETDEV_PROMISC;
}
+ if (iff & IFF_LOOPBACK) {
+ nd |= NETDEV_LOOPBACK;
+ }
return nd;
}
if (nd & NETDEV_PROMISC) {
iff |= IFF_PROMISC;
}
+ if (nd & NETDEV_LOOPBACK) {
+ iff |= IFF_LOOPBACK;
+ }
return iff;
}
if (iff & IFF_PROMISC) {
nd |= NETDEV_PROMISC;
}
+ if (iff & IFF_LOOPBACK) {
+ nd |= NETDEV_LOOPBACK;
+ }
return nd;
}
/* OFPT 1.1+ (16): struct ofp11_port_mod. */
OFPRAW_OFPT11_PORT_MOD,
+ /* OFPT 1.1+ (17): struct ofp11_table_mod. */
+ OFPRAW_OFPT11_TABLE_MOD,
+
/* OFPT 1.0 (18): void. */
OFPRAW_OFPT10_BARRIER_REQUEST,
/* OFPT 1.1+ (20): void. */
OFPTYPE_GROUP_MOD, /* OFPRAW_OFPT11_GROUP_MOD. */
OFPTYPE_PORT_MOD, /* OFPRAW_OFPT10_PORT_MOD.
* OFPRAW_OFPT11_PORT_MOD. */
+ OFPTYPE_TABLE_MOD, /* OFPRAW_OFPT11_TABLE_MOD. */
/* Barrier messages. */
OFPTYPE_BARRIER_REQUEST, /* OFPRAW_OFPT10_BARRIER_REQUEST.
return error;
}
+/* Converts 'table_id' and 'flow_miss_handling' (as described for the
+ * "mod-table" command in the ovs-ofctl man page) into 'tm', suitable for
+ * encoding as a table_mod message to send to a switch.
+ *
+ * Returns NULL if successful, otherwise a malloc()'d string describing the
+ * error.  The caller is responsible for freeing the returned string. */
+char * WARN_UNUSED_RESULT
+parse_ofp_table_mod(struct ofputil_table_mod *tm, const char *table_id,
+ const char *flow_miss_handling,
+ enum ofputil_protocol *usable_protocols)
+{
+ /* Table mod requires at least OF 1.1. */
+ *usable_protocols = OFPUTIL_P_OF11_UP;
+
+ if (!strcasecmp(table_id, "all")) {
+ tm->table_id = 255;
+ } else {
+ char *error = str_to_u8(table_id, "table_id", &tm->table_id);
+ if (error) {
+ return error;
+ }
+ }
+
+ if (strcmp(flow_miss_handling, "controller") == 0) {
+ tm->config = OFPTC11_TABLE_MISS_CONTROLLER;
+ } else if (strcmp(flow_miss_handling, "continue") == 0) {
+ tm->config = OFPTC11_TABLE_MISS_CONTINUE;
+ } else if (strcmp(flow_miss_handling, "drop") == 0) {
+ tm->config = OFPTC11_TABLE_MISS_DROP;
+ } else {
+ return xasprintf("invalid flow_miss_handling %s", flow_miss_handling);
+ }
+
+ if (tm->table_id == 0xfe && tm->config == OFPTC11_TABLE_MISS_CONTINUE) {
+ return xstrdup("last table's flow miss handling can not be continue");
+ }
+
+ return NULL;
+}
+
+
/* Opens file 'file_name' and reads each line as a flow_mod of the specified
* type (one of OFPFC_*). Stores each flow_mod in '*fm', an array allocated
* on the caller's behalf, and the number of flow_mods in '*n_fms'.
struct ofputil_flow_stats_request;
struct ofputil_group_mod;
struct ofputil_meter_mod;
+struct ofputil_table_mod;
enum ofputil_protocol;
char *parse_ofp_str(struct ofputil_flow_mod *, int command, const char *str_,
uint16_t command,
enum ofputil_protocol *usable_protocols)
WARN_UNUSED_RESULT;
+
+char *parse_ofp_table_mod(struct ofputil_table_mod *,
+ const char *table_id, const char *flow_miss_handling,
+ enum ofputil_protocol *usable_protocols)
+ WARN_UNUSED_RESULT;
+
char *parse_ofp_flow_mod_file(const char *file_name, uint16_t command,
struct ofputil_flow_mod **fms, size_t *n_fms,
enum ofputil_protocol *usable_protocols)
}
}
+static void
+ofp_print_table_miss_config(struct ds *string, const uint32_t config)
+{
+ uint32_t table_miss_config = config & OFPTC11_TABLE_MISS_MASK;
+
+ switch (table_miss_config) {
+ case OFPTC11_TABLE_MISS_CONTROLLER:
+ ds_put_cstr(string, "controller\n");
+ break;
+ case OFPTC11_TABLE_MISS_CONTINUE:
+ ds_put_cstr(string, "continue\n");
+ break;
+ case OFPTC11_TABLE_MISS_DROP:
+ ds_put_cstr(string, "drop\n");
+ break;
+ default:
+ ds_put_cstr(string, "Unknown\n");
+ break;
+ }
+}
+
+static void
+ofp_print_table_mod(struct ds *string, const struct ofp_header *oh)
+{
+ struct ofputil_table_mod pm;
+ enum ofperr error;
+
+ error = ofputil_decode_table_mod(oh, &pm);
+ if (error) {
+ ofp_print_error(string, error);
+ return;
+ }
+
+ if (pm.table_id == 0xff) {
+ ds_put_cstr(string, " table_id: ALL_TABLES");
+ } else {
+ ds_put_format(string, " table_id=%"PRIu8, pm.table_id);
+ }
+
+ ds_put_cstr(string, ", flow_miss_config=");
+ ofp_print_table_miss_config(string, pm.config);
+}
+
static void
ofp_print_meter_flags(struct ds *s, uint16_t flags)
{
case OFPTYPE_QUEUE_GET_CONFIG_REQUEST:
case OFPTYPE_QUEUE_GET_CONFIG_REPLY:
- case OFPTYPE_GET_ASYNC_REQUEST:
- case OFPTYPE_GET_ASYNC_REPLY:
case OFPTYPE_TABLE_FEATURES_STATS_REQUEST:
case OFPTYPE_TABLE_FEATURES_STATS_REPLY:
ofp_print_not_implemented(string);
ofp_print_port_mod(string, oh);
break;
+ case OFPTYPE_TABLE_MOD:
+ ofp_print_table_mod(string, oh);
+ break;
+
case OFPTYPE_METER_MOD:
ofp_print_meter_mod(string, oh);
break;
ofp_print_nxt_set_controller_id(string, ofpmsg_body(oh));
break;
+ case OFPTYPE_GET_ASYNC_REPLY:
case OFPTYPE_SET_ASYNC_CONFIG:
ofp_print_nxt_set_async_config(string, ofpmsg_body(oh));
break;
-
+ case OFPTYPE_GET_ASYNC_REQUEST:
+ break;
case OFPTYPE_FLOW_MONITOR_CANCEL:
ofp_print_nxt_flow_monitor_cancel(string, msg);
break;
ofpmsg_update_length(b);
return b;
}
-\f
+
/* ofputil_port_mod */
/* Decodes the OpenFlow "port mod" message in '*oh' into an abstract form in
opm->advertise = netdev_port_features_to_ofp11(pm->advertise);
break;
}
+ default:
+ NOT_REACHED();
+ }
+
+ return b;
+}
+
+/* ofputil_table_mod */
+
+/* Decodes the OpenFlow "table mod" message in '*oh' into an abstract form in
+ * '*pm'. Returns 0 if successful, otherwise an OFPERR_* value. */
+enum ofperr
+ofputil_decode_table_mod(const struct ofp_header *oh,
+ struct ofputil_table_mod *pm)
+{
+ enum ofpraw raw;
+ struct ofpbuf b;
+
+ ofpbuf_use_const(&b, oh, ntohs(oh->length));
+ raw = ofpraw_pull_assert(&b);
+
+ if (raw == OFPRAW_OFPT11_TABLE_MOD) {
+ const struct ofp11_table_mod *otm = b.data;
+
+ pm->table_id = otm->table_id;
+ pm->config = ntohl(otm->config);
+ } else {
+ return OFPERR_OFPBRC_BAD_TYPE;
+ }
+ return 0;
+}
+
+/* Converts the abstract form of a "table mod" message in '*pm' into an OpenFlow
+ * message suitable for 'protocol', and returns that encoded form in a buffer
+ * owned by the caller. */
+struct ofpbuf *
+ofputil_encode_table_mod(const struct ofputil_table_mod *pm,
+ enum ofputil_protocol protocol)
+{
+ enum ofp_version ofp_version = ofputil_protocol_to_ofp_version(protocol);
+ struct ofpbuf *b;
+
+ switch (ofp_version) {
+ case OFP10_VERSION: {
+ ovs_fatal(0, "table mod needs OpenFlow 1.1 or later "
+ "(\'-O OpenFlow11\')");
+ break;
+ }
+ case OFP11_VERSION:
+ case OFP12_VERSION:
+ case OFP13_VERSION: {
+ struct ofp11_table_mod *otm;
+
+ b = ofpraw_alloc(OFPRAW_OFPT11_TABLE_MOD, ofp_version, 0);
+ otm = ofpbuf_put_zeros(b, sizeof *otm);
+ otm->table_id = pm->table_id;
+ otm->config = htonl(pm->config);
+ break;
+ }
default:
NOT_REACHED();
}
/* Protocol-independent flow_mod flags. */
enum ofputil_flow_mod_flags {
+ /* Flags that are maintained with a flow as part of its state.
+ *
+ * (OFPUTIL_FF_EMERG would be here too, if OVS supported it.) */
OFPUTIL_FF_SEND_FLOW_REM = 1 << 0, /* All versions. */
- OFPUTIL_FF_CHECK_OVERLAP = 1 << 1, /* All versions. */
- OFPUTIL_FF_EMERG = 1 << 2, /* OpenFlow 1.0 only. */
- OFPUTIL_FF_RESET_COUNTS = 1 << 3, /* OpenFlow 1.2+. */
- OFPUTIL_FF_NO_PKT_COUNTS = 1 << 4, /* OpenFlow 1.3+. */
- OFPUTIL_FF_NO_BYT_COUNTS = 1 << 5 /* OpenFlow 1.3+. */
+ OFPUTIL_FF_NO_PKT_COUNTS = 1 << 1, /* OpenFlow 1.3+. */
+ OFPUTIL_FF_NO_BYT_COUNTS = 1 << 2, /* OpenFlow 1.3+. */
+#define OFPUTIL_FF_STATE (OFPUTIL_FF_SEND_FLOW_REM \
+ | OFPUTIL_FF_NO_PKT_COUNTS \
+ | OFPUTIL_FF_NO_BYT_COUNTS)
+
+ /* Flags that affect flow_mod behavior but are not part of flow state. */
+ OFPUTIL_FF_CHECK_OVERLAP = 1 << 3, /* All versions. */
+ OFPUTIL_FF_EMERG = 1 << 4, /* OpenFlow 1.0 only. */
+ OFPUTIL_FF_RESET_COUNTS = 1 << 5, /* OpenFlow 1.2+. */
};
/* Protocol-independent flow_mod.
struct ofpbuf *ofputil_encode_port_mod(const struct ofputil_port_mod *,
enum ofputil_protocol);
+/* Abstract ofp_table_mod. */
+struct ofputil_table_mod {
+ uint8_t table_id; /* ID of the table, 0xff indicates all tables. */
+ uint32_t config;
+};
+
+enum ofperr ofputil_decode_table_mod(const struct ofp_header *,
+ struct ofputil_table_mod *);
+struct ofpbuf *ofputil_encode_table_mod(const struct ofputil_table_mod *,
+ enum ofputil_protocol);
+
/* Meter band configuration for all supported band types. */
struct ofputil_meter_band {
uint16_t type;
case OFPTYPE_FLOW_MOD:
case OFPTYPE_GROUP_MOD:
case OFPTYPE_PORT_MOD:
+ case OFPTYPE_TABLE_MOD:
case OFPTYPE_METER_MOD:
case OFPTYPE_BARRIER_REQUEST:
case OFPTYPE_BARRIER_REPLY:
{
memset(c, 0, sizeof *c);
c->id = id;
+ ovs_rwlock_init(&c->rwlock);
xclock_gettime(c->id, &c->cache);
}
void
set_subprogram_name(const char *name)
{
+ const char *pname = name[0] ? name : program_name;
free(subprogram_name_set(xstrdup(name)));
#if HAVE_GLIBC_PTHREAD_SETNAME_NP
- pthread_setname_np(pthread_self(), name);
+ pthread_setname_np(pthread_self(), pname);
#elif HAVE_NETBSD_PTHREAD_SETNAME_NP
- pthread_setname_np(pthread_self(), "%s", name);
+ pthread_setname_np(pthread_self(), "%s", pname);
#elif HAVE_PTHREAD_SET_NAME_NP
- pthread_set_name_np(pthread_self(), name);
+ pthread_set_name_np(pthread_self(), pname);
#endif
}
memcpy(ofconn->slave_async_config, slave_masks, size);
}
+void
+ofconn_get_async_config(struct ofconn *ofconn,
+ uint32_t *master_masks, uint32_t *slave_masks)
+{
+ size_t size = sizeof ofconn->master_async_config;
+ memcpy(master_masks, ofconn->master_async_config, size);
+ memcpy(slave_masks, ofconn->slave_async_config, size);
+}
+
/* Sends 'msg' on 'ofconn', accounting it as a reply. (If there is a
* sufficient number of OpenFlow replies in-flight on a single ofconn, then the
* connmgr will stop accepting new OpenFlow requests on that ofconn until the
void ofconn_set_async_config(struct ofconn *,
const uint32_t master_masks[OAM_N_TYPES],
const uint32_t slave_masks[OAM_N_TYPES]);
+void ofconn_get_async_config(struct ofconn *,
+ uint32_t *master_masks,
+ uint32_t *slave_masks);
void ofconn_send_reply(const struct ofconn *, struct ofpbuf *);
void ofconn_send_replies(const struct ofconn *, struct list *);
bundle_send_learning_packets(struct ofbundle *bundle)
{
struct ofproto_dpif *ofproto = bundle->ofproto;
+ struct ofpbuf *learning_packet;
int error, n_packets, n_errors;
struct mac_entry *e;
+ struct list packets;
- error = n_packets = n_errors = 0;
+ list_init(&packets);
ovs_rwlock_rdlock(&ofproto->ml->rwlock);
LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) {
if (e->port.p != bundle) {
- struct ofpbuf *learning_packet;
- struct ofport_dpif *port;
void *port_void;
- int ret;
- /* The assignment to "port" is unnecessary but makes "grep"ing for
- * struct ofport_dpif more effective. */
learning_packet = bond_compose_learning_packet(bundle->bond,
e->mac, e->vlan,
&port_void);
- port = port_void;
- ret = send_packet(port, learning_packet);
- ofpbuf_delete(learning_packet);
- if (ret) {
- error = ret;
- n_errors++;
- }
- n_packets++;
+ learning_packet->private_p = port_void;
+ list_push_back(&packets, &learning_packet->list_node);
}
}
ovs_rwlock_unlock(&ofproto->ml->rwlock);
+ error = n_packets = n_errors = 0;
+ LIST_FOR_EACH (learning_packet, list_node, &packets) {
+ int ret;
+
+ ret = send_packet(learning_packet->private_p, learning_packet);
+ if (ret) {
+ error = ret;
+ n_errors++;
+ }
+ n_packets++;
+ }
+ ofpbuf_list_delete(&packets);
+
if (n_errors) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
VLOG_WARN_RL(&rl, "bond %s: %d errors sending %d gratuitous learning "
long long int modified; /* Time of last modification. */
long long int used; /* Last use; time created if never used. */
uint8_t table_id; /* Index in ofproto's 'tables' array. */
- bool send_flow_removed; /* Send a flow removed message? */
+ enum ofputil_flow_mod_flags flags;
struct ovs_mutex timeout_mutex;
uint16_t hard_timeout OVS_GUARDED; /* In seconds from ->modified. */
/* OFOPERATION_DELETE. */
enum ofp_flow_removed_reason reason; /* Reason flow was removed. */
- ovs_be64 flow_cookie; /* Rule's old flow cookie. */
- uint16_t idle_timeout; /* Rule's old idle timeout. */
- uint16_t hard_timeout; /* Rule's old hard timeout. */
- bool send_flow_removed; /* Rule's old 'send_flow_removed'. */
- enum ofperr error; /* 0 if no error. */
+ ovs_be64 flow_cookie; /* Rule's old flow cookie. */
+ uint16_t idle_timeout; /* Rule's old idle timeout. */
+ uint16_t hard_timeout; /* Rule's old hard timeout. */
+ enum ofputil_flow_mod_flags flags; /* Rule's old flags. */
+ enum ofperr error; /* 0 if no error. */
};
static struct ofoperation *ofoperation_create(struct ofopgroup *,
static void reinit_ports(struct ofproto *);
/* rule. */
+static void ofproto_rule_destroy(struct rule *);
static void ofproto_rule_destroy__(struct rule *);
static void ofproto_rule_send_removed(struct rule *, uint8_t reason);
static bool rule_is_modifiable(const struct rule *);
}
/* Sets number of upcall handler threads. The default is
- * (number of online cores - 1). */
+ * (number of online cores - 2). */
void
ofproto_set_n_handler_threads(unsigned limit)
{
if (limit) {
n_handler_threads = limit;
} else {
- n_handler_threads = MAX(1, sysconf(_SC_NPROCESSORS_ONLN) - 1);
+ int n_proc = sysconf(_SC_NPROCESSORS_ONLN);
+ n_handler_threads = n_proc > 2 ? n_proc - 2 : 1;
}
}
struct oftable *table = &p->tables[i];
struct eviction_group *evg;
struct cls_cursor cursor;
- struct cls_rule cr;
struct rule *rule;
if (!table->eviction_fields) {
}
ovs_rwlock_rdlock(&table->cls.rwlock);
- cls_cursor_init(&cursor, &table->cls, &cr);
+ cls_cursor_init(&cursor, &table->cls, NULL);
CLS_CURSOR_FOR_EACH (rule, cr, &cursor) {
if (!rule->eviction_group
&& (rule->idle_timeout || rule->hard_timeout)) {
}
\f
static void
-ofproto_rule_destroy__(struct rule *rule)
+ofproto_rule_destroy(struct rule *rule)
{
if (rule) {
rule->ofproto->ofproto_class->rule_destruct(rule);
- cls_rule_destroy(&rule->cr);
- free(rule->ofpacts);
- ovs_mutex_destroy(&rule->timeout_mutex);
- ovs_rwlock_destroy(&rule->rwlock);
- rule->ofproto->ofproto_class->rule_dealloc(rule);
+ ofproto_rule_destroy__(rule);
}
}
+static void
+ofproto_rule_destroy__(struct rule *rule)
+{
+ cls_rule_destroy(&rule->cr);
+ free(rule->ofpacts);
+ ovs_mutex_destroy(&rule->timeout_mutex);
+ ovs_rwlock_destroy(&rule->rwlock);
+ rule->ofproto->ofproto_class->rule_dealloc(rule);
+}
+
/* This function allows an ofproto implementation to destroy any rules that
 * remain when its ->destruct() function is called. This function implements
* steps 4.4 and 4.5 in the section titled "Rule Life Cycle" in
goto exit;
}
if (rule->flow_cookie == cookie /* Hash collisions possible. */
- && ofproto_rule_has_out_port(rule, out_port)) {
+ && ofproto_rule_has_out_port(rule, out_port)
+ && ofproto_rule_has_out_group(rule, out_group)) {
list_push_back(rules, &rule->ofproto_node);
}
}
goto exit;
}
if (rule->flow_cookie == cookie /* Hash collisions possible. */
- && ofproto_rule_has_out_port(rule, out_port)) {
+ && ofproto_rule_has_out_port(rule, out_port)
+ && ofproto_rule_has_out_group(rule, out_group)) {
list_push_back(rules, &rule->ofproto_node);
}
}
fs.hard_timeout = rule->hard_timeout;
ovs_mutex_unlock(&rule->timeout_mutex);
- fs.flags = 0;
- if (rule->send_flow_removed) {
- fs.flags |= OFPUTIL_FF_SEND_FLOW_REM;
- /* FIXME: Implement OFPUTIL_FF_NO_PKT_COUNTS and
- OFPUTIL_FF_NO_BYT_COUNTS. */
- }
+ fs.flags = rule->flags;
+
ofputil_append_flow_stats_reply(&fs, &replies);
}
ofconn_send_replies(ofconn, &replies);
ovs_mutex_unlock(&rule->timeout_mutex);
rule->table_id = table - ofproto->tables;
- rule->send_flow_removed = (fm->flags & OFPUTIL_FF_SEND_FLOW_REM) != 0;
+ rule->flags = fm->flags & OFPUTIL_FF_STATE;
+
rule->ofpacts = xmemdup(fm->ofpacts, fm->ofpacts_len);
rule->ofpacts_len = fm->ofpacts_len;
rule->meter_id = find_meter(rule->ofpacts, rule->ofpacts_len);
rule->hard_timeout = fm->hard_timeout;
ovs_mutex_unlock(&rule->timeout_mutex);
- rule->send_flow_removed = (fm->flags
- & OFPUTIL_FF_SEND_FLOW_REM) != 0;
-
+ rule->flags = fm->flags & OFPUTIL_FF_STATE;
if (fm->idle_timeout || fm->hard_timeout) {
if (!rule->eviction_group) {
eviction_group_add_rule(rule);
{
struct ofputil_flow_removed fr;
- if (ofproto_rule_is_hidden(rule) || !rule->send_flow_removed) {
+ if (ofproto_rule_is_hidden(rule) ||
+ !(rule->flags & OFPUTIL_FF_SEND_FLOW_REM)) {
return;
}
return 0;
}
+static enum ofperr
+handle_nxt_get_async_request(struct ofconn *ofconn, const struct ofp_header *oh)
+{
+ struct ofpbuf *buf;
+ uint32_t master[OAM_N_TYPES];
+ uint32_t slave[OAM_N_TYPES];
+ struct nx_async_config *msg;
+
+ ofconn_get_async_config(ofconn, master, slave);
+ buf = ofpraw_alloc_reply(OFPRAW_OFPT13_GET_ASYNC_REPLY, oh, 0);
+ msg = ofpbuf_put_zeros(buf, sizeof *msg);
+
+ msg->packet_in_mask[0] = htonl(master[OAM_PACKET_IN]);
+ msg->port_status_mask[0] = htonl(master[OAM_PORT_STATUS]);
+ msg->flow_removed_mask[0] = htonl(master[OAM_FLOW_REMOVED]);
+
+ msg->packet_in_mask[1] = htonl(slave[OAM_PACKET_IN]);
+ msg->port_status_mask[1] = htonl(slave[OAM_PORT_STATUS]);
+ msg->flow_removed_mask[1] = htonl(slave[OAM_FLOW_REMOVED]);
+
+ ofconn_send_reply(ofconn, buf);
+
+ return 0;
+}
+
static enum ofperr
handle_nxt_set_controller_id(struct ofconn *ofconn,
const struct ofp_header *oh)
}
}
+static enum ofperr
+handle_table_mod(struct ofconn *ofconn, const struct ofp_header *oh)
+{
+ struct ofputil_table_mod tm;
+ enum ofperr error;
+
+ error = reject_slave_controller(ofconn);
+ if (error) {
+ return error;
+ }
+
+ error = ofputil_decode_table_mod(oh, &tm);
+ if (error) {
+ return error;
+ }
+
+ /* XXX Actual table mod support is not implemented yet. */
+ return 0;
+}
+
static enum ofperr
handle_openflow__(struct ofconn *ofconn, const struct ofpbuf *msg)
{
case OFPTYPE_GROUP_MOD:
return handle_group_mod(ofconn, oh);
+ case OFPTYPE_TABLE_MOD:
+ return handle_table_mod(ofconn, oh);
+
case OFPTYPE_METER_MOD:
return handle_meter_mod(ofconn, oh);
case OFPTYPE_SET_ASYNC_CONFIG:
return handle_nxt_set_async_config(ofconn, oh);
+ case OFPTYPE_GET_ASYNC_REQUEST:
+ return handle_nxt_get_async_request(ofconn, oh);
+
/* Statistics requests. */
case OFPTYPE_DESC_STATS_REQUEST:
return handle_desc_stats_request(ofconn, oh);
/* FIXME: Change the following once they are implemented: */
case OFPTYPE_QUEUE_GET_CONFIG_REQUEST:
- case OFPTYPE_GET_ASYNC_REQUEST:
case OFPTYPE_TABLE_FEATURES_STATS_REQUEST:
return OFPERR_OFPBRC_BAD_TYPE;
} else {
ovs_rwlock_wrlock(&rule->rwlock);
oftable_remove_rule(rule);
- ofproto_rule_destroy__(rule);
+ ofproto_rule_destroy(rule);
}
break;
case OFOPERATION_DELETE:
ovs_assert(!op->error);
- ofproto_rule_destroy__(rule);
+ ofproto_rule_destroy(rule);
op->rule = NULL;
break;
op->ofpacts = NULL;
op->ofpacts_len = 0;
}
- rule->send_flow_removed = op->send_flow_removed;
+ rule->flags = op->flags;
}
break;
op->idle_timeout = rule->idle_timeout;
op->hard_timeout = rule->hard_timeout;
ovs_mutex_unlock(&rule->timeout_mutex);
- op->send_flow_removed = rule->send_flow_removed;
+ op->flags = rule->flags;
group->n_running++;
])
AT_CLEANUP
+AT_SETUP([OFPT_TABLE_MOD - OF1.1])
+AT_KEYWORDS([ofp-print])
+AT_CHECK([ovs-ofctl ofp-print "\
+02 11 00 10 00 00 00 02 02 00 00 00 00 00 00 02 \
+" 3], [0], [dnl
+OFPT_TABLE_MOD (OF1.1) (xid=0x2): table_id=2, flow_miss_config=drop
+])
+AT_CLEANUP
+
+AT_SETUP([OFPT_TABLE_MOD - OF1.2])
+AT_KEYWORDS([ofp-print])
+AT_CHECK([ovs-ofctl ofp-print "\
+03 11 00 10 00 00 00 02 02 00 00 00 00 00 00 01 \
+" 3], [0], [dnl
+OFPT_TABLE_MOD (OF1.2) (xid=0x2): table_id=2, flow_miss_config=continue
+])
+AT_CLEANUP
+
+AT_SETUP([OFPT_TABLE_MOD - OF1.3])
+AT_KEYWORDS([ofp-print])
+AT_CHECK([ovs-ofctl ofp-print "\
+04 11 00 10 00 00 00 02 02 00 00 00 00 00 00 00 \
+" 3], [0], [dnl
+OFPT_TABLE_MOD (OF1.3) (xid=0x2): table_id=2, flow_miss_config=controller
+])
+AT_CLEANUP
+
AT_SETUP([OFPST_DESC request])
AT_KEYWORDS([ofp-print OFPT_STATS_REQUEST])
AT_CHECK([ovs-ofctl ofp-print "0110000c0000000100000000"], [0], [dnl
OVS_VSWITCHD_STOP
AT_CLEANUP
+
+AT_SETUP([ofproto - event filtering (OpenFlow 1.3)])
+AT_KEYWORDS([monitor])
+OVS_VSWITCHD_START
+
+# Start a monitor, use the required protocol version
+ovs-ofctl -O OpenFlow13 monitor br0 --detach --no-chdir --pidfile >monitor.log 2>&1
+AT_CAPTURE_FILE([monitor.log])
+
+# Send an OpenFlow13 message (04), OFPT_GET_ASYNC_REQUEST (1a), length (8), xid (0a)
+ovs-appctl -t ovs-ofctl ofctl/send 041a00080000000a
+ovs-appctl -t ovs-ofctl ofctl/barrier
+
+# Check default setting
+read -r -d '' expected <<'EOF'
+EOF
+
+AT_CHECK([ofctl_strip < monitor.log], [], [dnl
+send: OFPT_GET_ASYNC_REQUEST (OF1.3):
+OFPT_GET_ASYNC_REPLY (OF1.3):
+ master:
+ PACKET_IN: no_match action
+ PORT_STATUS: add delete modify
+ FLOW_REMOVED: idle hard delete
+
+ slave:
+ PACKET_IN: (off)
+ PORT_STATUS: add delete modify
+ FLOW_REMOVED: (off)
+OFPT_BARRIER_REPLY (OF1.3):
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
groups are printed. See \fBGroup Syntax\fR, below, for the syntax of
\fIgroups\fR.
.
+.IP "\fBmod\-table \fIswitch\fR \fItable_id\fR \fIflow_miss_handling\fR"
+An OpenFlow 1.0 switch looks up each packet that arrives at the switch
+in table 0, then in table 1 if there is no match in table 0, then in
+table 2, and so on until the packet finds a match in some table.
+Finally, if no match was found, the switch sends the packet to the
+controller.
+.IP
+OpenFlow 1.1 and later offer more flexibility. This command
+configures the flow table miss handling configuration for table
+\fItable_id\fR in \fIswitch\fR. \fItable_id\fR may be an OpenFlow
+table number between 0 and 254, inclusive, or the keyword \fBALL\fR to
+modify all tables. \fIflow_miss_handling\fR may be any one of the
+following:
+.RS
+.IP \fBdrop\fR
+Drop the packet.
+.IP \fBcontinue\fR
+Continue to the next table in the pipeline. (This is how an OpenFlow
+1.0 switch always handles packets that do not match any flow, in
+tables other than the last one.)
+.IP \fBcontroller\fR
+Send to controller. (This is how an OpenFlow 1.0 switch always
+handles packets that do not match any flow in the last table.)
+.RE
+.
.SS "OpenFlow Switch Flow Table Commands"
.
These commands manage the flow table in an OpenFlow switch. In each
" dump-desc SWITCH print switch description\n"
" dump-tables SWITCH print table stats\n"
" mod-port SWITCH IFACE ACT modify port behavior\n"
+ " mod-table SWITCH MOD modify flow table behavior\n"
" get-frags SWITCH print fragment handling behavior\n"
" set-frags SWITCH FRAG_MODE set fragment handling behavior\n"
" dump-ports SWITCH [PORT] print port statistics\n"
vconn_close(vconn);
}
+static void
+ofctl_mod_table(int argc OVS_UNUSED, char *argv[])
+{
+ enum ofputil_protocol protocol, usable_protocols;
+ struct ofputil_table_mod tm;
+ struct vconn *vconn;
+ char *error;
+ int i;
+
+ error = parse_ofp_table_mod(&tm, argv[2], argv[3], &usable_protocols);
+ if (error) {
+ ovs_fatal(0, "%s", error);
+ }
+
+ protocol = open_vconn(argv[1], &vconn);
+ if (!(protocol & usable_protocols)) {
+ for (i = 0; i < sizeof(enum ofputil_protocol) * CHAR_BIT; i++) {
+ enum ofputil_protocol f = 1 << i;
+ if (f != protocol
+ && f & usable_protocols
+ && try_set_protocol(vconn, f, &protocol)) {
+ protocol = f;
+ break;
+ }
+ }
+ }
+
+ if (!(protocol & usable_protocols)) {
+ char *usable_s = ofputil_protocols_to_string(usable_protocols);
+ ovs_fatal(0, "Switch does not support table mod message(%s)", usable_s);
+ }
+
+ transact_noreply(vconn, ofputil_encode_table_mod(&tm, protocol));
+ vconn_close(vconn);
+}
+
static void
ofctl_get_frags(int argc OVS_UNUSED, char *argv[])
{
{ "dump-ports", 1, 2, ofctl_dump_ports },
{ "dump-ports-desc", 1, 1, ofctl_dump_ports_desc },
{ "mod-port", 3, 3, ofctl_mod_port },
+ { "mod-table", 3, 3, ofctl_mod_table },
{ "get-frags", 1, 1, ofctl_get_frags },
{ "set-frags", 2, 2, ofctl_set_frags },
{ "ofp-parse", 1, 1, ofctl_ofp_parse },
type='{"type": "integer", "minInteger": 1}'>
<p>
Specifies the number of threads for software datapaths to use for
- handling new flows. The default is one less than the number of
+ handling new flows. The default is two less than the number of
online CPU cores (but at least 1).
</p>
<p>