From a0fd7dde11703233dd3e5a542fc674dd83bafcfb Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 28 Apr 2008 13:28:45 -0700 Subject: [PATCH] Add-on hardware tables initial implementation. Incorporates suggestions from Justin. --- INSTALL | 30 ++- README.hwtables | 38 ++++ configure.ac | 25 +++ datapath/chain.c | 52 ++++- datapath/chain.h | 5 + datapath/datapath.c | 1 + datapath/flow.c | 9 +- datapath/flow.h | 1 + datapath/hwtable-dummy/Modules.mk | 6 + datapath/hwtable-dummy/hwtable-dummy.c | 274 +++++++++++++++++++++++++ datapath/linux-2.4/Makefile.in | 1 + datapath/linux-2.6/Kbuild.in | 1 + datapath/linux-2.6/Makefile.main.in | 1 + datapath/unit-exports.c | 1 - 14 files changed, 434 insertions(+), 11 deletions(-) create mode 100644 README.hwtables create mode 100644 datapath/hwtable-dummy/Modules.mk create mode 100644 datapath/hwtable-dummy/hwtable-dummy.c diff --git a/INSTALL b/INSTALL index f9fc43f2b..0e488cf0e 100644 --- a/INSTALL +++ b/INSTALL @@ -281,12 +281,30 @@ To build for a running instance of Linux 2.4: % ./configure --with-l24=/lib/modules/`uname -r`/build +If you have hardware that supports accelerated OpenFlow switching, and +you have obtained a hardware table module for your hardware and +extracted it into the OpenFlow reference distribution source tree, +then you may also enable building support for the hardware switching +table with --enable-hw-tables. For example, if your hardware +switching table is in a directory named datapath/hwtable-foomatic, you +could compile support for it with the running Linux 2.6 kernel like +so: + + % ./configure --with-l26=/lib/modules/`uname -r`/build \ + --enable-hw-tables=foomatic + +For more information about hardware table modules, please read +README.hwtables at the root of the OpenFlow distribution tree. 
+ In addition to the binaries listed under step 2 in "Building Userspace Programs" above, "make" will build the following kernel modules: datapath/linux-2.6/openflow_mod.ko (if --with-l26 was specified) datapath/linux-2.4/openflow_mod.o (if --with-l24 was specified) +"make" will also build a kernel module for each hardware switch table +enabled with --enable-hw-tables. + Once you have built the kernel modules, activating them requires only running "insmod", e.g.: @@ -297,6 +315,10 @@ running "insmod", e.g.: % insmod datapath/linux-2.4/compat24_mod.o % insmod datapath/linux-2.4/openflow_mod.o +After you load the openflow module, you may load one hardware switch +table module (if any were built) to enable support for that hardware +switching table. + The insmod program must be run as root. You may need to specify a full path to insmod, which is usually in the /sbin directory. To verify that the modules have been loaded, run "lsmod" (also in /sbin) @@ -316,8 +338,12 @@ previous section, before it may be tested. # dpctl adddp 0 - (In principle, openflow_mod supports multiple datapaths within the - same host, but this is rarely useful in practice.) + In principle, openflow_mod supports multiple datapaths within the + same host, but this is rarely useful in practice. + + If you built a support module for hardware accelerated OpenFlow + switching and you want to use it, you must load it before creating + the datapath with "dpctl adddp". 2. Use dpctl to attach the datapath to physical interfaces on the machine. Say, for example, you want to create a trivial 2-port diff --git a/README.hwtables b/README.hwtables new file mode 100644 index 000000000..e0b2a038f --- /dev/null +++ b/README.hwtables @@ -0,0 +1,38 @@ +Hardware Table Support -*- text -*- +---------------------- + +The OpenFlow reference implementation in this distribution provides a +mechanism to support hardware that can accelerate OpenFlow switching. 
+The mechanism consists of the ability to add a "hardware acceleration" +switching table ahead of the software switching tables implemented by +the reference implementation. The hardware switching table is +expected to handle any incoming packets that it can on its own. Any +packets that it cannot handle itself it may pass up to the software +table implementations. + +Hardware table implementations are built as separate kernel modules +that may be loaded after the openflow module. At most one hardware +table module may be loaded at a time. Only datapaths created after a +hardware table module is loaded (and before it is unloaded) will take +advantage of hardware switching features. + +Creating a hardware table module is straightforward. Create a +directory in the openflow source tree named datapath/hwtable-NAME, +where NAME identifies the hardware that the module supports. Populate +that directory with the C source files that comprise the module, plus +a file named Modules.mk that specifies how to build the module. This +distribution includes a "dummy" hardware module that demonstrates how +this works. + +Even though only one may be loaded at a given time, any number of +hardware table modules may be built along with the OpenFlow kernel +modules. Specify each NAME that identifies a module to be built on +the OpenFlow configure script command line as the argument to +--enable-hw-tables, e.g.: + ./configure --enable-hw-tables=NAME + +Each hardware table module's code is encapsulated in a directory, so +it is easy to separate a hardware table implementation from OpenFlow. +Simply package up the contents of the hwtable-NAME directory and +distribute it for builders to extract into their distribution +directory. 
diff --git a/configure.ac b/configure.ac index 403433cde..2c2b427ed 100644 --- a/configure.ac +++ b/configure.ac @@ -22,6 +22,31 @@ AC_ARG_ENABLE( [ndebug=false]) AM_CONDITIONAL([NDEBUG], [test x$ndebug = xtrue]) +AC_ARG_ENABLE( + [hw-tables], + [AC_HELP_STRING([--enable-hw-tables=MODULE...], + [Configure and build the specified externally supplied + hardware table support modules])]) +case "${enable_hw_tables}" in # ( + yes) + AC_MSG_ERROR([--enable-hw-tables has a required argument]) + ;; # ( + ''|no) + hw_tables= + ;; # ( + *) + hw_tables=`echo "$enable_hw_tables" | sed 's/,/ /g'` + ;; +esac +for d in $hw_tables; do + mk=datapath/hwtable-$d/Modules.mk + if test ! -e $srcdir/$mk; then + AC_MSG_ERROR([--enable-hw-tables=$d specified but $mk is missing]) + fi + HW_TABLES="$HW_TABLES \$(top_srcdir)/$mk" +done +AC_SUBST(HW_TABLES) + CHECK_LINUX(l26, 2.6, 2.6, KSRC26, L26_ENABLED) CHECK_LINUX(l24, 2.4, 2.4, KSRC24, L24_ENABLED) diff --git a/datapath/chain.c b/datapath/chain.c index f44fbf090..2b1178b8a 100644 --- a/datapath/chain.c +++ b/datapath/chain.c @@ -7,8 +7,14 @@ #include "chain.h" #include "flow.h" #include "table.h" +#include #include #include +#include + +static struct sw_table *(*create_hw_table_hook)(void); +static struct module *hw_table_owner; +static DEFINE_SPINLOCK(hook_lock); /* Attempts to append 'table' to the set of tables in 'chain'. Returns 0 or * negative error. If 'table' is null it is assumed that table creation failed @@ -32,17 +38,31 @@ struct sw_chain *chain_create(struct datapath *dp) { struct sw_chain *chain = kzalloc(sizeof *chain, GFP_KERNEL); + struct sw_table *(*create_hw_table)(void); if (chain == NULL) - return NULL; + goto error; chain->dp = dp; 
+ /* Snapshot the hook and pin its module under hook_lock, so that neither + * can change between the check and the call below. */ + spin_lock(&hook_lock); + create_hw_table = create_hw_table_hook; + chain->owner = try_module_get(hw_table_owner) ? hw_table_owner : NULL; + spin_unlock(&hook_lock); + if (chain->owner && create_hw_table) { + struct sw_table *hwtable = create_hw_table(); + if (!hwtable || add_table(chain, hwtable)) + goto error; + } if (add_table(chain, table_hash2_create(0x1EDC6F41, TABLE_HASH_MAX_FLOWS, 0x741B8CD7, TABLE_HASH_MAX_FLOWS)) - || add_table(chain, table_linear_create(TABLE_LINEAR_MAX_FLOWS))) { - chain_destroy(chain); - return NULL; - } - + || add_table(chain, table_linear_create(TABLE_LINEAR_MAX_FLOWS))) + goto error; return chain; + +error: + if (chain) + chain_destroy(chain); + return NULL; } /* Searches 'chain' for a flow matching 'key', which must not have any wildcard @@ -141,6 +161,7 @@ void chain_destroy(struct sw_chain *chain) struct sw_table *t = chain->tables[i]; t->destroy(t); } + module_put(chain->owner); kfree(chain); } @@ -158,3 +179,36 @@ void chain_print_stats(struct sw_chain *chain) stats.name, stats.n_flows, stats.max_flows); } } + + +/* Registers 'create_hw_table' as the function that chain_create() calls to + * create a hardware table ahead of the software tables, and 'owner' as the + * module that provides it. Returns 0 if successful, or -EBUSY if a hook is + * already registered. */ +int chain_set_hw_hook(struct sw_table *(*create_hw_table)(void), + struct module *owner) +{ + int retval = -EBUSY; + + spin_lock(&hook_lock); + if (!create_hw_table_hook) { + create_hw_table_hook = create_hw_table; + hw_table_owner = owner; + retval = 0; + } + spin_unlock(&hook_lock); + + return retval; +} +EXPORT_SYMBOL(chain_set_hw_hook); + +/* Unregisters the hardware table hook. Takes hook_lock so that this cannot + * interleave with chain_set_hw_hook() or with chain_create(). */ +void chain_clear_hw_hook(void) +{ + spin_lock(&hook_lock); + create_hw_table_hook = NULL; + hw_table_owner = NULL; + spin_unlock(&hook_lock); +} +EXPORT_SYMBOL(chain_clear_hw_hook); diff --git a/datapath/chain.h b/datapath/chain.h index 42e2cb774..69f746f8a 100644 --- a/datapath/chain.h +++ b/datapath/chain.h @@ -18,6 +18,7 @@ struct sw_chain { struct sw_table *tables[CHAIN_MAX_TABLES]; struct datapath *dp; + struct module *owner; }; struct sw_chain *chain_create(struct datapath *); @@ -28,4 +29,8 @@ int chain_timeout(struct sw_chain *); void chain_destroy(struct sw_chain *); void chain_print_stats(struct sw_chain *); +int chain_set_hw_hook(struct sw_table *(*create_hw_table)(void), + struct module *owner); +void chain_clear_hw_hook(void); + #endif /* chain.h 
*/ diff --git a/datapath/datapath.c b/datapath/datapath.c index 385319525..5665de47c 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -826,6 +826,7 @@ dp_send_flow_expired(struct datapath *dp, struct sw_flow *flow) return send_openflow_skb(skb, NULL); } +EXPORT_SYMBOL(dp_send_flow_expired); int dp_send_error_msg(struct datapath *dp, const struct sender *sender, diff --git a/datapath/flow.c b/datapath/flow.c index fde6dd37d..cdda24ba2 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -42,18 +43,17 @@ int flow_fields_match(const struct sw_flow_key *a, const struct sw_flow_key *b, /* Returns nonzero if 'a' and 'b' match, that is, if their fields are equal * modulo wildcards, zero otherwise. */ -inline int flow_matches(const struct sw_flow_key *a, const struct sw_flow_key *b) { return flow_fields_match(a, b, (a->wildcards | b->wildcards)); } +EXPORT_SYMBOL(flow_matches); /* Returns nonzero if 't' (the table entry's key) and 'd' (the key * describing the deletion) match, that is, if their fields are * equal modulo wildcards, zero otherwise. If 'strict' is nonzero, the * wildcards must match in both 't_key' and 'd_key'. Note that the * table's wildcards are ignored unless 'strict' is set. */ -inline int flow_del_matches(const struct sw_flow_key *t, const struct sw_flow_key *d, int strict) { if (strict && (t->wildcards != d->wildcards)) @@ -61,6 +61,7 @@ int flow_del_matches(const struct sw_flow_key *t, const struct sw_flow_key *d, i return flow_fields_match(t, d, d->wildcards); } +EXPORT_SYMBOL(flow_del_matches); void flow_extract_match(struct sw_flow_key* to, const struct ofp_match* from) { @@ -118,6 +119,7 @@ int flow_del(struct sw_flow *flow) { return !atomic_cmpxchg(&flow->deleted, 0, 1); } +EXPORT_SYMBOL(flow_del); /* Allocates and returns a new flow with 'n_actions' action, using allocation * flags 'flags'. Returns the new flow or a null pointer on failure. 
*/ @@ -145,6 +147,7 @@ void flow_free(struct sw_flow *flow) kfree(flow->actions); kmem_cache_free(flow_cache, flow); } +EXPORT_SYMBOL(flow_free); /* RCU callback used by flow_deferred_free. */ static void rcu_callback(struct rcu_head *rcu) @@ -159,6 +162,7 @@ void flow_deferred_free(struct sw_flow *flow) { call_rcu(&flow->rcu, rcu_callback); } +EXPORT_SYMBOL(flow_deferred_free); /* Prints a representation of 'key' to the kernel log. */ void print_flow(const struct sw_flow_key *key) @@ -182,6 +186,7 @@ void print_flow(const struct sw_flow_key *key) ((unsigned char *)&key->nw_dst)[3], ntohs(key->tp_src), ntohs(key->tp_dst)); } +EXPORT_SYMBOL(print_flow); /* Parses the Ethernet frame in 'skb', which was received on 'in_port', * and initializes 'key' to match. */ diff --git a/datapath/flow.h b/datapath/flow.h index 8534342d9..6ecf7782d 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -74,6 +74,7 @@ struct sw_flow { struct list_head node; struct list_head iter_node; unsigned long serial; + void *private; spinlock_t lock; /* Lock this entry...mostly for stat updates */ unsigned long init_time; /* When the flow was created (in jiffies). */ diff --git a/datapath/hwtable-dummy/Modules.mk b/datapath/hwtable-dummy/Modules.mk new file mode 100644 index 000000000..97484a0f9 --- /dev/null +++ b/datapath/hwtable-dummy/Modules.mk @@ -0,0 +1,6 @@ +# Specify the module to build. +all_modules += hwtable-dummy + +# Specify the source files that comprise the module. 
+hwtable-dummy_sources = \ + hwtable-dummy/hwtable-dummy.c diff --git a/datapath/hwtable-dummy/hwtable-dummy.c b/datapath/hwtable-dummy/hwtable-dummy.c new file mode 100644 index 000000000..d3fd97c96 --- /dev/null +++ b/datapath/hwtable-dummy/hwtable-dummy.c @@ -0,0 +1,278 @@ + +#include +#include +#include +#include +#include +#include + +#include "chain.h" +#include "table.h" +#include "flow.h" +#include "datapath.h" + + +/* Max number of flow entries supported by the hardware */ +#define DUMMY_MAX_FLOW 8192 + + +/* Private data of deleted flows that is waiting to be destroyed. + * table_dummy_rcu_callback() queues entries here once the RCU grace period has + * passed, and table_dummy_timeout() destroys them. */ +static DEFINE_SPINLOCK(pending_free_lock); +static LIST_HEAD(pending_free_list); + +/* sw_flow private data for dummy table entries. */ +struct sw_flow_dummy { + struct list_head node; + + /* xxx If per-entry data is needed, define it here. */ +}; + +struct sw_table_dummy { + struct sw_table swt; + + spinlock_t lock; + unsigned int max_flows; + atomic_t n_flows; + struct list_head flows; + struct list_head iter_flows; + unsigned long int next_serial; +}; + + +static void table_dummy_sfw_destroy(struct sw_flow_dummy *sfw) +{ + /* xxx Remove the entry from hardware. If you need to do any other + * xxx clean-up associated with the entry, do it here. 
+ */ + + kfree(sfw); +} + +static void table_dummy_rcu_callback(struct rcu_head *rcu) +{ + struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); + + spin_lock(&pending_free_lock); + if (flow->private) { + struct sw_flow_dummy *sfw = flow->private; + list_add(&sfw->node, &pending_free_list); + flow->private = NULL; + } + spin_unlock(&pending_free_lock); + flow_free(flow); +} + +static void table_dummy_flow_deferred_free(struct sw_flow *flow) +{ + call_rcu(&flow->rcu, table_dummy_rcu_callback); +} + +static struct sw_flow *table_dummy_lookup(struct sw_table *swt, + const struct sw_flow_key *key) +{ + struct sw_table_dummy *td = (struct sw_table_dummy *) swt; + struct sw_flow *flow; + list_for_each_entry_rcu (flow, &td->flows, node) { + if (flow_matches(&flow->key, key)) { + printk("found!\n"); + return flow; + } + } + return NULL; +} + +static int table_dummy_insert(struct sw_table *swt, struct sw_flow *flow) +{ + /* xxx Use a data cache? */ + flow->private = kzalloc(sizeof(struct sw_flow_dummy), GFP_ATOMIC); + if (flow->private == NULL) + return 0; + + /* xxx Do whatever needs to be done to insert an entry in hardware. + * xxx If the entry can't be inserted, return 0. This stub code + * xxx doesn't do anything yet, so we're going to return 0...you + * xxx shouldn't. 
+ */ + kfree(flow->private); + /* Don't leave a dangling pointer behind for later users of 'flow'. */ + flow->private = NULL; + return 0; +} + + +static int do_delete(struct sw_table *swt, struct sw_flow *flow) +{ + if (flow_del(flow)) { + list_del_rcu(&flow->node); + table_dummy_flow_deferred_free(flow); + return 1; + } + return 0; +} + +static int table_dummy_delete(struct sw_table *swt, + const struct sw_flow_key *key, uint16_t priority, int strict) +{ + struct sw_table_dummy *td = (struct sw_table_dummy *) swt; + struct sw_flow *flow; + unsigned int count = 0; + + list_for_each_entry_rcu (flow, &td->flows, node) { + if (flow_del_matches(&flow->key, key, strict) + && (!strict || (flow->priority == priority))) + count += do_delete(swt, flow); + } + if (count) + atomic_sub(count, &td->n_flows); + return count; +} + + +static int table_dummy_timeout(struct datapath *dp, struct sw_table *swt) +{ + struct sw_table_dummy *td = (struct sw_table_dummy *) swt; + struct sw_flow *flow; + struct sw_flow_dummy *sfw, *n; + int del_count = 0; + uint64_t packet_count = 0; + int i=0; + + list_for_each_entry_rcu (flow, &td->flows, node) { + /* xxx Retrieve the packet count associated with this entry + * xxx and store it in "packet_count". 
+ */ + + if ((packet_count > flow->packet_count) + && (flow->max_idle != OFP_FLOW_PERMANENT)) { + flow->packet_count = packet_count; + flow->timeout = jiffies + HZ * flow->max_idle; + } + + if (flow_timeout(flow)) { + if (dp->flags & OFPC_SEND_FLOW_EXP) { + /* xxx Get byte count */ + flow->byte_count = 0; + dp_send_flow_expired(dp, flow); + } + del_count += do_delete(swt, flow); + } + if ((i % 50) == 0) { + msleep_interruptible(1); + } + i++; + } + + /* Remove any entries queued for removal */ + spin_lock_bh(&pending_free_lock); + list_for_each_entry_safe (sfw, n, &pending_free_list, node) { + list_del(&sfw->node); + table_dummy_sfw_destroy(sfw); + } + spin_unlock_bh(&pending_free_lock); + + if (del_count) + atomic_sub(del_count, &td->n_flows); + return del_count; +} + + +static void table_dummy_destroy(struct sw_table *swt) +{ + struct sw_table_dummy *td = (struct sw_table_dummy *)swt; + + + /* xxx This table is being destroyed, so free any data that you + * xxx don't want to leak. + */ + + + if (td) { + while (!list_empty(&td->flows)) { + struct sw_flow *flow = list_entry(td->flows.next, + struct sw_flow, node); + list_del(&flow->node); + if (flow->private) + table_dummy_sfw_destroy(flow->private); + flow_free(flow); + } + kfree(td); + } +} + +static int table_dummy_iterate(struct sw_table *swt, + const struct sw_flow_key *key, + struct sw_table_position *position, + int (*callback)(struct sw_flow *, void *), + void *private) +{ + struct sw_table_dummy *tl = (struct sw_table_dummy *) swt; + struct sw_flow *flow; + unsigned long start; + + start = ~position->private[0]; + list_for_each_entry_rcu (flow, &tl->iter_flows, iter_node) { + if (flow->serial <= start && flow_matches(key, &flow->key)) { + int error = callback(flow, private); + if (error) { + position->private[0] = ~flow->serial; + return error; + } + } + } + return 0; +} + +static void table_dummy_stats(struct sw_table *swt, + struct sw_table_stats *stats) +{ + struct sw_table_dummy *td = (struct sw_table_dummy *) swt; + stats->name = "dummy"; + stats->n_flows = 
atomic_read(&td->n_flows); + stats->max_flows = td->max_flows; +} + + +static struct sw_table *table_dummy_create(void) +{ + struct sw_table_dummy *td; + struct sw_table *swt; + + td = kzalloc(sizeof *td, GFP_KERNEL); + if (td == NULL) + return NULL; + + swt = &td->swt; + swt->lookup = table_dummy_lookup; + swt->insert = table_dummy_insert; + swt->delete = table_dummy_delete; + swt->timeout = table_dummy_timeout; + swt->destroy = table_dummy_destroy; + swt->iterate = table_dummy_iterate; + swt->stats = table_dummy_stats; + + td->max_flows = DUMMY_MAX_FLOW; + atomic_set(&td->n_flows, 0); + INIT_LIST_HEAD(&td->flows); + INIT_LIST_HEAD(&td->iter_flows); + spin_lock_init(&td->lock); + + return swt; +} + +static int __init dummy_init(void) +{ + return chain_set_hw_hook(table_dummy_create, THIS_MODULE); +} +module_init(dummy_init); + +static void __exit dummy_cleanup(void) +{ + chain_clear_hw_hook(); +} +module_exit(dummy_cleanup); + +MODULE_DESCRIPTION("Dummy hardware table driver"); +MODULE_AUTHOR("Copyright (c) 2007, 2008 The Board of Trustees of The Leland Stanford Junior University"); +MODULE_LICENSE("GPL"); diff --git a/datapath/linux-2.4/Makefile.in b/datapath/linux-2.4/Makefile.in index 834600fea..36ff04781 100644 --- a/datapath/linux-2.4/Makefile.in +++ b/datapath/linux-2.4/Makefile.in @@ -13,6 +13,7 @@ VPATH = $(v) include $(srcdir)/../Modules.mk include $(srcdir)/Modules.mk +include @HW_TABLES@ default: distclean: clean diff --git a/datapath/linux-2.6/Kbuild.in b/datapath/linux-2.6/Kbuild.in index 5e7b9c469..9174b790f 100644 --- a/datapath/linux-2.6/Kbuild.in +++ b/datapath/linux-2.6/Kbuild.in @@ -6,6 +6,7 @@ export VERSION = @VERSION@ include $(srcdir)/../Modules.mk include $(srcdir)/Modules.mk +include @HW_TABLES@ EXTRA_CFLAGS := -DVERSION=\"$(VERSION)\" EXTRA_CFLAGS += -I$(srcdir)/.. 
diff --git a/datapath/linux-2.6/Makefile.main.in b/datapath/linux-2.6/Makefile.main.in index 9de7bc95c..565ee394d 100644 --- a/datapath/linux-2.6/Makefile.main.in +++ b/datapath/linux-2.6/Makefile.main.in @@ -8,6 +8,7 @@ export VERSION = @VERSION@ include $(srcdir)/../Modules.mk include $(srcdir)/Modules.mk +include @HW_TABLES@ default: $(all_sources) $(all_sources): diff --git a/datapath/unit-exports.c b/datapath/unit-exports.c index 0dc3fa0b8..324e77a7a 100644 --- a/datapath/unit-exports.c +++ b/datapath/unit-exports.c @@ -11,7 +11,6 @@ #include EXPORT_SYMBOL(flow_alloc); -EXPORT_SYMBOL(flow_free); EXPORT_SYMBOL(flow_cache); EXPORT_SYMBOL(table_hash_create); -- 2.43.0