From 2afd8e46dc88f3fd2734e73025fefea2864c7681 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 11 Dec 2008 14:07:08 -0800 Subject: [PATCH] Add userspace datapath to openflow. --- INSTALL | 187 ++-- Makefile.am | 1 + README | 39 +- udatapath/.gitignore | 4 + udatapath/automake.mk | 25 + udatapath/chain.c | 191 +++++ udatapath/chain.h | 67 ++ udatapath/crc32.c | 68 ++ udatapath/crc32.h | 50 ++ udatapath/datapath.c | 1750 ++++++++++++++++++++++++++++++++++++++ udatapath/datapath.h | 58 ++ udatapath/dp_act.c | 476 +++++++++++ udatapath/dp_act.h | 49 ++ udatapath/nx_act.c | 52 ++ udatapath/nx_act.h | 48 ++ udatapath/switch-flow.c | 298 +++++++ udatapath/switch-flow.h | 95 +++ udatapath/table-hash.c | 426 ++++++++++ udatapath/table-linear.c | 243 ++++++ udatapath/table.h | 142 ++++ udatapath/udatapath.8.in | 183 ++++ udatapath/udatapath.c | 308 +++++++ 22 files changed, 4647 insertions(+), 113 deletions(-) create mode 100644 udatapath/.gitignore create mode 100644 udatapath/automake.mk create mode 100644 udatapath/chain.c create mode 100644 udatapath/chain.h create mode 100644 udatapath/crc32.c create mode 100644 udatapath/crc32.h create mode 100644 udatapath/datapath.c create mode 100644 udatapath/datapath.h create mode 100644 udatapath/dp_act.c create mode 100644 udatapath/dp_act.h create mode 100644 udatapath/nx_act.c create mode 100644 udatapath/nx_act.h create mode 100644 udatapath/switch-flow.c create mode 100644 udatapath/switch-flow.h create mode 100644 udatapath/table-hash.c create mode 100644 udatapath/table-linear.c create mode 100644 udatapath/table.h create mode 100644 udatapath/udatapath.8.in create mode 100644 udatapath/udatapath.c diff --git a/INSTALL b/INSTALL index b283f94ba..8f558b415 100644 --- a/INSTALL +++ b/INSTALL @@ -11,11 +11,6 @@ Contents The OpenFlow reference implementation includes three separate OpenFlow switch implementations: - - The "userspace switch": This implements an OpenFlow switch - as a single user program (built as 
switch/switch). The - userspace switch is the easiest to build and use but it is - much less featureful than the other switch implementations. - - The "kernel-based switch": This divides the switch into a "datapath" Linux kernel module (openflow_mod.o for Linux 2.4 or openflow_mod.ko for Linux 2.6) and a userspace program @@ -28,11 +23,17 @@ OpenFlow switch implementations: switch into a userspace "datapath" (built as udatapath/udatapath) and the same userspace program used by the kernel-based switch (secchan). The userspace - datapath-based switch is as featureful as the kernel-based - switch and it does not require building a kernel module, but - it is not as fast as the kernel-based switch and it is part - of the OpenFlow extensions distribution, not the main - OpenFlow distribution. + datapath-based switch does not require building a kernel + module, but it is not as fast as the kernel-based switch. + + - The "userspace switch": This implements an OpenFlow switch + as a single user program (built as switch/switch). The + userspace switch is the easiest to build and use but it is + much less featureful than the other switch implementations. + + The userspace switch is deprecated in favor of the userspace + datapath-based switch. It will likely be removed in a + future OpenFlow release. The reference implementation also contains a simple OpenFlow controller (built as controller/controller) and a number of related @@ -65,7 +66,7 @@ reference distribution, you will need the following software: - libssl, from OpenSSL (http://www.openssl.org/), is optional but recommended. libssl is required to establish confidentiality and authenticity in the connections among OpenFlow switches and - controllers. To enable, configure with --enable-ssl=yes + controllers. To enable, configure with --enable-ssl=yes. 
If you are working from a Git tree or snapshot (instead of from a distribution tarball), or if you modify the OpenFlow build system, you @@ -168,7 +169,7 @@ distribution in the ordinary way using "configure" and "make". The following binaries will be built: - - Switch executable: switch/switch. + - Userspace datapath: udatapath/udatapath. - Secure channel executable: secchan/secchan. @@ -183,11 +184,11 @@ distribution in the ordinary way using "configure" and "make". - Tests: various binaries in tests/. + - Switch executable: switch/switch. + If your distribution includes the OpenFlow extensions, the following additional binaries will be built: - - Userspace datapath: ext/udatapath/udatapath. - - ANSI terminal support for EZIO 16x2 LCD panel: ext/ezio/ezio-term. @@ -300,7 +301,7 @@ Follow these instructions to build Debian packages for OpenFlow. openflow-pki (see below). - openflow-switch: Install this package on a machine that acts - as an OpenFlow userspace or kernel switch. + as an OpenFlow kernel switch. - openflow-datapath-source: Source code for OpenFlow's Linux kernel module. @@ -366,51 +367,10 @@ reference implementation as a switch on a single machine. This can be used to verify that the distribution built properly. For full installation instructions, refer to the Installation section below. -Userspace Switch ----------------- - -These instructions use the OpenFlow userspace switch that runs as an -integrated userspace program. - -1. Start the OpenFlow controller running in the background, by running - the "controller" program with a command like the following: - - # controller ptcp: & - - This command causes the controller to bind to port 6633 (the - default) awaiting connections from OpenFlow switches. See - controller(8) for details. - - The "controller" program does not require any special privilege, so - you do not need to run it as root. - -2. 
The "switch" program must run as root, so log in as root, or use a - program such as "su" to become root temporarily. - -3. On the same machine, use the "switch" program to start an OpenFlow - switch, specifying network devices to use as switch ports on the -i - option as a comma-separated list, like so: - - # switch tcp:127.0.0.1 -i eth1,eth2 - - The network devices that you specify should not have configured IP - addresses. - -4. The controller causes each switch that connects to it to act like a - learning Ethernet switch. Thus, devices plugged into the specified - network ports should now be able to send packets to each other, as - if they were plugged into ports on a conventional Ethernet switch. - -Troubleshooting: if the commands above do not work, try using the -v -or --verbose option on the controller or switch commands, which will -cause a large amount of debug output from each program. - Userspace Datapath ------------------ These instructions use the OpenFlow userspace datapath ("udatapath"). -The udatapath program is part of the OpenFlow extensions repository, -which is not included in every OpenFlow distribution. 1. Start the OpenFlow controller running in the background, by running the "controller" program with a command like the following: @@ -443,6 +403,42 @@ which is not included in every OpenFlow distribution. now be able to send packets to each other, as if they were plugged into ports on a conventional Ethernet switch. +Userspace Switch +---------------- + +These instructions use the OpenFlow userspace switch that runs as an +integrated userspace program. Keep in mind that the userspace switch +is deprecated: you should use the userspace datapath instead. + +1. Start the OpenFlow controller running in the background, by running + the "controller" program with a command like the following: + + # controller ptcp: & + + This command causes the controller to bind to port 6633 (the + default) awaiting connections from OpenFlow switches. 
See + controller(8) for details. + + The "controller" program does not require any special privilege, so + you do not need to run it as root. + +2. The "switch" program must run as root, so log in as root, or use a + program such as "su" to become root temporarily. + +3. On the same machine, use the "switch" program to start an OpenFlow + switch, specifying network devices to use as switch ports on the -i + option as a comma-separated list, like so: + + # switch tcp:127.0.0.1 -i eth1,eth2 + + The network devices that you specify should not have configured IP + addresses. + +4. The controller causes each switch that connects to it to act like a + learning Ethernet switch. Thus, devices plugged into the specified + network ports should now be able to send packets to each other, as + if they were plugged into ports on a conventional Ethernet switch. + Installation ============ @@ -456,9 +452,6 @@ each switch to reach the controller over the network: location of the controller must be configured manually in this case. - All three switch implementations support only out-of-band - control. - - Use the same network for control and for data ("in-band control"). When in-band control is used, the location of the controller may be configured manually or discovered @@ -466,9 +459,8 @@ each switch to reach the controller over the network: please refer to secchan(8) for instructions on setting up controller discovery. - The userspace datapath-based and kernel-based switch - implementations support in-band control. The userspace switch - does not. + The (deprecated) userspace switch does not support in-band + control. Controller Setup ---------------- @@ -484,44 +476,11 @@ port 6633 (the default), as shown below. Make sure the machine hosting the controller is reachable by the switch. -Userspace Switch-Based Setup ----------------------------- - -To set up an OpenFlow switch using the userspace switch, follow this -procedure. 
The userspace switch must be connected to the controller -over a "control network" that is physically separate from the one that -the switch and controller are controlling. (The kernel-based and -userspace datapath-based switches do not have this limitation.) - -0. The commands below must run as root, so log in as root, or use a - program such as "su" to become root temporarily. - -1. Use the "switch" program to start an OpenFlow switch, specifying - the IP address of the controller as the first argument to the - switch program, and the network devices to include in the switch as - arguments to the -i option. For example, if the controller is - running on host 192.168.1.2 port 6633 (the default port), and eth1 - and eth2 are to be the switch ports, the switch invocation would - look like this: - - # switch tcp:127.0.0.1 -i eth1,eth2 - - The network devices that you specify should not have configured IP - addresses. - -2. The controller causes each switch that connects to it to act like a - learning Ethernet switch. Thus, devices plugged into the specified - network ports should now be able to send packets to each other, as - if they were plugged into ports on a conventional Ethernet switch. - Userspace Datapath-Based Setup ------------------------------ On a machine that is to host an OpenFlow userspace datapath-based -switch, follow the procedure below. These instructions require the -OpenFlow userspace datapath ("udatapath"). The udatapath program is -part of the OpenFlow extensions repository, which is not included in -every OpenFlow distribution. +switch, follow the procedure below. 0. The commands below must run as root, so log in as root, or use a program such as "su" to become root temporarily. 
@@ -606,9 +565,9 @@ The OpenFlow kernel module must be loaded, as described under # dpctl adddp nl:0 - (In principle, openflow_mod supports multiple datapaths within the - same host which would be identified as nl:1, nl:2, etc., but this - is rarely useful in practice.) + (nl:0 is the first datapath within a host. openflow_mod supports + multiple datapaths within the same host, which would be identified + as nl:1, nl:2, etc.) Creating datapath nl:0 also creates a new network device named of0. This network device, called the datapath's "local port", will be @@ -684,6 +643,36 @@ The OpenFlow kernel module must be loaded, as described under use, because the switch must then also obtain its own IP address and the controller's location via DHCP. +Userspace Switch-Based Setup +---------------------------- + +To set up an OpenFlow switch using the (deprecated) userspace switch, +follow this procedure. The userspace switch must be connected to the +controller over a "control network" that is physically separate from +the one that the switch and controller are controlling. (The other +switch implementations do not have this limitation.) + +0. The commands below must run as root, so log in as root, or use a + program such as "su" to become root temporarily. + +1. Use the "switch" program to start an OpenFlow switch, specifying + the IP address of the controller as the first argument to the + switch program, and the network devices to include in the switch as + arguments to the -i option. For example, if the controller is + running on host 192.168.1.2 port 6633 (the default port), and eth1 + and eth2 are to be the switch ports, the switch invocation would + look like this: + + # switch tcp:192.168.1.2 -i eth1,eth2 + + The network devices that you specify should not have configured IP + addresses. + +2. The controller causes each switch that connects to it to act like a + learning Ethernet switch. 
Thus, devices plugged into the specified + network ports should now be able to send packets to each other, as + if they were plugged into ports on a conventional Ethernet switch. + Configuration ============= diff --git a/Makefile.am b/Makefile.am index 114805937..dadf84804 100644 --- a/Makefile.am +++ b/Makefile.am @@ -65,6 +65,7 @@ include secchan/automake.mk include controller/automake.mk include utilities/automake.mk include switch/automake.mk +include udatapath/automake.mk include tests/automake.mk include include/automake.mk include third-party/automake.mk diff --git a/README b/README index a5d7a308b..28cfab9c4 100644 --- a/README +++ b/README @@ -30,10 +30,10 @@ Specification [2]. What's here? ------------ -This distribution includes two different reference implementations of -an OpenFlow switch. The first implementation, which is closely tied -to Linux because it is partially implemented in the Linux kernel, has -the following components: +This distribution includes three different reference implementations +of an OpenFlow switch. The first implementation, which is closely +tied to Linux because it is partially implemented in the Linux kernel, +has the following components: - A Linux kernel module that implements the flow table and OpenFlow protocol, in the datapath directory. @@ -43,8 +43,19 @@ the following components: - dpctl, a tool for configuring the kernel module. -The second implementation is a single userspace program, named -"switch", that integrates all three parts of an OpenFlow switch. +The second implementation has the following components: + + - udatapath, which implements the same functionality as the + Linux kernel module in userspace, at a cost in performance. + + - secchan, a program that implements the secure channel + component of the reference switch (the same program used in + the kernel-based implementation). 
+ +The third implementation is a single userspace program, named +"switch", that integrates all three parts of an OpenFlow switch. This +implementation is deprecated. It lacks features present in the other +two implementations. This distribution includes some additional software as well: @@ -73,15 +84,15 @@ compile under Unix-like environments such as Linux, FreeBSD, Mac OS X, and Solaris. Our primary test environment is Debian GNU/Linux. Please contact us with portability-related bug reports or patches. -The Linux kernel module is, of course, Linux-specific, and the secchan -and dpctl utilities will not be as useful without the kernel module. -The testing of the kernel module has focused on Linux 2.6.23. Linux -2.6 releases from 2.6.15 onward and Linux 2.4 releases from 2.4.20 -onward should also work. +The Linux kernel module is, of course, Linux-specific, and the dpctl +utility will not be useful without the kernel module. The testing of +the kernel module has focused on Linux 2.6.26. Linux 2.6 releases +from 2.6.15 onward and Linux 2.4 releases from 2.4.20 onward should +also work. -The userspace switch implementation should be easy to port to +The userspace datapath implementation should be easy to port to Unix-like systems. The interface to network devices, in netdev.c, is -the only code that should need to change. So far, only Linux is +the code most likely to need changes. So far, only Linux is supported. We welcome ports to other platforms. GCC is the expected compiler. @@ -89,7 +100,7 @@ GCC is the expected compiler. Bugs/Shortcomings ----------------- -- The flowtable does not support the "normal processing" action. +- The flow table does not support the "normal processing" action. 
References ---------- diff --git a/udatapath/.gitignore b/udatapath/.gitignore new file mode 100644 index 000000000..5ce50666f --- /dev/null +++ b/udatapath/.gitignore @@ -0,0 +1,4 @@ +/Makefile +/Makefile.in +/udatapath +/udatapath.8 diff --git a/udatapath/automake.mk b/udatapath/automake.mk new file mode 100644 index 000000000..290742b1c --- /dev/null +++ b/udatapath/automake.mk @@ -0,0 +1,25 @@ +bin_PROGRAMS += udatapath/udatapath +man_MANS += udatapath/udatapath.8 + +udatapath_udatapath_SOURCES = \ + udatapath/chain.c \ + udatapath/chain.h \ + udatapath/crc32.c \ + udatapath/crc32.h \ + udatapath/datapath.c \ + udatapath/datapath.h \ + udatapath/dp_act.c \ + udatapath/dp_act.h \ + udatapath/nx_act.c \ + udatapath/nx_act.h \ + udatapath/udatapath.c \ + udatapath/switch-flow.c \ + udatapath/switch-flow.h \ + udatapath/table.h \ + udatapath/table-hash.c \ + udatapath/table-linear.c + +udatapath_udatapath_LDADD = lib/libopenflow.a $(SSL_LIBS) $(FAULT_LIBS) + +EXTRA_DIST += udatapath/udatapath.8.in +DISTCLEANFILES += udatapath/udatapath.8 diff --git a/udatapath/chain.c b/udatapath/chain.c new file mode 100644 index 000000000..8f09c00ea --- /dev/null +++ b/udatapath/chain.c @@ -0,0 +1,191 @@ +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. 
However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. + */ + +#include +#include "chain.h" +#include +#include +#include +#include "switch-flow.h" +#include "table.h" + +#define THIS_MODULE VLM_chain +#include "vlog.h" + +/* Attempts to append 'table' to the set of tables in 'chain'. Returns 0 or + * negative error. If 'table' is null it is assumed that table creation failed + * due to out-of-memory. 
*/ +static int add_table(struct sw_chain *chain, struct sw_table *table) +{ + if (table == NULL) + return -ENOMEM; + if (chain->n_tables >= CHAIN_MAX_TABLES) { + VLOG_ERR("too many tables in chain\n"); + table->destroy(table); /* The rejected table is destroyed here. */ + return -ENOBUFS; + } + chain->tables[chain->n_tables++] = table; + return 0; +} + +/* Creates and returns a new chain. Returns NULL if the chain cannot be + * created. */ +struct sw_chain *chain_create(void) +{ + struct sw_chain *chain = calloc(1, sizeof *chain); + if (chain == NULL) + return NULL; + + if (add_table(chain, table_hash2_create(0x1EDC6F41, TABLE_HASH_MAX_FLOWS, + 0x741B8CD7, TABLE_HASH_MAX_FLOWS)) + || add_table(chain, table_linear_create(TABLE_LINEAR_MAX_FLOWS))) { + chain_destroy(chain); /* Also destroys any table already added. */ + return NULL; + } + + return chain; +} + +/* Searches 'chain' for a flow matching 'key', which must not have any wildcard + * fields. Returns the flow if successful, otherwise a null pointer. */ +struct sw_flow * +chain_lookup(struct sw_chain *chain, const struct sw_flow_key *key) +{ + int i; + + assert(!key->wildcards); + for (i = 0; i < chain->n_tables; i++) { + struct sw_table *t = chain->tables[i]; + struct sw_flow *flow = t->lookup(t, key); + t->n_lookup++; /* Counted for every table probed, hit or miss. */ + if (flow) { + t->n_matched++; + return flow; + } + } + return NULL; +} + +/* Inserts 'flow' into 'chain', replacing any duplicate flow. Returns 0 if + * successful or a negative error. + * + * If successful, 'flow' becomes owned by the chain, otherwise it is retained + * by the caller. */ +int +chain_insert(struct sw_chain *chain, struct sw_flow *flow) +{ + int i; + + for (i = 0; i < chain->n_tables; i++) { + struct sw_table *t = chain->tables[i]; + if (t->insert(t, flow)) /* Nonzero from insert() is reported as success. */ + return 0; + } + + return -ENOBUFS; /* Every insert() above returned zero. */ +} + +/* Modifies actions in 'chain' that match 'key'. If 'strict' set, wildcards + * and priority must match. Returns the number of flows that were modified. 
+ * + * Expensive in the general case as currently implemented, since it requires + * iterating through the entire contents of each table for keys that contain + * wildcards. Relatively cheap for fully specified keys. */ +int +chain_modify(struct sw_chain *chain, const struct sw_flow_key *key, + uint16_t priority, int strict, + const struct ofp_action_header *actions, size_t actions_len) +{ + int count = 0; + int i; + + for (i = 0; i < chain->n_tables; i++) { + struct sw_table *t = chain->tables[i]; + count += t->modify(t, key, priority, strict, actions, actions_len); /* Totals what each table reports. */ + } + + return count; +} + +/* Deletes from 'chain' any and all flows that match 'key'. If 'out_port' + * is not OFPP_NONE, then matching entries must have that port as an + * argument for an output action. If 'strict' is set, then wildcards and + * priority must match. Returns the number of flows that were deleted. + * + * Expensive in the general case as currently implemented, since it requires + * iterating through the entire contents of each table for keys that contain + * wildcards. Relatively cheap for fully specified keys. */ +int +chain_delete(struct sw_chain *chain, const struct sw_flow_key *key, + uint16_t out_port, uint16_t priority, int strict) +{ + int count = 0; + int i; + + for (i = 0; i < chain->n_tables; i++) { + struct sw_table *t = chain->tables[i]; + count += t->delete(t, key, out_port, priority, strict); /* Totals what each table reports. */ + } + + return count; + +} + +/* Deletes timed-out flow entries from all the tables in 'chain' and appends + * the deleted flows to 'deleted'. + * + * Expensive as currently implemented, since it iterates through the entire + * contents of each table. */ +void +chain_timeout(struct sw_chain *chain, struct list *deleted) +{ + int i; + + for (i = 0; i < chain->n_tables; i++) { + struct sw_table *t = chain->tables[i]; + t->timeout(t, deleted); + } +} + +/* Destroys 'chain', which must not have any users. 
*/ +void +chain_destroy(struct sw_chain *chain) +{ + int i; + + for (i = 0; i < chain->n_tables; i++) { + struct sw_table *t = chain->tables[i]; + t->destroy(t); /* Each table is torn down through its own destroy hook. */ + } + free(chain); /* Tables first, then the chain struct itself. */ +} diff --git a/udatapath/chain.h b/udatapath/chain.h new file mode 100644 index 000000000..8427caa59 --- /dev/null +++ b/udatapath/chain.h @@ -0,0 +1,67 @@ +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. + */ + +#ifndef CHAIN_H +#define CHAIN_H 1 + +#include +#include + +struct sw_flow; +struct sw_flow_key; +struct ofp_action_header; +struct list; + +#define TABLE_LINEAR_MAX_FLOWS 100 /* Passed to table_linear_create(). */ +#define TABLE_HASH_MAX_FLOWS 65536 /* Passed twice to table_hash2_create(). */ +#define TABLE_MAC_MAX_FLOWS 1024 +#define TABLE_MAC_NUM_BUCKETS 1024 + +/* Set of tables chained together in sequence from cheap to expensive. */ +#define CHAIN_MAX_TABLES 4 /* Capacity of sw_chain.tables[]. */ +struct sw_chain { + int n_tables; /* Number of entries in use in tables[]. */ + struct sw_table *tables[CHAIN_MAX_TABLES]; /* Searched in index order. */ +}; + +struct sw_chain *chain_create(void); +struct sw_flow *chain_lookup(struct sw_chain *, const struct sw_flow_key *); +int chain_insert(struct sw_chain *, struct sw_flow *); +int chain_modify(struct sw_chain *, const struct sw_flow_key *, + uint16_t, int, const struct ofp_action_header *, size_t); /* priority, strict, actions, actions_len */ +int chain_delete(struct sw_chain *, const struct sw_flow_key *, uint16_t, + uint16_t, int); /* out_port, then priority, strict */ +void chain_timeout(struct sw_chain *, struct list *deleted); +void chain_destroy(struct sw_chain *); + +#endif /* chain.h */ diff --git a/udatapath/crc32.c b/udatapath/crc32.c new file mode 100644 index 000000000..f6c2c0b37 --- /dev/null +++ b/udatapath/crc32.c @@ -0,0 +1,68 @@ +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. 
However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. 
+ */ + +#include +#include "crc32.h" + +void +crc32_init(struct crc32 *crc, unsigned int polynomial) +{ /* Precomputes crc->table[], one entry per index, for the given polynomial. */ + int i; + + for (i = 0; i < CRC32_TABLE_SIZE; ++i) { + unsigned int reg = i << 24; /* NOTE(review): i is int, so with 32-bit int this shift overflows for i >= 128; confirm and consider an unsigned cast. */ + int j; + for (j = 0; j < CRC32_TABLE_BITS; j++) { + int topBit = (reg & 0x80000000) != 0; + reg <<= 1; + if (topBit) + reg ^= polynomial; + } + crc->table[i] = reg; + } +} + +unsigned int +crc32_calculate(const struct crc32 *crc, const void *data_, size_t n_bytes) +{ /* Returns the table-driven checksum of n_bytes bytes at data_, starting from 0. */ + const uint8_t *data = data_; + unsigned int result = 0; + size_t i; + + for (i = 0; i < n_bytes; i++) { + unsigned int top = result >> 24; /* NOTE(review): assumes unsigned int is 32 bits; a wider type could index past table[]. */ + top ^= data[i]; + result = (result << 8) ^ crc->table[top]; + } + return result; +} diff --git a/udatapath/crc32.h b/udatapath/crc32.h new file mode 100644 index 000000000..355aefdfe --- /dev/null +++ b/udatapath/crc32.h @@ -0,0 +1,50 @@ +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. + */ + +#ifndef CRC32_H +#define CRC32_H 1 + +#include +#include + +#define CRC32_TABLE_BITS 8 /* Bits folded in per table lookup. */ +#define CRC32_TABLE_SIZE (1u << CRC32_TABLE_BITS) /* Entries in crc32.table[]. */ + +struct crc32 { + unsigned int table[CRC32_TABLE_SIZE]; /* Filled by crc32_init(); read by crc32_calculate(). */ +}; + +void crc32_init(struct crc32 *, unsigned int polynomial); +unsigned int crc32_calculate(const struct crc32 *, const void *, size_t); + +#endif /* crc32.h */ diff --git a/udatapath/datapath.c b/udatapath/datapath.c new file mode 100644 index 000000000..852ba5080 --- /dev/null +++ b/udatapath/datapath.c @@ -0,0 +1,1750 @@ +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. 
However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. 
+ */
+
+#include "datapath.h"
+/* NOTE(review): the names of the next six system headers were lost when this
+ * patch text was extracted.  They are restored from the libc facilities this
+ * file uses (inet_ntoa/inet_ntop, assert, errno values, UINT64_C,
+ * calloc/free, memset/strerror) -- confirm against upstream. */
+#include <arpa/inet.h>
+#include <assert.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+#include "chain.h"
+#include "csum.h"
+#include "flow.h"
+#include "list.h"
+#include "netdev.h"
+#include "ofpbuf.h"
+#include "openflow/openflow.h"
+#include "packets.h"
+#include "poll-loop.h"
+#include "rconn.h"
+#include "stp.h"
+#include "switch-flow.h"
+#include "table.h"
+#include "timeval.h"
+#include "vconn.h"
+#include "xtoxll.h"
+#include "dp_act.h"
+
+#define THIS_MODULE VLM_datapath
+#include "vlog.h"
+
+/* Description strings reported in OFPST_DESC replies.  They are defined
+ * outside this file; desc_stats_dump() copies from their addresses. */
+extern char mfr_desc;
+extern char hw_desc;
+extern char sw_desc;
+extern char serial_num;
+
+/* Capabilities supported by this implementation. */
+#define OFP_SUPPORTED_CAPABILITIES ( OFPC_FLOW_STATS \
+        | OFPC_TABLE_STATS \
+        | OFPC_PORT_STATS \
+        | OFPC_MULTI_PHY_TX )
+
+/* Actions supported by this implementation. */
+#define OFP_SUPPORTED_ACTIONS ( (1 << OFPAT_OUTPUT) \
+        | (1 << OFPAT_SET_VLAN_VID) \
+        | (1 << OFPAT_SET_VLAN_PCP) \
+        | (1 << OFPAT_STRIP_VLAN) \
+        | (1 << OFPAT_SET_DL_SRC) \
+        | (1 << OFPAT_SET_DL_DST) \
+        | (1 << OFPAT_SET_NW_SRC) \
+        | (1 << OFPAT_SET_NW_DST) \
+        | (1 << OFPAT_SET_TP_SRC) \
+        | (1 << OFPAT_SET_TP_DST) )
+
+/* One switch port.  A slot in datapath.ports whose 'netdev' is null is
+ * unused. */
+struct sw_port {
+    uint32_t config;            /* Some subset of OFPPC_* flags. */
+    uint32_t state;             /* Some subset of OFPPS_* flags. */
+    struct datapath *dp;
+    struct netdev *netdev;
+    struct list node;           /* Element in datapath.port_list. */
+    unsigned long long int rx_packets, tx_packets;
+    unsigned long long int rx_bytes, tx_bytes;
+    unsigned long long int tx_dropped;
+    uint16_t port_no;
+};
+
+/* The origin of a received OpenFlow message, to enable sending a reply. */
+struct sender {
+    struct remote *remote;      /* The device that sent the message. */
+    uint32_t xid;               /* The OpenFlow transaction ID. */
+};
+
+/* A connection to a secure channel. */
+struct remote {
+    struct list node;           /* Element in datapath.remotes. */
+    struct rconn *rconn;
+#define TXQ_LIMIT 128           /* Max number of packets to queue for tx. */
+    int n_txq;                  /* Number of packets queued for tx on rconn. */
+
+    /* Support for reliable, multi-message replies to requests.
+     *
+     * If an incoming request needs to have a reliable reply that might
+     * require multiple messages, it can use remote_start_dump() to set up
+     * a callback that will be called as buffer space for replies becomes
+     * available.  'cb_done' and 'cb_aux' are meaningful only while
+     * 'cb_dump' is nonnull. */
+    int (*cb_dump)(struct datapath *, void *aux);
+    void (*cb_done)(void *aux);
+    void *cb_aux;
+};
+
+#define DP_MAX_PORTS 255
+BUILD_ASSERT_DECL(DP_MAX_PORTS <= OFPP_MAX);
+
+struct datapath {
+    /* Remote connections. */
+    struct list remotes;        /* All connections (including controller). */
+
+    /* Listeners. */
+    struct pvconn **listeners;
+    size_t n_listeners;
+
+    time_t last_timeout;        /* Last time dp_run() did its periodic work. */
+
+    /* Unique identifier for this datapath */
+    uint64_t id;
+
+    struct sw_chain *chain;     /* Forwarding rules. */
+
+    /* Configuration set from controller. */
+    uint16_t flags;
+    uint16_t miss_send_len;
+
+    /* Switch ports. */
+    struct sw_port ports[DP_MAX_PORTS];
+    struct sw_port *local_port; /* OFPP_LOCAL port, if any. */
+    struct list port_list;      /* All ports, including local_port. */
+};
+
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 60);
+
+static struct remote *remote_create(struct datapath *, struct rconn *);
+static void remote_run(struct datapath *, struct remote *);
+static void remote_wait(struct remote *);
+static void remote_destroy(struct remote *);
+
+static void update_port_flags(struct datapath *, const struct ofp_port_mod *);
+static void send_flow_expired(struct datapath *, struct sw_flow *,
+                              enum ofp_flow_expired_reason);
+static int update_port_status(struct sw_port *p);
+static void send_port_status(struct sw_port *p, uint8_t status);
+static void del_switch_port(struct sw_port *p);
+
+/* Buffers are identified by a 31-bit opaque ID. We divide the ID
+ * into a buffer number (low bits) and a cookie (high bits). The buffer number
+ * is an index into an array of buffers. The cookie distinguishes between
+ * different packets that have occupied a single buffer.
Thus, the more
+ * buffers we have, the lower-quality the cookie... */
+#define PKT_BUFFER_BITS 8
+#define N_PKT_BUFFERS (1 << PKT_BUFFER_BITS)
+#define PKT_BUFFER_MASK (N_PKT_BUFFERS - 1)
+
+#define PKT_COOKIE_BITS (32 - PKT_BUFFER_BITS)
+
+int run_flow_through_tables(struct datapath *, struct ofpbuf *,
+                            struct sw_port *);
+void fwd_port_input(struct datapath *, struct ofpbuf *, struct sw_port *);
+int fwd_control_input(struct datapath *, const struct sender *,
+                      const void *, size_t);
+
+uint32_t save_buffer(struct ofpbuf *);
+static struct ofpbuf *retrieve_buffer(uint32_t id);
+static void discard_buffer(uint32_t id);
+
+/* Returns the port slot for 'port_no': an element of 'dp->ports' for numbers
+ * below DP_MAX_PORTS, 'dp->local_port' (possibly null) for OFPP_LOCAL, and
+ * null for anything else.
+ *
+ * The returned slot may be unused; callers that touch the device must check
+ * that its 'netdev' is nonnull. */
+static struct sw_port *
+lookup_port(struct datapath *dp, uint16_t port_no)
+{
+    return (port_no < DP_MAX_PORTS ? &dp->ports[port_no]
+            : port_no == OFPP_LOCAL ? dp->local_port
+            : NULL);
+}
+
+/* Generates and returns a random datapath id. */
+static uint64_t
+gen_datapath_id(void)
+{
+    uint8_t ea[ETH_ADDR_LEN];
+    eth_addr_random(ea);
+    ea[0] = 0x00;               /* Set Nicira OUI. */
+    ea[1] = 0x23;
+    ea[2] = 0x20;
+    return eth_addr_to_uint64(ea);
+}
+
+/* Creates a datapath with no ports, remotes or listeners and stores it in
+ * '*dp_'.  'dpid' is used as the datapath id if it fits in 48 bits;
+ * otherwise a random id is generated.
+ *
+ * Returns 0 on success or ENOMEM, in which case '*dp_' is not written. */
+int
+dp_new(struct datapath **dp_, uint64_t dpid)
+{
+    struct datapath *dp;
+
+    dp = calloc(1, sizeof *dp);
+    if (!dp) {
+        return ENOMEM;
+    }
+
+    dp->last_timeout = time_now();
+    list_init(&dp->remotes);
+    dp->listeners = NULL;
+    dp->n_listeners = 0;
+    dp->id = dpid <= UINT64_C(0xffffffffffff) ? dpid : gen_datapath_id();
+    dp->chain = chain_create();
+    if (!dp->chain) {
+        VLOG_ERR("could not create chain");
+        free(dp);
+        return ENOMEM;
+    }
+
+    list_init(&dp->port_list);
+    dp->flags = 0;
+    dp->miss_send_len = OFP_DEFAULT_MISS_SEND_LEN;
+    *dp_ = dp;
+    return 0;
+}
+
+/* Opens network device 'netdev_name' and initializes 'port' as port
+ * 'port_no' of 'dp' on top of it.  If 'new_mac' is nonnull and differs from
+ * the device's address, tries to change the device's Ethernet address to
+ * 'new_mac'; failure to do so is logged but not fatal.
+ *
+ * On success, appends 'port' to 'dp->port_list', announces it with an
+ * OFPPR_ADD port status message, and returns 0.  Otherwise returns the
+ * error from the netdev layer and leaves 'port' untouched. */
+static int
+new_port(struct datapath *dp, struct sw_port *port, uint16_t port_no,
+         const char *netdev_name, const uint8_t *new_mac)
+{
+    struct netdev *netdev;
+    struct in6_addr in6;
+    struct in_addr in4;
+    int error;
+
+    error = netdev_open(netdev_name, NETDEV_ETH_TYPE_ANY, &netdev);
+    if (error) {
+        return error;
+    }
+    if (new_mac && !eth_addr_equals(netdev_get_etheraddr(netdev), new_mac)) {
+        /* Generally the device has to be down before we change its hardware
+         * address. Don't bother to check for an error because it's really
+         * the netdev_set_etheraddr() call below that we care about. */
+        netdev_set_flags(netdev, 0, false);
+        error = netdev_set_etheraddr(netdev, new_mac);
+        if (error) {
+            VLOG_WARN("failed to change %s Ethernet address "
+                      "to "ETH_ADDR_FMT": %s",
+                      netdev_name, ETH_ADDR_ARGS(new_mac), strerror(error));
+        }
+    }
+    error = netdev_set_flags(netdev, NETDEV_UP | NETDEV_PROMISC, false);
+    if (error) {
+        VLOG_ERR("failed to set promiscuous mode on %s device", netdev_name);
+        netdev_close(netdev);
+        return error;
+    }
+    /* An address on a switched device is only reported, not rejected. */
+    if (netdev_get_in4(netdev, &in4)) {
+        VLOG_ERR("%s device has assigned IP address %s",
+                 netdev_name, inet_ntoa(in4));
+    }
+    if (netdev_get_in6(netdev, &in6)) {
+        char in6_name[INET6_ADDRSTRLEN + 1];
+        inet_ntop(AF_INET6, &in6, in6_name, sizeof in6_name);
+        VLOG_ERR("%s device has assigned IPv6 address %s",
+                 netdev_name, in6_name);
+    }
+
+    memset(port, '\0', sizeof *port);
+
+    port->dp = dp;
+    port->netdev = netdev;
+    port->port_no = port_no;
+    list_push_back(&dp->port_list, &port->node);
+
+    /* Notify the ctlpath that this port has been added */
+    send_port_status(port, OFPPR_ADD);
+
+    return 0;
+}
+
+/* Adds network device 'netdev' to 'dp' in the lowest-numbered unused port
+ * slot.  Returns 0 on success, EXFULL if every slot is in use, or the error
+ * from new_port(). */
+int
+dp_add_port(struct datapath *dp, const char *netdev)
+{
+    int port_no;
+    for (port_no = 0; port_no < DP_MAX_PORTS; port_no++) {
+        struct sw_port *port = &dp->ports[port_no];
+        if (!port->netdev) {
+            return new_port(dp, port, port_no, netdev, NULL);
+        }
+    }
+    return EXFULL;
+}
+
+/* Adds network device 'netdev' to 'dp' as its OFPP_LOCAL port, asking for
+ * the device's Ethernet address to be set from the datapath id.  Returns 0
+ * on success, EXFULL if 'dp' already has a local port, or the error from
+ * new_port(). */
+int
+dp_add_local_port(struct datapath *dp, const char *netdev)
+{
+    if (!dp->local_port) {
+        uint8_t ea[ETH_ADDR_LEN];
+        struct sw_port *port;
+        int error;
+
+        /* Unlike numbered ports, the local port lives on the heap. */
+        port = xcalloc(1, sizeof *port);
+        eth_addr_from_uint64(dp->id, ea);
+        error = new_port(dp, port, OFPP_LOCAL, netdev, ea);
+        if (!error) {
+            dp->local_port = port;
+        } else {
+            free(port);
+        }
+        return error;
+    } else {
+        return EXFULL;
+    }
+}
+
+/* Appends 'pvconn' to the listeners that dp_run() polls for new
+ * connections. */
+void
+dp_add_pvconn(struct datapath *dp, struct pvconn *pvconn)
+{
+    dp->listeners = xrealloc(dp->listeners,
+                             sizeof *dp->listeners * (dp->n_listeners + 1));
+    dp->listeners[dp->n_listeners++] = pvconn;
+}
+
+/* Performs one round of datapath work:
+ *
+ *   - When time_now() differs from the last recorded value: refreshes port
+ *     status (reporting changes) and expires timed-out flows.
+ *   - Receives at most one packet from each port and forwards it.
+ *   - Services every remote connection.
+ *   - Accepts at most one new connection on each listener. */
+void
+dp_run(struct datapath *dp)
+{
+    time_t now = time_now();
+    struct sw_port *p, *pn;
+    struct remote *r, *rn;
+    struct ofpbuf *buffer = NULL;
+    size_t i;
+
+    if (now != dp->last_timeout) {
+        struct list deleted = LIST_INITIALIZER(&deleted);
+        struct sw_flow *f, *n;
+
+        LIST_FOR_EACH (p, struct sw_port, node, &dp->port_list) {
+            if (update_port_status(p)) {
+                send_port_status(p, OFPPR_MODIFY);
+            }
+        }
+
+        chain_timeout(dp->chain, &deleted);
+        LIST_FOR_EACH_SAFE (f, n, struct sw_flow, node, &deleted) {
+            send_flow_expired(dp, f, f->reason);
+            list_remove(&f->node);
+            flow_free(f);
+        }
+        dp->last_timeout = now;
+    }
+    poll_timer_wait(1000);
+
+    LIST_FOR_EACH_SAFE (p, pn, struct sw_port, node, &dp->port_list) {
+        int error;
+
+        if (!buffer) {
+            /* Allocate buffer with some headroom to add headers in forwarding
+             * to the controller or adding a vlan tag, plus an extra 2 bytes to
+             * allow IP headers to be aligned on a 4-byte boundary. */
+            const int headroom = 128 + 2;
+            const int hard_header = VLAN_ETH_HEADER_LEN;
+            const int mtu = netdev_get_mtu(p->netdev);
+            buffer = ofpbuf_new(headroom + hard_header + mtu);
+            buffer->data = (char*)buffer->data + headroom;
+        }
+        error = netdev_recv(p->netdev, buffer);
+        if (!error) {
+            p->rx_packets++;
+            p->rx_bytes += buffer->size;
+            /* fwd_port_input() takes ownership, so the next port needs a
+             * fresh buffer. */
+            fwd_port_input(dp, buffer, p);
+            buffer = NULL;
+        } else if (error != EAGAIN) {
+            VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
+                        netdev_get_name(p->netdev), strerror(error));
+        }
+    }
+    ofpbuf_delete(buffer);
+
+    /* Talk to remotes. */
+    LIST_FOR_EACH_SAFE (r, rn, struct remote, node, &dp->remotes) {
+        remote_run(dp, r);
+    }
+
+    for (i = 0; i < dp->n_listeners; ) {
+        struct pvconn *pvconn = dp->listeners[i];
+        struct vconn *new_vconn;
+        int retval = pvconn_accept(pvconn, OFP_VERSION, &new_vconn);
+        if (!retval) {
+            remote_create(dp, rconn_new_from_vconn("passive", new_vconn));
+        } else if (retval != EAGAIN) {
+            VLOG_WARN_RL(&rl, "accept failed (%s)", strerror(retval));
+            /* Drop the broken listener.  Close it first: the array holds
+             * the only reference this file keeps, so it would otherwise
+             * leak.  (Presumably callers of dp_add_pvconn() hand over
+             * ownership -- confirm against callers.) */
+            pvconn_close(pvconn);
+            dp->listeners[i] = dp->listeners[--dp->n_listeners];
+            continue;
+        }
+        i++;
+    }
+}
+
+/* Services remote 'r': runs its connection, then either processes received
+ * OpenFlow messages or, while a dump is in progress, calls the dump callback
+ * as long as the transmit queue has room.  Destroys 'r' if its connection
+ * is no longer alive, so the caller must not use 'r' afterward. */
+static void
+remote_run(struct datapath *dp, struct remote *r)
+{
+    int i;
+
+    rconn_run(r->rconn);
+
+    /* Do some remote processing, but cap it at a reasonable amount so that
+     * other processing doesn't starve. */
+    for (i = 0; i < 50; i++) {
+        if (!r->cb_dump) {
+            struct ofpbuf *buffer;
+            struct ofp_header *oh;
+
+            buffer = rconn_recv(r->rconn);
+            if (!buffer) {
+                break;
+            }
+
+            if (buffer->size >= sizeof *oh) {
+                struct sender sender;
+
+                oh = buffer->data;
+                sender.remote = r;
+                sender.xid = oh->xid;
+                fwd_control_input(dp, &sender, buffer->data, buffer->size);
+            } else {
+                VLOG_WARN_RL(&rl, "received too-short OpenFlow message");
+            }
+            ofpbuf_delete(buffer);
+        } else {
+            if (r->n_txq < TXQ_LIMIT) {
+                int error = r->cb_dump(dp, r->cb_aux);
+                if (error <= 0) {
+                    /* 0 means the dump is complete; negative is an errno. */
+                    if (error) {
+                        VLOG_WARN_RL(&rl, "dump callback error: %s",
+                                     strerror(-error));
+                    }
+                    r->cb_done(r->cb_aux);
+                    r->cb_dump = NULL;
+                }
+            } else {
+                break;
+            }
+        }
+    }
+
+    if (!rconn_is_alive(r->rconn)) {
+        remote_destroy(r);
+    }
+}
+
+/* Arranges for the poll loop to wake up when 'r' has work to do. */
+static void
+remote_wait(struct remote *r)
+{
+    rconn_run_wait(r->rconn);
+    rconn_recv_wait(r->rconn);
+}
+
+/* Destroys 'r' (a null 'r' is ignored): lets an unfinished dump clean up,
+ * unlinks 'r' from its datapath's list and closes its connection. */
+static void
+remote_destroy(struct remote *r)
+{
+    if (r) {
+        if (r->cb_dump && r->cb_done) {
+            r->cb_done(r->cb_aux);
+        }
+        list_remove(&r->node);
+        rconn_destroy(r->rconn);
+        free(r);
+    }
+}
+
+/* Creates a remote for 'rconn', appends it to 'dp->remotes' and returns it.
+ * The new remote has no dump in progress and an empty transmit queue. */
+static struct remote *
+remote_create(struct datapath *dp, struct rconn *rconn)
+{
+    struct remote *remote = xmalloc(sizeof *remote);
+    list_push_back(&dp->remotes, &remote->node);
+    remote->rconn = rconn;
+    /* Initialize every callback field so that none is ever read
+     * uninitialized, even though only 'cb_dump' is tested. */
+    remote->cb_dump = NULL;
+    remote->cb_done = NULL;
+    remote->cb_aux = NULL;
+    remote->n_txq = 0;
+    return remote;
+}
+
+/* Starts a callback-based, reliable, possibly multi-message reply to a
+ * request made by 'remote'.
+ *
+ * 'dump' designates a function that will be called when the 'remote' send
+ * queue has an empty slot. It should compose a message and send it on
+ * 'remote'. On success, it should return 1 if it should be called again when
+ * another send queue slot opens up, 0 if its transmissions are complete, or a
+ * negative errno value on failure.
+ *
+ * 'done' designates a function to clean up any resources allocated for the
+ * dump.
It must handle being called before the dump is complete (which will
+ * happen if 'remote' is closed unexpectedly).
+ *
+ * 'aux' is passed to 'dump' and 'done'. */
+static void
+remote_start_dump(struct remote *remote,
+                  int (*dump)(struct datapath *, void *),
+                  void (*done)(void *),
+                  void *aux)
+{
+    /* Only one dump may be in progress on a remote at a time. */
+    assert(!remote->cb_dump);
+    remote->cb_dump = dump;
+    remote->cb_done = done;
+    remote->cb_aux = aux;
+}
+
+/* Arranges for the poll loop to wake up when any port, remote or listener of
+ * 'dp' has something for dp_run() to do. */
+void
+dp_wait(struct datapath *dp)
+{
+    struct sw_port *p;
+    struct remote *r;
+    size_t i;
+
+    LIST_FOR_EACH (p, struct sw_port, node, &dp->port_list) {
+        netdev_recv_wait(p->netdev);
+    }
+    LIST_FOR_EACH (r, struct remote, node, &dp->remotes) {
+        remote_wait(r);
+    }
+    for (i = 0; i < dp->n_listeners; i++) {
+        pvconn_wait(dp->listeners[i]);
+    }
+}
+
+/* Delete 'p' from switch.
+ *
+ * Announces the removal with an OFPPR_DELETE port status message, closes the
+ * device and unlinks 'p' from its datapath's port list.  Does not free 'p'
+ * itself. */
+static void
+del_switch_port(struct sw_port *p)
+{
+    send_port_status(p, OFPPR_DELETE);
+    netdev_close(p->netdev);
+    p->netdev = NULL;
+    list_remove(&p->node);
+}
+
+/* Destroys 'dp' and everything that this file allocates on its behalf.  A
+ * null 'dp' is ignored. */
+void
+dp_destroy(struct datapath *dp)
+{
+    struct sw_port *p, *n;
+    struct remote *r, *rn;
+    size_t i;
+
+    if (!dp) {
+        return;
+    }
+
+    /* Ports go first: del_switch_port() still broadcasts a port status
+     * message to the remotes. */
+    LIST_FOR_EACH_SAFE (p, n, struct sw_port, node, &dp->port_list) {
+        del_switch_port(p);
+    }
+    /* The local port is the only port not embedded in 'dp->ports'; it was
+     * heap-allocated by dp_add_local_port().  free(NULL) is a no-op. */
+    free(dp->local_port);
+    dp->local_port = NULL;
+
+    LIST_FOR_EACH_SAFE (r, rn, struct remote, node, &dp->remotes) {
+        remote_destroy(r);
+    }
+
+    /* NOTE(review): assumes the datapath owns the listeners handed to
+     * dp_add_pvconn() -- confirm against callers. */
+    for (i = 0; i < dp->n_listeners; i++) {
+        pvconn_close(dp->listeners[i]);
+    }
+    free(dp->listeners);
+
+    chain_destroy(dp->chain);
+    free(dp);
+}
+
+/* Send packets out all the ports except the originating one. If the
+ * "flood" argument is set, don't send out ports with flooding disabled.
+ */
+static int
+output_all(struct datapath *dp, struct ofpbuf *buffer, int in_port, int flood)
+{
+    /* Each eligible port is sent to one iteration late.  That way every
+     * port but the last receives a clone, and the last one receives (and
+     * consumes) 'buffer' itself. */
+    int pending_port = -1;
+    struct sw_port *port;
+
+    LIST_FOR_EACH (port, struct sw_port, node, &dp->port_list) {
+        bool is_source = port->port_no == in_port;
+        bool flood_disabled = flood && (port->config & OFPPC_NO_FLOOD);
+
+        if (is_source || flood_disabled) {
+            continue;
+        }
+        if (pending_port != -1) {
+            struct ofpbuf *copy = ofpbuf_clone(buffer);
+            dp_output_port(dp, copy, in_port, pending_port, false);
+        }
+        pending_port = port->port_no;
+    }
+
+    if (pending_port == -1) {
+        /* No port qualified, so nothing consumed 'buffer'. */
+        ofpbuf_delete(buffer);
+    } else {
+        dp_output_port(dp, buffer, in_port, pending_port, false);
+    }
+
+    return 0;
+}
+
+/* Transmits 'buffer' on 'out_port' of 'dp' and frees it.
+ *
+ * Nothing is sent if the port is configured down.  A successful send bumps
+ * the port's tx packet and byte counters, a failed one its tx_dropped
+ * counter.  A port number with no open device behind it is only logged. */
+void
+output_packet(struct datapath *dp, struct ofpbuf *buffer, uint16_t out_port)
+{
+    struct sw_port *port = lookup_port(dp, out_port);
+
+    if (!port || port->netdev == NULL) {
+        ofpbuf_delete(buffer);
+        VLOG_DBG_RL(&rl, "can't forward to bad port %d\n", out_port);
+        return;
+    }
+
+    if ((port->config & OFPPC_PORT_DOWN) == 0) {
+        if (netdev_send(port->netdev, buffer) == 0) {
+            port->tx_packets++;
+            port->tx_bytes += buffer->size;
+        } else {
+            port->tx_dropped++;
+        }
+    }
+    ofpbuf_delete(buffer);
+}
+
+/* Takes ownership of 'buffer' and transmits it to 'out_port' on 'dp'.
+ */
+void
+dp_output_port(struct datapath *dp, struct ofpbuf *buffer,
+               int in_port, int out_port, bool ignore_no_fwd)
+{
+
+    assert(buffer);
+    switch (out_port) {
+    case OFPP_IN_PORT:
+        output_packet(dp, buffer, in_port);
+        break;
+
+    case OFPP_TABLE: {
+        struct sw_port *p = lookup_port(dp, in_port);
+        /* On a table miss the buffer still belongs to us, so free it. */
+        if (run_flow_through_tables(dp, buffer, p)) {
+            ofpbuf_delete(buffer);
+        }
+        break;
+    }
+
+    case OFPP_FLOOD:
+        output_all(dp, buffer, in_port, 1);
+        break;
+
+    case OFPP_ALL:
+        output_all(dp, buffer, in_port, 0);
+        break;
+
+    case OFPP_CONTROLLER:
+        dp_output_control(dp, buffer, in_port, 0, OFPR_ACTION);
+        break;
+
+    case OFPP_LOCAL:
+    default:
+        if (in_port == out_port) {
+            VLOG_DBG_RL(&rl, "can't directly forward to input port");
+            /* This function owns 'buffer', so refusing to forward must
+             * still free it; previously it was leaked here. */
+            ofpbuf_delete(buffer);
+            return;
+        }
+        output_packet(dp, buffer, out_port);
+        break;
+    }
+}
+
+/* Creates an OpenFlow message of the given 'type' and length, stores its
+ * buffer in '*bufferp' and returns a pointer to the message.  The message
+ * carries the transaction id of 'sender', or 0 if 'sender' is null. */
+static void *
+make_openflow_reply(size_t openflow_len, uint8_t type,
+                    const struct sender *sender, struct ofpbuf **bufferp)
+{
+    return make_openflow_xid(openflow_len, type, sender ? sender->xid : 0,
+                             bufferp);
+}
+
+/* Queues 'buffer' for transmission on 'remote', subject to TXQ_LIMIT.
+ * Returns 0 on success; on failure logs a warning and returns the error
+ * from rconn_send_with_limit(). */
+static int
+send_openflow_buffer_to_remote(struct ofpbuf *buffer, struct remote *remote)
+{
+    int retval = rconn_send_with_limit(remote->rconn, buffer, &remote->n_txq,
+                                       TXQ_LIMIT);
+    if (retval) {
+        VLOG_WARN_RL(&rl, "send to %s failed: %s",
+                     rconn_get_name(remote->rconn), strerror(retval));
+    }
+    return retval;
+}
+
+/* Fixes up the length field of the OpenFlow message in 'buffer' and sends it
+ * to 'sender', or to every remote of 'dp' if 'sender' is null.
+ *
+ * Returns the send result when replying to 'sender'.  A broadcast always
+ * returns 0; per-remote failures are only logged. */
+static int
+send_openflow_buffer(struct datapath *dp, struct ofpbuf *buffer,
+                     const struct sender *sender)
+{
+    update_openflow_length(buffer);
+    if (sender) {
+        /* Send back to the sender. */
+        return send_openflow_buffer_to_remote(buffer, sender->remote);
+    } else {
+        /* Broadcast to all remotes.  Sending runs one remote behind the
+         * iteration so that the last remote gets 'buffer' itself rather
+         * than a clone. */
+        struct remote *r, *prev = NULL;
+        LIST_FOR_EACH (r, struct remote, node, &dp->remotes) {
+            if (prev) {
+                send_openflow_buffer_to_remote(ofpbuf_clone(buffer), prev);
+            }
+            prev = r;
+        }
+        if (prev) {
+            send_openflow_buffer_to_remote(buffer, prev);
+        } else {
+            ofpbuf_delete(buffer);
+        }
+        return 0;
+    }
+}
+
+/* Takes ownership of 'buffer' and transmits it to 'dp''s controller. If the
+ * packet can be saved in a buffer, then only the first max_len bytes of
+ * 'buffer' are sent; otherwise, all of 'buffer' is sent. 'reason' indicates
+ * why 'buffer' is being sent. 'max_len' sets the maximum number of bytes that
+ * the caller wants to be sent; a value of 0 indicates the entire packet should
+ * be sent. */
+void
+dp_output_control(struct datapath *dp, struct ofpbuf *buffer, int in_port,
+                  size_t max_len, int reason)
+{
+    struct ofp_packet_in *opi;
+    size_t total_len;
+    uint32_t buffer_id;
+
+    buffer_id = save_buffer(buffer);
+    total_len = buffer->size;
+    /* Truncate only if the full packet was saved (UINT32_MAX means it was
+     * not) and the caller asked for a limit. */
+    if (buffer_id != UINT32_MAX && max_len && buffer->size > max_len) {
+        buffer->size = max_len;
+    }
+
+    opi = ofpbuf_push_uninit(buffer, offsetof(struct ofp_packet_in, data));
+    opi->header.version = OFP_VERSION;
+    opi->header.type = OFPT_PACKET_IN;
+    opi->header.length = htons(buffer->size);
+    opi->header.xid = htonl(0);
+    opi->buffer_id = htonl(buffer_id);
+    opi->total_len = htons(total_len);
+    opi->in_port = htons(in_port);
+    opi->reason = reason;
+    opi->pad = 0;
+    send_openflow_buffer(dp, buffer, NULL);
+}
+
+/* Fills 'desc' with the wire-format (network byte order) description of
+ * port 'p'.  'p' must have an open network device. */
+static void fill_port_desc(struct datapath *dp, struct sw_port *p,
+                           struct ofp_phy_port *desc)
+{
+    desc->port_no = htons(p->port_no);
+    strncpy((char *) desc->name, netdev_get_name(p->netdev),
+            sizeof desc->name);
+    /* strncpy() does not terminate the string if the name fills the field. */
+    desc->name[sizeof desc->name - 1] = '\0';
+    memcpy(desc->hw_addr, netdev_get_etheraddr(p->netdev), ETH_ADDR_LEN);
+    desc->config = htonl(p->config);
+    desc->state = htonl(p->state);
+    desc->curr = htonl(netdev_get_features(p->netdev, NETDEV_FEAT_CURRENT));
+    desc->supported = htonl(netdev_get_features(p->netdev,
+                                                NETDEV_FEAT_SUPPORTED));
+    desc->advertised = htonl(netdev_get_features(p->netdev,
+                                                 NETDEV_FEAT_ADVERTISED));
+    desc->peer = htonl(netdev_get_features(p->netdev, NETDEV_FEAT_PEER));
+}
+
+/* Sends an OFPT_FEATURES_REPLY that describes 'dp' and each of its ports to
+ * 'sender', or to all remotes if 'sender' is null. */
+static void
+dp_send_features_reply(struct datapath *dp, const struct sender *sender)
+{
+    struct ofpbuf *buffer;
+    struct ofp_switch_features *ofr;
+    struct sw_port *p;
+
+    ofr = make_openflow_reply(sizeof *ofr, OFPT_FEATURES_REPLY,
+                              sender, &buffer);
+    ofr->datapath_id = htonll(dp->id);
+    ofr->n_tables = dp->chain->n_tables;
+    ofr->n_buffers = htonl(N_PKT_BUFFERS);
+    ofr->capabilities = htonl(OFP_SUPPORTED_CAPABILITIES);
+    ofr->actions = htonl(OFP_SUPPORTED_ACTIONS);
+    LIST_FOR_EACH (p, struct sw_port, node, &dp->port_list) {
+        struct ofp_phy_port *opp = ofpbuf_put_uninit(buffer, sizeof *opp);
+        memset(opp, 0, sizeof *opp);
+        fill_port_desc(dp, p, opp);
+    }
+    send_openflow_buffer(dp, buffer, sender);
+}
+
+/* Applies the port configuration change requested by 'opm' to 'dp'.
+ *
+ * The request is silently ignored if it names a port that does not exist or
+ * has no open device, or if its hardware address no longer matches the
+ * port's.  If the change toggles OFPPC_PORT_DOWN, the underlying device is
+ * brought down or up to match. */
+void
+update_port_flags(struct datapath *dp, const struct ofp_port_mod *opm)
+{
+    struct sw_port *p = lookup_port(dp, ntohs(opm->port_no));
+    uint32_t config_mask;
+    uint32_t old_config;
+
+    /* Make sure the port id hasn't changed since this was sent.  The netdev
+     * check matters because lookup_port() also returns unused slots, whose
+     * 'netdev' is null. */
+    if (!p || !p->netdev
+        || memcmp(opm->hw_addr, netdev_get_etheraddr(p->netdev),
+                  ETH_ADDR_LEN) != 0) {
+        return;
+    }
+
+    config_mask = ntohl(opm->mask);
+    old_config = p->config;
+    p->config = (old_config & ~config_mask)
+                | (ntohl(opm->config) & config_mask);
+
+    /* Compare against the configuration from *before* the update.  Comparing
+     * against the already-updated value, as this code used to, could never
+     * detect a transition, so the device was never touched. */
+    if (config_mask & OFPPC_PORT_DOWN) {
+        bool was_down = (old_config & OFPPC_PORT_DOWN) != 0;
+        bool now_down = (p->config & OFPPC_PORT_DOWN) != 0;
+
+        if (now_down && !was_down) {
+            netdev_turn_flags_off(p->netdev, NETDEV_UP, true);
+        } else if (!now_down && was_down) {
+            netdev_turn_flags_on(p->netdev, NETDEV_UP, true);
+        }
+    }
+}
+
+/* Update the port status field of the bridge port. A non-zero return
+ * value indicates some field has changed.
+ *
+ * NB: Callers of this function may hold the RCU read lock, so any
+ * additional checks must not sleep.
+ */
+static int
+update_port_status(struct sw_port *p)
+{
+    int retval;
+    enum netdev_flags flags;
+    uint32_t orig_config = p->config;
+    uint32_t orig_state = p->state;
+
+    /* Treat any nonzero return as failure.  The other netdev calls in this
+     * file report errors as positive errno values, which the previous '< 0'
+     * test would have missed, leaving 'flags' to be read uninitialized.
+     * (Presumably netdev_get_flags() follows the same convention --
+     * confirm.) */
+    if (netdev_get_flags(p->netdev, &flags) != 0) {
+        VLOG_WARN_RL(&rl, "could not get netdev flags for %s",
+                     netdev_get_name(p->netdev));
+        return 0;
+    }
+
+    /* Mirror the device's administrative state into the port config. */
+    if (flags & NETDEV_UP) {
+        p->config &= ~OFPPC_PORT_DOWN;
+    } else {
+        p->config |= OFPPC_PORT_DOWN;
+    }
+
+    /* Not all cards support getting link status, so don't warn on error.
+     * Any return value other than 0 or 1 leaves the state unchanged. */
+    retval = netdev_get_link_status(p->netdev);
+    if (retval == 1) {
+        p->state &= ~OFPPS_LINK_DOWN;
+    } else if (retval == 0) {
+        p->state |= OFPPS_LINK_DOWN;
+    }
+
+    return ((orig_config != p->config) || (orig_state != p->state));
+}
+
+/* Broadcasts an OFPT_PORT_STATUS message describing 'p' to every remote of
+ * its datapath.  'status' is the OFPPR_* reason code to report. */
+static void
+send_port_status(struct sw_port *p, uint8_t status)
+{
+    struct ofpbuf *buffer;
+    struct ofp_port_status *ops;
+    ops = make_openflow_xid(sizeof *ops, OFPT_PORT_STATUS, 0, &buffer);
+    ops->reason = status;
+    memset(ops->pad, 0, sizeof ops->pad);
+    fill_port_desc(p->dp, p, &ops->desc);
+
+    send_openflow_buffer(p->dp, buffer, NULL);
+}
+
+/* Broadcasts an OFPT_FLOW_EXPIRED message for 'flow' to every remote of
+ * 'dp', reporting 'reason' together with the flow's match, priority, age
+ * and packet/byte counters. */
+void
+send_flow_expired(struct datapath *dp, struct sw_flow *flow,
+                  enum ofp_flow_expired_reason reason)
+{
+    struct ofpbuf *buffer;
+    struct ofp_flow_expired *ofe;
+    ofe = make_openflow_xid(sizeof *ofe, OFPT_FLOW_EXPIRED, 0, &buffer);
+    flow_fill_match(&ofe->match, &flow->key);
+
+    ofe->priority = htons(flow->priority);
+    ofe->reason = reason;
+    memset(ofe->pad, 0, sizeof ofe->pad);
+
+    ofe->duration = htonl(time_now() - flow->created);
+    memset(ofe->pad2, 0, sizeof ofe->pad2);
+    ofe->packet_count = htonll(flow->packet_count);
+    ofe->byte_count = htonll(flow->byte_count);
+    send_openflow_buffer(dp, buffer, NULL);
+}
+
+/* Sends an OFPT_ERROR message with the given 'type' and 'code' to 'sender',
+ * or to every remote of 'dp' if 'sender' is null.  The 'len' bytes at
+ * 'data' are appended as the error payload. */
+void
+dp_send_error_msg(struct datapath *dp, const struct sender *sender,
+                  uint16_t type, uint16_t code, const void *data, size_t len)
+{
+    struct ofpbuf *buffer;
+    struct ofp_error_msg *oem;
+    oem = make_openflow_reply(sizeof(*oem)+len, OFPT_ERROR, sender, &buffer);
+    oem->type = htons(type);
+    oem->code = htons(code);
+    memcpy(oem->data, data, len);
+    send_openflow_buffer(dp, buffer, sender);
+}
+
+/* Appends to 'buffer' an ofp_flow_stats record for 'flow', immediately
+ * followed by the flow's actions.  'table_idx' is reported as the table id
+ * and 'now' is the time from which the flow's age is computed.
+ *
+ * Match fields are copied from the flow key as stored; only 'wildcards' is
+ * byte-swapped. */
+static void
+fill_flow_stats(struct ofpbuf *buffer, struct sw_flow *flow,
+                int table_idx, time_t now)
+{
+    struct ofp_flow_stats *ofs;
+    int length = sizeof *ofs + flow->sf_acts->actions_len;
+    ofs = ofpbuf_put_uninit(buffer, length);
+    ofs->length = htons(length);
+    ofs->table_id = table_idx;
+    ofs->pad = 0;
+    ofs->match.wildcards = htonl(flow->key.wildcards);
+    ofs->match.in_port = flow->key.flow.in_port;
+    memcpy(ofs->match.dl_src, flow->key.flow.dl_src, ETH_ADDR_LEN);
+    memcpy(ofs->match.dl_dst, flow->key.flow.dl_dst, ETH_ADDR_LEN);
+    ofs->match.dl_vlan = flow->key.flow.dl_vlan;
+    ofs->match.dl_type = flow->key.flow.dl_type;
+    ofs->match.nw_src = flow->key.flow.nw_src;
+    ofs->match.nw_dst = flow->key.flow.nw_dst;
+    ofs->match.nw_proto = flow->key.flow.nw_proto;
+    ofs->match.pad = 0;
+    ofs->match.tp_src = flow->key.flow.tp_src;
+    ofs->match.tp_dst = flow->key.flow.tp_dst;
+    ofs->duration = htonl(now - flow->created);
+    ofs->priority = htons(flow->priority);
+    ofs->idle_timeout = htons(flow->idle_timeout);
+    ofs->hard_timeout = htons(flow->hard_timeout);
+    memset(ofs->pad2, 0, sizeof ofs->pad2);
+    ofs->packet_count = htonll(flow->packet_count);
+    ofs->byte_count = htonll(flow->byte_count);
+    memcpy(ofs->actions, flow->sf_acts->actions, flow->sf_acts->actions_len);
+}
+
+
+/* 'buffer' was received on 'p', which may be a physical switch port or a
+ * null pointer. Process it according to 'dp''s flow table. Returns 0 if
+ * successful, in which case 'buffer' is destroyed, or -ESRCH if there is no
+ * matching flow, in which case 'buffer' still belongs to the caller.
*/
+int run_flow_through_tables(struct datapath *dp, struct ofpbuf *buffer,
+                            struct sw_port *p)
+{
+    struct sw_flow_key key;
+    struct sw_flow *match;
+    int is_fragment;
+
+    /* Received packets are looked up with an exact-match key. */
+    key.wildcards = 0;
+    is_fragment = flow_extract(buffer, p ? p->port_no : OFPP_NONE, &key.flow);
+    if (is_fragment && (dp->flags & OFPC_FRAG_MASK) == OFPC_FRAG_DROP) {
+        /* Drop fragment. */
+        ofpbuf_delete(buffer);
+        return 0;
+    }
+
+    if (p) {
+        /* Frames addressed to the STP multicast address are governed by
+         * OFPPC_NO_RECV_STP; all other frames by OFPPC_NO_RECV. */
+        uint32_t drop_flag = eth_addr_equals(key.flow.dl_dst, stp_eth_addr)
+                             ? OFPPC_NO_RECV_STP : OFPPC_NO_RECV;
+
+        if (p->config & drop_flag) {
+            ofpbuf_delete(buffer);
+            return 0;
+        }
+    }
+
+    match = chain_lookup(dp->chain, &key);
+    if (match == NULL) {
+        /* No flow matched, so 'buffer' stays with the caller. */
+        return -ESRCH;
+    }
+
+    flow_used(match, buffer);
+    execute_actions(dp, buffer, &key, match->sf_acts->actions,
+                    match->sf_acts->actions_len, false);
+    return 0;
+}
+
+/* 'buffer' was received on 'p', which may be a physical switch port or a
+ * null pointer. Process it according to 'dp''s flow table, sending it up to
+ * the controller if no flow matches. Takes ownership of 'buffer'.
*/
+void fwd_port_input(struct datapath *dp, struct ofpbuf *buffer,
+                    struct sw_port *p)
+{
+    if (run_flow_through_tables(dp, buffer, p)) {
+        /* 'p' is allowed to be null, so don't dereference it blindly; use
+         * the same OFPP_NONE fallback as run_flow_through_tables(). */
+        dp_output_control(dp, buffer, p ? p->port_no : OFPP_NONE,
+                          dp->miss_send_len, OFPR_NO_MATCH);
+    }
+}
+
+/* Handles OFPT_FEATURES_REQUEST by replying to 'sender' with the switch
+ * features.  Always returns 0. */
+static int
+recv_features_request(struct datapath *dp, const struct sender *sender,
+                      const void *msg)
+{
+    dp_send_features_reply(dp, sender);
+    return 0;
+}
+
+/* Handles OFPT_GET_CONFIG_REQUEST by replying to 'sender' with the current
+ * flags and miss_send_len.  Returns the result of sending the reply. */
+static int
+recv_get_config_request(struct datapath *dp, const struct sender *sender,
+                        const void *msg)
+{
+    struct ofpbuf *buffer;
+    struct ofp_switch_config *osc;
+
+    osc = make_openflow_reply(sizeof *osc, OFPT_GET_CONFIG_REPLY,
+                              sender, &buffer);
+
+    osc->flags = htons(dp->flags);
+    osc->miss_send_len = htons(dp->miss_send_len);
+
+    return send_openflow_buffer(dp, buffer, sender);
+}
+
+/* Handles OFPT_SET_CONFIG: stores the requested flags and miss_send_len in
+ * 'dp'.  Only OFPC_SEND_FLOW_EXP and the fragment-handling bits are kept;
+ * any fragment mode other than "normal" or "drop" is stored as "drop".
+ * Always returns 0. */
+static int
+recv_set_config(struct datapath *dp, const struct sender *sender UNUSED,
+                const void *msg)
+{
+    const struct ofp_switch_config *osc = msg;
+    int flags;
+
+    flags = ntohs(osc->flags) & (OFPC_SEND_FLOW_EXP | OFPC_FRAG_MASK);
+    if ((flags & OFPC_FRAG_MASK) != OFPC_FRAG_NORMAL
+        && (flags & OFPC_FRAG_MASK) != OFPC_FRAG_DROP) {
+        flags = (flags & ~OFPC_FRAG_MASK) | OFPC_FRAG_DROP;
+    }
+    dp->flags = flags;
+    dp->miss_send_len = ntohs(osc->miss_send_len);
+    return 0;
+}
+
+/* Handles OFPT_PACKET_OUT: runs the actions in the message on either the
+ * packet carried in the message (buffer id of all-ones) or a previously
+ * saved buffer.
+ *
+ * Returns 0 on success, -EINVAL if the message is malformed or its actions
+ * fail validation (an error message is sent to 'sender' for the latter), or
+ * -ESRCH if the named buffer is no longer available. */
+static int
+recv_packet_out(struct datapath *dp, const struct sender *sender,
+                const void *msg)
+{
+    const struct ofp_packet_out *opo = msg;
+    struct sw_flow_key key;
+    uint16_t v_code;
+    struct ofpbuf *buffer;
+    size_t msg_len = ntohs(opo->header.length);
+    size_t actions_len = ntohs(opo->actions_len);
+
+    /* Check 'msg_len' first so that the unsigned subtraction cannot wrap
+     * when the header claims less than a full ofp_packet_out. */
+    if (msg_len < sizeof *opo || actions_len > msg_len - sizeof *opo) {
+        VLOG_DBG_RL(&rl, "message too short for number of actions");
+        return -EINVAL;
+    }
+
+    if (ntohl(opo->buffer_id) == (uint32_t) -1) {
+        /* FIXME: can we avoid copying data here? */
+        size_t data_len = msg_len - sizeof *opo - actions_len;
+        buffer = ofpbuf_new(data_len);
+        ofpbuf_put(buffer, (const uint8_t *)opo->actions + actions_len,
+                   data_len);
+    } else {
+        buffer = retrieve_buffer(ntohl(opo->buffer_id));
+        if (!buffer) {
+            return -ESRCH;
+        }
+    }
+
+    /* Don't hand an uninitialized 'wildcards' to the action code. */
+    key.wildcards = 0;
+    flow_extract(buffer, ntohs(opo->in_port), &key.flow);
+
+    v_code = validate_actions(dp, &key, opo->actions, actions_len);
+    if (v_code != ACT_VALIDATION_OK) {
+        dp_send_error_msg(dp, sender, OFPET_BAD_ACTION, v_code,
+                          msg, ntohs(opo->header.length));
+        goto error;
+    }
+
+    execute_actions(dp, buffer, &key, opo->actions, actions_len, true);
+
+    return 0;
+
+error:
+    ofpbuf_delete(buffer);
+    return -EINVAL;
+}
+
+/* Handles OFPT_PORT_MOD by applying the requested port configuration.
+ * Always returns 0. */
+static int
+recv_port_mod(struct datapath *dp, const struct sender *sender UNUSED,
+              const void *msg)
+{
+    const struct ofp_port_mod *opm = msg;
+
+    update_port_flags(dp, opm);
+
+    return 0;
+}
+
+/* Handles the OFPFC_ADD flow_mod 'ofm': validates its actions, inserts the
+ * new flow into 'dp''s chain and, if the message names a buffered packet,
+ * runs the new actions on that packet.
+ *
+ * Returns 0 on success or a negative errno value: -ENOMEM if the flow could
+ * not be allocated, -EINVAL for invalid actions, the chain_insert() error,
+ * or -ESRCH if the flow was added but the buffered packet was gone.  When
+ * the flow is not added, any buffer named by the message is discarded. */
+static int
+add_flow(struct datapath *dp, const struct sender *sender,
+         const struct ofp_flow_mod *ofm)
+{
+    int error = -ENOMEM;
+    uint16_t v_code;
+    struct sw_flow *flow;
+    size_t actions_len = ntohs(ofm->header.length) - sizeof *ofm;
+
+    /* Allocate memory. */
+    flow = flow_alloc(actions_len);
+    if (flow == NULL)
+        goto error;
+
+    flow_extract_match(&flow->key, &ofm->match);
+
+    v_code = validate_actions(dp, &flow->key, ofm->actions, actions_len);
+    if (v_code != ACT_VALIDATION_OK) {
+        dp_send_error_msg(dp, sender, OFPET_BAD_ACTION, v_code,
+                          ofm, ntohs(ofm->header.length));
+        /* Invalid actions are not an allocation failure. */
+        error = -EINVAL;
+        goto error_free_flow;
+    }
+
+    /* Fill out flow. */
+    flow->priority = flow->key.wildcards ? ntohs(ofm->priority) : -1;
+    flow->idle_timeout = ntohs(ofm->idle_timeout);
+    flow->hard_timeout = ntohs(ofm->hard_timeout);
+    flow->used = flow->created = time_now();
+    flow->sf_acts->actions_len = actions_len;
+    flow->byte_count = 0;
+    flow->packet_count = 0;
+    memcpy(flow->sf_acts->actions, ofm->actions, actions_len);
+
+    /* Act. */
+    error = chain_insert(dp->chain, flow);
+    if (error == -ENOBUFS) {
+        dp_send_error_msg(dp, sender, OFPET_FLOW_MOD_FAILED,
+                OFPFMFC_ALL_TABLES_FULL, ofm, ntohs(ofm->header.length));
+        goto error_free_flow;
+    } else if (error) {
+        goto error_free_flow;
+    }
+    error = 0;
+    if (ntohl(ofm->buffer_id) != UINT32_MAX) {
+        struct ofpbuf *buffer = retrieve_buffer(ntohl(ofm->buffer_id));
+        if (buffer) {
+            struct sw_flow_key key;
+            uint16_t in_port = ntohs(ofm->match.in_port);
+            flow_used(flow, buffer);
+            /* As in run_flow_through_tables(), a packet key is exact. */
+            key.wildcards = 0;
+            flow_extract(buffer, in_port, &key.flow);
+            execute_actions(dp, buffer, &key,
+                            ofm->actions, actions_len, false);
+        } else {
+            error = -ESRCH;
+        }
+    }
+    return error;
+
+error_free_flow:
+    flow_free(flow);
+error:
+    if (ntohl(ofm->buffer_id) != (uint32_t) -1)
+        discard_buffer(ntohl(ofm->buffer_id));
+    return error;
+}
+
+/* Handles the OFPFC_MODIFY and OFPFC_MODIFY_STRICT flow_mod 'ofm': validates
+ * its actions, replaces the actions of the matching flows in 'dp''s chain
+ * and, if the message names a buffered packet, runs the new actions on that
+ * packet.
+ *
+ * Returns 0 on success, -EINVAL if the actions are invalid (an error message
+ * is sent and any named buffer is discarded), or -ESRCH if the buffered
+ * packet was gone.  This function used to return -ENOMEM on success because
+ * its result variable was never reset. */
+static int
+mod_flow(struct datapath *dp, const struct sender *sender,
+         const struct ofp_flow_mod *ofm)
+{
+    int error = 0;
+    uint16_t v_code;
+    size_t actions_len;
+    struct sw_flow_key key;
+    uint16_t priority;
+    int strict;
+
+    flow_extract_match(&key, &ofm->match);
+
+    actions_len = ntohs(ofm->header.length) - sizeof *ofm;
+
+    v_code = validate_actions(dp, &key, ofm->actions, actions_len);
+    if (v_code != ACT_VALIDATION_OK) {
+        dp_send_error_msg(dp, sender, OFPET_BAD_ACTION, v_code,
+                          ofm, ntohs(ofm->header.length));
+        error = -EINVAL;
+        goto error;
+    }
+
+    priority = key.wildcards ? ntohs(ofm->priority) : -1;
+    strict = (ofm->command == htons(OFPFC_MODIFY_STRICT)) ? 1 : 0;
+    chain_modify(dp->chain, &key, priority, strict, ofm->actions, actions_len);
+
+    if (ntohl(ofm->buffer_id) != UINT32_MAX) {
+        struct ofpbuf *buffer = retrieve_buffer(ntohl(ofm->buffer_id));
+        if (buffer) {
+            struct sw_flow_key skb_key;
+            uint16_t in_port = ntohs(ofm->match.in_port);
+            /* As in run_flow_through_tables(), a packet key is exact. */
+            skb_key.wildcards = 0;
+            flow_extract(buffer, in_port, &skb_key.flow);
+            execute_actions(dp, buffer, &skb_key,
+                            ofm->actions, actions_len, false);
+        } else {
+            error = -ESRCH;
+        }
+    }
+    return error;
+
+error:
+    if (ntohl(ofm->buffer_id) != (uint32_t) -1)
+        discard_buffer(ntohl(ofm->buffer_id));
+    return error;
+}
+
+/* Handles OFPT_FLOW_MOD by dispatching on the command in 'msg'.
+ *
+ * Returns the result of add_flow() or mod_flow() for add and modify
+ * commands; for delete commands 0 if chain_delete() reports a deletion and
+ * -ESRCH otherwise; and -ENODEV for an unknown command. */
+static int
+recv_flow(struct datapath *dp, const struct sender *sender,
+          const void *msg)
+{
+    const struct ofp_flow_mod *ofm = msg;
+    uint16_t command = ntohs(ofm->command);
+
+    if (command == OFPFC_ADD) {
+        return add_flow(dp, sender, ofm);
+    } else if ((command == OFPFC_MODIFY) || (command == OFPFC_MODIFY_STRICT)) {
+        return mod_flow(dp, sender, ofm);
+    } else if (command == OFPFC_DELETE) {
+        struct sw_flow_key key;
+        flow_extract_match(&key, &ofm->match);
+        return chain_delete(dp->chain, &key, ofm->out_port, 0, 0) ? 0 : -ESRCH;
+    } else if (command == OFPFC_DELETE_STRICT) {
+        struct sw_flow_key key;
+        uint16_t priority;
+        flow_extract_match(&key, &ofm->match);
+        priority = key.wildcards ? ntohs(ofm->priority) : -1;
+        return chain_delete(dp->chain, &key, ofm->out_port,
+                            priority, 1) ? 0 : -ESRCH;
+    } else {
+        return -ENODEV;
+    }
+}
+
+/* Appends an ofp_desc_stats record to 'buffer', filled from the externally
+ * defined description strings.  Each field is explicitly terminated,
+ * because strncpy() leaves it unterminated when the source fills the whole
+ * field.  Returns 0: the reply is complete after one call. */
+static int desc_stats_dump(struct datapath *dp, void *state,
+                           struct ofpbuf *buffer)
+{
+    struct ofp_desc_stats *ods = ofpbuf_put_uninit(buffer, sizeof *ods);
+
+    strncpy(ods->mfr_desc, &mfr_desc, sizeof ods->mfr_desc);
+    ods->mfr_desc[sizeof ods->mfr_desc - 1] = '\0';
+    strncpy(ods->hw_desc, &hw_desc, sizeof ods->hw_desc);
+    ods->hw_desc[sizeof ods->hw_desc - 1] = '\0';
+    strncpy(ods->sw_desc, &sw_desc, sizeof ods->sw_desc);
+    ods->sw_desc[sizeof ods->sw_desc - 1] = '\0';
+    strncpy(ods->serial_num, &serial_num, sizeof ods->serial_num);
+    ods->serial_num[sizeof ods->serial_num - 1] = '\0';
+
+    return 0;
+}
+
+/* Progress of one flow statistics dump, which may take several replies. */
+struct flow_stats_state {
+    int table_idx;                      /* Table currently being dumped. */
+    struct sw_table_position position;  /* Resume point within that table. */
+    struct ofp_flow_stats_request rq;   /* Copy of the request. */
+    time_t now;                         /* Time base for flow durations. */
+
+    struct ofpbuf *buffer;              /* Reply currently being filled. */
+};
+
+/* A flow stats reply is considered full once it reaches this size. */
+#define MAX_FLOW_STATS_BYTES 4096
+
+/* Allocates the state for a flow stats dump from request 'body' and stores
+ * it in '*state'.  A table id of 0xff in the request means "all tables", so
+ * the dump then starts at table 0.  Always returns 0.
+ *
+ * NOTE(review): 'body_len' is not checked here; presumably the caller has
+ * validated it -- confirm. */
+static int flow_stats_init(struct datapath *dp, const void *body, int body_len,
+                           void **state)
+{
+    const struct ofp_flow_stats_request *fsr = body;
+    struct flow_stats_state *s = xmalloc(sizeof *s);
+    s->table_idx = fsr->table_id == 0xff ? 0 : fsr->table_id;
+    memset(&s->position, 0, sizeof s->position);
+    s->rq = *fsr;
+    *state = s;
+    return 0;
+}
+
+/* Table iteration callback: appends 'flow' to the reply being built.
+ * Returns nonzero once the reply is full, which ends the iteration. */
+static int flow_stats_dump_callback(struct sw_flow *flow, void *private)
+{
+    struct flow_stats_state *s = private;
+    fill_flow_stats(s->buffer, flow, s->table_idx, s->now);
+    return s->buffer->size >= MAX_FLOW_STATS_BYTES;
+}
+
+/* Appends to 'buffer' as many matching flows as fit, resuming from the
+ * position recorded in 'state'.  Returns nonzero if the reply filled up, so
+ * that the dump must continue in another reply, or 0 if it is complete. */
+static int flow_stats_dump(struct datapath *dp, void *state,
+                           struct ofpbuf *buffer)
+{
+    struct flow_stats_state *s = state;
+    struct sw_flow_key match_key;
+
+    flow_extract_match(&match_key, &s->rq.match);
+    s->buffer = buffer;
+    s->now = time_now();
+    while (s->table_idx < dp->chain->n_tables
+           && (s->rq.table_id == 0xff || s->rq.table_id == s->table_idx))
+    {
+        struct sw_table *table = dp->chain->tables[s->table_idx];
+
+        /* A nonzero result means the callback stopped early because the
+         * reply is full; 's->position' tells the next call where to resume. */
+        if (table->iterate(table, &match_key, s->rq.out_port,
+                           &s->position, flow_stats_dump_callback, s))
+            break;
+
+        s->table_idx++;
+        memset(&s->position, 0, sizeof s->position);
+    }
+    return s->buffer->size >= MAX_FLOW_STATS_BYTES;
+}
+
+/* Frees the state allocated by flow_stats_init(). */
+static void flow_stats_done(void *state)
+{
+    free(state);
+}
+
+struct
aggregate_stats_state { + struct ofp_aggregate_stats_request rq; +}; + +static int aggregate_stats_init(struct datapath *dp, + const void *body, int body_len, + void **state) +{ + const struct ofp_aggregate_stats_request *rq = body; + struct aggregate_stats_state *s = xmalloc(sizeof *s); + s->rq = *rq; + *state = s; + return 0; +} + +static int aggregate_stats_dump_callback(struct sw_flow *flow, void *private) +{ + struct ofp_aggregate_stats_reply *rpy = private; + rpy->packet_count += flow->packet_count; + rpy->byte_count += flow->byte_count; + rpy->flow_count++; + return 0; +} + +static int aggregate_stats_dump(struct datapath *dp, void *state, + struct ofpbuf *buffer) +{ + struct aggregate_stats_state *s = state; + struct ofp_aggregate_stats_request *rq = &s->rq; + struct ofp_aggregate_stats_reply *rpy; + struct sw_table_position position; + struct sw_flow_key match_key; + int table_idx; + + rpy = ofpbuf_put_uninit(buffer, sizeof *rpy); + memset(rpy, 0, sizeof *rpy); + + flow_extract_match(&match_key, &rq->match); + table_idx = rq->table_id == 0xff ? 
0 : rq->table_id; + memset(&position, 0, sizeof position); + while (table_idx < dp->chain->n_tables + && (rq->table_id == 0xff || rq->table_id == table_idx)) + { + struct sw_table *table = dp->chain->tables[table_idx]; + int error; + + error = table->iterate(table, &match_key, rq->out_port, &position, + aggregate_stats_dump_callback, rpy); + if (error) + return error; + + table_idx++; + memset(&position, 0, sizeof position); + } + + rpy->packet_count = htonll(rpy->packet_count); + rpy->byte_count = htonll(rpy->byte_count); + rpy->flow_count = htonl(rpy->flow_count); + return 0; +} + +static void aggregate_stats_done(void *state) +{ + free(state); +} + +static int table_stats_dump(struct datapath *dp, void *state, + struct ofpbuf *buffer) +{ + int i; + for (i = 0; i < dp->chain->n_tables; i++) { + struct ofp_table_stats *ots = ofpbuf_put_uninit(buffer, sizeof *ots); + struct sw_table_stats stats; + dp->chain->tables[i]->stats(dp->chain->tables[i], &stats); + strncpy(ots->name, stats.name, sizeof ots->name); + ots->table_id = i; + ots->wildcards = htonl(stats.wildcards); + memset(ots->pad, 0, sizeof ots->pad); + ots->max_entries = htonl(stats.max_flows); + ots->active_count = htonl(stats.n_flows); + ots->lookup_count = htonll(stats.n_lookup); + ots->matched_count = htonll(stats.n_matched); + } + return 0; +} + +struct port_stats_state { + int port; +}; + +static int port_stats_init(struct datapath *dp, const void *body, int body_len, + void **state) +{ + struct port_stats_state *s = xmalloc(sizeof *s); + s->port = 0; + *state = s; + return 0; +} + +static void +dump_port_stats(struct sw_port *port, struct ofpbuf *buffer) +{ + struct ofp_port_stats *ops = ofpbuf_put_uninit(buffer, sizeof *ops); + ops->port_no = htons(port->port_no); + memset(ops->pad, 0, sizeof ops->pad); + ops->rx_packets = htonll(port->rx_packets); + ops->tx_packets = htonll(port->tx_packets); + ops->rx_bytes = htonll(port->rx_bytes); + ops->tx_bytes = htonll(port->tx_bytes); + ops->rx_dropped = 
htonll(-1); + ops->tx_dropped = htonll(port->tx_dropped); + ops->rx_errors = htonll(-1); + ops->tx_errors = htonll(-1); + ops->rx_frame_err = htonll(-1); + ops->rx_over_err = htonll(-1); + ops->rx_crc_err = htonll(-1); + ops->collisions = htonll(-1); +} + +static int port_stats_dump(struct datapath *dp, void *state, + struct ofpbuf *buffer) +{ + struct port_stats_state *s = state; + int i; + + for (i = s->port; i < DP_MAX_PORTS; i++) { + struct sw_port *p = &dp->ports[i]; + if (p->netdev) { + dump_port_stats(p, buffer); + } + } + s->port = i; + + if (dp->local_port) { + dump_port_stats(dp->local_port, buffer); + s->port = OFPP_LOCAL + 1; + } + return 0; +} + +static void port_stats_done(void *state) +{ + free(state); +} + +struct stats_type { + /* Value for 'type' member of struct ofp_stats_request. */ + int type; + + /* Minimum and maximum acceptable number of bytes in body member of + * struct ofp_stats_request. */ + size_t min_body, max_body; + + /* Prepares to dump some kind of statistics on 'dp'. 'body' and + * 'body_len' are the 'body' member of the struct ofp_stats_request. + * Returns zero if successful, otherwise a negative error code. + * May initialize '*state' to state information. May be null if no + * initialization is required.*/ + int (*init)(struct datapath *dp, const void *body, int body_len, + void **state); + + /* Appends statistics for 'dp' to 'buffer', which initially contains a + * struct ofp_stats_reply. On success, it should return 1 if it should be + * called again later with another buffer, 0 if it is done, or a negative + * errno value on failure. */ + int (*dump)(struct datapath *dp, void *state, struct ofpbuf *buffer); + + /* Cleans any state created by the init or dump functions. May be null + * if no cleanup is required. 
*/ + void (*done)(void *state); +}; + +static const struct stats_type stats[] = { + { + OFPST_DESC, + 0, + 0, + NULL, + desc_stats_dump, + NULL + }, + { + OFPST_FLOW, + sizeof(struct ofp_flow_stats_request), + sizeof(struct ofp_flow_stats_request), + flow_stats_init, + flow_stats_dump, + flow_stats_done + }, + { + OFPST_AGGREGATE, + sizeof(struct ofp_aggregate_stats_request), + sizeof(struct ofp_aggregate_stats_request), + aggregate_stats_init, + aggregate_stats_dump, + aggregate_stats_done + }, + { + OFPST_TABLE, + 0, + 0, + NULL, + table_stats_dump, + NULL + }, + { + OFPST_PORT, + 0, + 0, + port_stats_init, + port_stats_dump, + port_stats_done + }, +}; + +struct stats_dump_cb { + bool done; + struct ofp_stats_request *rq; + struct sender sender; + const struct stats_type *s; + void *state; +}; + +static int +stats_dump(struct datapath *dp, void *cb_) +{ + struct stats_dump_cb *cb = cb_; + struct ofp_stats_reply *osr; + struct ofpbuf *buffer; + int err; + + if (cb->done) { + return 0; + } + + osr = make_openflow_reply(sizeof *osr, OFPT_STATS_REPLY, &cb->sender, + &buffer); + osr->type = htons(cb->s->type); + osr->flags = 0; + + err = cb->s->dump(dp, cb->state, buffer); + if (err >= 0) { + int err2; + if (!err) { + cb->done = true; + } else { + /* Buffer might have been reallocated, so find our data again. 
*/ + osr = ofpbuf_at_assert(buffer, 0, sizeof *osr); + osr->flags = ntohs(OFPSF_REPLY_MORE); + } + err2 = send_openflow_buffer(dp, buffer, &cb->sender); + if (err2) { + err = err2; + } + } + + return err; +} + +static void +stats_done(void *cb_) +{ + struct stats_dump_cb *cb = cb_; + if (cb) { + if (cb->s->done) { + cb->s->done(cb->state); + } + free(cb); + } +} + +static int +recv_stats_request(struct datapath *dp, const struct sender *sender, + const void *oh) +{ + const struct ofp_stats_request *rq = oh; + size_t rq_len = ntohs(rq->header.length); + const struct stats_type *st; + struct stats_dump_cb *cb; + int type, body_len; + int err; + + type = ntohs(rq->type); + for (st = stats; ; st++) { + if (st >= &stats[ARRAY_SIZE(stats)]) { + VLOG_WARN_RL(&rl, "received stats request of unknown type %d", + type); + return -EINVAL; + } else if (type == st->type) { + break; + } + } + + cb = xmalloc(sizeof *cb); + cb->done = false; + cb->rq = xmemdup(rq, rq_len); + cb->sender = *sender; + cb->s = st; + cb->state = NULL; + + body_len = rq_len - offsetof(struct ofp_stats_request, body); + if (body_len < cb->s->min_body || body_len > cb->s->max_body) { + VLOG_WARN_RL(&rl, "stats request type %d with bad body length %d", + type, body_len); + err = -EINVAL; + goto error; + } + + if (cb->s->init) { + err = cb->s->init(dp, rq->body, body_len, &cb->state); + if (err) { + VLOG_WARN_RL(&rl, + "failed initialization of stats request type %d: %s", + type, strerror(-err)); + goto error; + } + } + + remote_start_dump(sender->remote, stats_dump, stats_done, cb); + return 0; + +error: + free(cb->rq); + free(cb); + return err; +} + +static int +recv_echo_request(struct datapath *dp, const struct sender *sender, + const void *oh) +{ + return send_openflow_buffer(dp, make_echo_reply(oh), sender); +} + +static int +recv_echo_reply(struct datapath *dp UNUSED, const struct sender *sender UNUSED, + const void *oh UNUSED) +{ + return 0; +} + +/* 'msg', which is 'length' bytes long, was received 
from the control path. + * Apply it to 'chain'. */ +int +fwd_control_input(struct datapath *dp, const struct sender *sender, + const void *msg, size_t length) +{ + int (*handler)(struct datapath *, const struct sender *, const void *); + struct ofp_header *oh; + size_t min_size; + + /* Check encapsulated length. */ + oh = (struct ofp_header *) msg; + if (ntohs(oh->length) > length) { + return -EINVAL; + } + assert(oh->version == OFP_VERSION); + + /* Figure out how to handle it. */ + switch (oh->type) { + case OFPT_FEATURES_REQUEST: + min_size = sizeof(struct ofp_header); + handler = recv_features_request; + break; + case OFPT_GET_CONFIG_REQUEST: + min_size = sizeof(struct ofp_header); + handler = recv_get_config_request; + break; + case OFPT_SET_CONFIG: + min_size = sizeof(struct ofp_switch_config); + handler = recv_set_config; + break; + case OFPT_PACKET_OUT: + min_size = sizeof(struct ofp_packet_out); + handler = recv_packet_out; + break; + case OFPT_FLOW_MOD: + min_size = sizeof(struct ofp_flow_mod); + handler = recv_flow; + break; + case OFPT_PORT_MOD: + min_size = sizeof(struct ofp_port_mod); + handler = recv_port_mod; + break; + case OFPT_STATS_REQUEST: + min_size = sizeof(struct ofp_stats_request); + handler = recv_stats_request; + break; + case OFPT_ECHO_REQUEST: + min_size = sizeof(struct ofp_header); + handler = recv_echo_request; + break; + case OFPT_ECHO_REPLY: + min_size = sizeof(struct ofp_header); + handler = recv_echo_reply; + break; + default: + dp_send_error_msg(dp, sender, OFPET_BAD_REQUEST, OFPBRC_BAD_TYPE, + msg, length); + return -EINVAL; + } + + /* Handle it. */ + if (length < min_size) + return -EFAULT; + return handler(dp, sender, msg); +} + +/* Packet buffering. 
*/ + +#define OVERWRITE_SECS 1 + +struct packet_buffer { + struct ofpbuf *buffer; + uint32_t cookie; + time_t timeout; +}; + +static struct packet_buffer buffers[N_PKT_BUFFERS]; +static unsigned int buffer_idx; + +uint32_t save_buffer(struct ofpbuf *buffer) +{ + struct packet_buffer *p; + uint32_t id; + + buffer_idx = (buffer_idx + 1) & PKT_BUFFER_MASK; + p = &buffers[buffer_idx]; + if (p->buffer) { + /* Don't buffer packet if existing entry is less than + * OVERWRITE_SECS old. */ + if (time_now() < p->timeout) { /* FIXME */ + return -1; + } else { + ofpbuf_delete(p->buffer); + } + } + /* Don't use maximum cookie value since the all-bits-1 id is + * special. */ + if (++p->cookie >= (1u << PKT_COOKIE_BITS) - 1) + p->cookie = 0; + p->buffer = ofpbuf_clone(buffer); /* FIXME */ + p->timeout = time_now() + OVERWRITE_SECS; /* FIXME */ + id = buffer_idx | (p->cookie << PKT_BUFFER_BITS); + + return id; +} + +static struct ofpbuf *retrieve_buffer(uint32_t id) +{ + struct ofpbuf *buffer = NULL; + struct packet_buffer *p; + + p = &buffers[id & PKT_BUFFER_MASK]; + if (p->cookie == id >> PKT_BUFFER_BITS) { + buffer = p->buffer; + p->buffer = NULL; + } else { + printf("cookie mismatch: %x != %x\n", + id >> PKT_BUFFER_BITS, p->cookie); + } + + return buffer; +} + +static void discard_buffer(uint32_t id) +{ + struct packet_buffer *p; + + p = &buffers[id & PKT_BUFFER_MASK]; + if (p->cookie == id >> PKT_BUFFER_BITS) { + ofpbuf_delete(p->buffer); + p->buffer = NULL; + } +} diff --git a/udatapath/datapath.h b/udatapath/datapath.h new file mode 100644 index 000000000..a8cf27f21 --- /dev/null +++ b/udatapath/datapath.h @@ -0,0 +1,58 @@ +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the 
community. However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. + */ + +/* Interface exported by OpenFlow module. 
*/ + +#ifndef DATAPATH_H +#define DATAPATH_H 1 + +#include +#include +#include "ofpbuf.h" + +struct datapath; +struct rconn; +struct pvconn; + +int dp_new(struct datapath **, uint64_t dpid); +int dp_add_port(struct datapath *, const char *netdev); +int dp_add_local_port(struct datapath *, const char *netdev); +void dp_add_pvconn(struct datapath *, struct pvconn *); +void dp_run(struct datapath *); +void dp_wait(struct datapath *); +void dp_output_port(struct datapath *, struct ofpbuf *, int in_port, + int out_port, bool ignore_no_fwd); +void dp_output_control(struct datapath *, struct ofpbuf *, int in_port, + size_t max_len, int reason); + +#endif /* datapath.h */ diff --git a/udatapath/dp_act.c b/udatapath/dp_act.c new file mode 100644 index 000000000..3322d9fea --- /dev/null +++ b/udatapath/dp_act.c @@ -0,0 +1,476 @@ +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. + */ + +/* Functions for executing OpenFlow actions. */ + +#include +#include "csum.h" +#include "packets.h" +#include "dp_act.h" +#include "openflow/nicira-ext.h" +#include "nx_act.h" + + +static uint16_t +validate_output(struct datapath *dp, const struct sw_flow_key *key, + const struct ofp_action_header *ah) +{ + struct ofp_action_output *oa = (struct ofp_action_output *)ah; + + /* To prevent loops, make sure there's no action to send to the + * OFP_TABLE virtual port. + */ + if (oa->port == htons(OFPP_NONE) || oa->port == key->flow.in_port) { + return OFPBAC_BAD_OUT_PORT; + } + return ACT_VALIDATION_OK; +} + +static void +do_output(struct datapath *dp, struct ofpbuf *buffer, int in_port, + size_t max_len, int out_port, bool ignore_no_fwd) +{ + if (out_port != OFPP_CONTROLLER) { + dp_output_port(dp, buffer, in_port, out_port, ignore_no_fwd); + } else { + dp_output_control(dp, buffer, in_port, max_len, OFPR_ACTION); + } +} + +/* Modify vlan tag control information (TCI). Only sets the TCI bits + * indicated by 'mask'. If no vlan tag is present, one is added. 
+ */ +static void +modify_vlan_tci(struct ofpbuf *buffer, struct sw_flow_key *key, + uint16_t tci, uint16_t mask) +{ + struct vlan_eth_header *veh; + + if (key->flow.dl_vlan != htons(OFP_VLAN_NONE)) { + /* Modify vlan id, but maintain other TCI values */ + veh = buffer->l2; + veh->veth_tci &= ~htons(mask); + veh->veth_tci |= htons(tci); + } else { + /* Insert new vlan id. */ + struct eth_header *eh = buffer->l2; + struct vlan_eth_header tmp; + memcpy(tmp.veth_dst, eh->eth_dst, ETH_ADDR_LEN); + memcpy(tmp.veth_src, eh->eth_src, ETH_ADDR_LEN); + tmp.veth_type = htons(ETH_TYPE_VLAN); + tmp.veth_tci = htons(tci); + tmp.veth_next_type = eh->eth_type; + + veh = ofpbuf_push_uninit(buffer, VLAN_HEADER_LEN); + memcpy(veh, &tmp, sizeof tmp); + buffer->l2 = (char*)buffer->l2 - VLAN_HEADER_LEN; + } + + key->flow.dl_vlan = veh->veth_tci & htons(VLAN_VID_MASK); +} + + +/* Remove an existing vlan header if it exists. */ +static void +vlan_pull_tag(struct ofpbuf *buffer) +{ + struct vlan_eth_header *veh = buffer->l2; + + if (veh->veth_type == htons(ETH_TYPE_VLAN)) { + struct eth_header tmp; + + memcpy(tmp.eth_dst, veh->veth_dst, ETH_ADDR_LEN); + memcpy(tmp.eth_src, veh->veth_src, ETH_ADDR_LEN); + tmp.eth_type = veh->veth_next_type; + + buffer->size -= VLAN_HEADER_LEN; + buffer->data = (char*)buffer->data + VLAN_HEADER_LEN; + buffer->l2 = (char*)buffer->l2 + VLAN_HEADER_LEN; + memcpy(buffer->data, &tmp, sizeof tmp); + } +} + +static void +set_vlan_vid(struct ofpbuf *buffer, struct sw_flow_key *key, + const struct ofp_action_header *ah) +{ + struct ofp_action_vlan_vid *va = (struct ofp_action_vlan_vid *)ah; + uint16_t tci = ntohs(va->vlan_vid); + + modify_vlan_tci(buffer, key, tci, VLAN_VID_MASK); +} + +static void +set_vlan_pcp(struct ofpbuf *buffer, struct sw_flow_key *key, + const struct ofp_action_header *ah) +{ + struct ofp_action_vlan_pcp *va = (struct ofp_action_vlan_pcp *)ah; + uint16_t tci = (uint16_t)va->vlan_pcp << 13; + + modify_vlan_tci(buffer, key, tci, VLAN_PCP_MASK); 
+} + +static void +strip_vlan(struct ofpbuf *buffer, struct sw_flow_key *key, + const struct ofp_action_header *ah) +{ + vlan_pull_tag(buffer); + key->flow.dl_vlan = htons(OFP_VLAN_NONE); +} + +static void +set_dl_addr(struct ofpbuf *buffer, struct sw_flow_key *key, + const struct ofp_action_header *ah) +{ + struct ofp_action_dl_addr *da = (struct ofp_action_dl_addr *)ah; + struct eth_header *eh = buffer->l2; + + if (da->type == htons(OFPAT_SET_DL_SRC)) { + memcpy(eh->eth_src, da->dl_addr, sizeof eh->eth_src); + } else { + memcpy(eh->eth_dst, da->dl_addr, sizeof eh->eth_dst); + } +} + +/* Executes an OFPAT_SET_NW_SRC or OFPAT_SET_NW_DST action on 'buffer'. + * Only acts when 'key' says the packet is IP: the source or destination + * address is replaced by the one in 'ah', and the IP checksum and, for TCP + * or UDP, the transport checksum are adjusted with recalc_csum32(). A UDP + * checksum of zero is left untouched. */ +static void +set_nw_addr(struct ofpbuf *buffer, struct sw_flow_key *key, + const struct ofp_action_header *ah) +{ + struct ofp_action_nw_addr *na = (struct ofp_action_nw_addr *)ah; + uint16_t eth_proto = ntohs(key->flow.dl_type); + + if (eth_proto == ETH_TYPE_IP) { + struct ip_header *nh = buffer->l3; + uint8_t nw_proto = key->flow.nw_proto; + uint32_t new, *field; + + new = na->nw_addr; + /* 'type' is still in network byte order, as in set_dl_addr(). */ + field = (na->type == htons(OFPAT_SET_NW_SRC) + ? &nh->ip_src : &nh->ip_dst); + if (nw_proto == IP_TYPE_TCP) { + struct tcp_header *th = buffer->l4; + th->tcp_csum = recalc_csum32(th->tcp_csum, *field, new); + } else if (nw_proto == IP_TYPE_UDP) { + struct udp_header *th = buffer->l4; + if (th->udp_csum) { + th->udp_csum = recalc_csum32(th->udp_csum, *field, new); + if (!th->udp_csum) { + th->udp_csum = 0xffff; + } + } + } + nh->ip_csum = recalc_csum32(nh->ip_csum, *field, new); + *field = new; + } +} + +/* Executes an OFPAT_SET_TP_SRC or OFPAT_SET_TP_DST action on 'buffer'. + * Only acts when 'key' says the packet is IP carrying TCP or UDP: the + * source or destination port is replaced by the one in 'ah' and the + * transport checksum is adjusted with recalc_csum16(). A UDP checksum of + * zero is left untouched. */ +static void +set_tp_port(struct ofpbuf *buffer, struct sw_flow_key *key, + const struct ofp_action_header *ah) +{ + struct ofp_action_tp_port *ta = (struct ofp_action_tp_port *)ah; + uint16_t eth_proto = ntohs(key->flow.dl_type); + + if (eth_proto == ETH_TYPE_IP) { + uint8_t nw_proto = key->flow.nw_proto; + uint16_t new, *field; + + new = ta->tp_port; + if (nw_proto == IP_TYPE_TCP) { + struct tcp_header *th = buffer->l4; + /* 'type' is still in network byte order, as in set_dl_addr(). */ + field = ta->type == htons(OFPAT_SET_TP_SRC) ?
&th->tcp_src : &th->tcp_dst; + th->tcp_csum = recalc_csum16(th->tcp_csum, *field, new); + *field = new; + } else if (nw_proto == IP_TYPE_UDP) { + struct udp_header *th = buffer->l4; + field = (ta->type == htons(OFPAT_SET_TP_SRC) + ? &th->udp_src : &th->udp_dst); + /* A zero UDP checksum means "no checksum": keep it zero, and + * never turn a real checksum into zero. Same rule as in + * set_nw_addr(). */ + if (th->udp_csum) { + th->udp_csum = recalc_csum16(th->udp_csum, *field, new); + if (!th->udp_csum) { + th->udp_csum = 0xffff; + } + } + *field = new; + } + } +} + +struct openflow_action { + size_t min_size; + size_t max_size; + uint16_t (*validate)(struct datapath *dp, + const struct sw_flow_key *key, + const struct ofp_action_header *ah); + void (*execute)(struct ofpbuf *buffer, + struct sw_flow_key *key, + const struct ofp_action_header *ah); +}; + +static const struct openflow_action of_actions[] = { + [OFPAT_OUTPUT] = { + sizeof(struct ofp_action_output), + sizeof(struct ofp_action_output), + validate_output, + NULL /* This is optimized into execute_actions */ + }, + [OFPAT_SET_VLAN_VID] = { + sizeof(struct ofp_action_vlan_vid), + sizeof(struct ofp_action_vlan_vid), + NULL, + set_vlan_vid + }, + [OFPAT_SET_VLAN_PCP] = { + sizeof(struct ofp_action_vlan_pcp), + sizeof(struct ofp_action_vlan_pcp), + NULL, + set_vlan_pcp + }, + [OFPAT_STRIP_VLAN] = { + sizeof(struct ofp_action_header), + sizeof(struct ofp_action_header), + NULL, + strip_vlan + }, + [OFPAT_SET_DL_SRC] = { + sizeof(struct ofp_action_dl_addr), + sizeof(struct ofp_action_dl_addr), + NULL, + set_dl_addr + }, + [OFPAT_SET_DL_DST] = { + sizeof(struct ofp_action_dl_addr), + sizeof(struct ofp_action_dl_addr), + NULL, + set_dl_addr + }, + [OFPAT_SET_NW_SRC] = { + sizeof(struct ofp_action_nw_addr), + sizeof(struct ofp_action_nw_addr), + NULL, + set_nw_addr + }, + [OFPAT_SET_NW_DST] = { + sizeof(struct ofp_action_nw_addr), + sizeof(struct ofp_action_nw_addr), + NULL, + set_nw_addr + }, + [OFPAT_SET_TP_SRC] = { + sizeof(struct ofp_action_tp_port), + sizeof(struct ofp_action_tp_port), + NULL, + set_tp_port + }, + [OFPAT_SET_TP_DST] = { + sizeof(struct ofp_action_tp_port), + sizeof(struct ofp_action_tp_port), + NULL, + set_tp_port + } +
/* OFPAT_VENDOR is not here, since it would blow up the array size. */ +}; + +/* Validate built-in OpenFlow actions. Either returns ACT_VALIDATION_OK + * or an OFPET_BAD_ACTION error code. */ +static uint16_t +validate_ofpat(struct datapath *dp, const struct sw_flow_key *key, + const struct ofp_action_header *ah, uint16_t type, uint16_t len) +{ + int ret = ACT_VALIDATION_OK; + const struct openflow_action *act = &of_actions[type]; + + if ((len < act->min_size) || (len > act->max_size)) { + return OFPBAC_BAD_LEN; + } + + if (act->validate) { + ret = act->validate(dp, key, ah); + } + + return ret; +} + +/* Validate vendor-defined actions. Either returns ACT_VALIDATION_OK + * or an OFPET_BAD_ACTION error code. */ +static uint16_t +validate_vendor(struct datapath *dp, const struct sw_flow_key *key, + const struct ofp_action_header *ah, uint16_t len) +{ + struct ofp_action_vendor_header *avh; + int ret = ACT_VALIDATION_OK; + + if (len < sizeof(struct ofp_action_vendor_header)) { + return OFPBAC_BAD_LEN; + } + + avh = (struct ofp_action_vendor_header *)ah; + + switch(ntohl(avh->vendor)) { + case NX_VENDOR_ID: + ret = nx_validate_act(dp, key, avh, len); + break; + + default: + return OFPBAC_BAD_VENDOR; + } + + return ret; +} + +/* Validates a list of actions. If a problem is found, a code for the + * OFPET_BAD_ACTION error type is returned. If the action list validates, + * ACT_VALIDATION_OK is returned. */ +uint16_t +validate_actions(struct datapath *dp, const struct sw_flow_key *key, + const struct ofp_action_header *actions, size_t actions_len) +{ + uint8_t *p = (uint8_t *)actions; + int err; + + while (actions_len >= sizeof(struct ofp_action_header)) { + struct ofp_action_header *ah = (struct ofp_action_header *)p; + size_t len = ntohs(ah->len); + uint16_t type; + + /* Make there's enough remaining data for the specified length + * and that the action length is a multiple of 64 bits. 
*/ + if ((actions_len < len) || (len % 8) != 0) { + return OFPBAC_BAD_LEN; + } + + type = ntohs(ah->type); + if (type < ARRAY_SIZE(of_actions)) { + err = validate_ofpat(dp, key, ah, type, len); + if (err != ACT_VALIDATION_OK) { + return err; + } + } else if (type == OFPAT_VENDOR) { + err = validate_vendor(dp, key, ah, len); + if (err != ACT_VALIDATION_OK) { + return err; + } + } else { + return OFPBAC_BAD_TYPE; + } + + p += len; + actions_len -= len; + } + + /* Check if there's any trailing garbage. */ + if (actions_len != 0) { + return OFPBAC_BAD_LEN; + } + + return ACT_VALIDATION_OK; +} + +/* Execute a built-in OpenFlow action against 'buffer'. */ +static void +execute_ofpat(struct ofpbuf *buffer, struct sw_flow_key *key, + const struct ofp_action_header *ah, uint16_t type) +{ + const struct openflow_action *act = &of_actions[type]; + + if (act->execute) { + act->execute(buffer, key, ah); + } +} + +/* Execute a vendor-defined action against 'buffer'. */ +static void +execute_vendor(struct ofpbuf *buffer, const struct sw_flow_key *key, + const struct ofp_action_header *ah) +{ + struct ofp_action_vendor_header *avh + = (struct ofp_action_vendor_header *)ah; + + switch(ntohl(avh->vendor)) { + case NX_VENDOR_ID: + nx_execute_act(buffer, key, avh); + break; + + default: + /* This should not be possible due to prior validation. */ + printf("attempt to execute action with unknown vendor: %#x\n", + ntohl(avh->vendor)); + break; + } +} + +/* Execute a list of actions against 'buffer'. */ +void execute_actions(struct datapath *dp, struct ofpbuf *buffer, + struct sw_flow_key *key, + const struct ofp_action_header *actions, size_t actions_len, + int ignore_no_fwd) +{ + /* Every output action needs a separate clone of 'buffer', but the common + * case is just a single output action, so that doing a clone and then + * freeing the original buffer is wasteful. So the following code is + * slightly obscure just to avoid that. 
*/ + int prev_port; + size_t max_len=0; /* Initialze to make compiler happy */ + uint16_t in_port = ntohs(key->flow.in_port); + uint8_t *p = (uint8_t *)actions; + + prev_port = -1; + + /* The action list was already validated, so we can be a bit looser + * in our sanity-checking. */ + while (actions_len > 0) { + struct ofp_action_header *ah = (struct ofp_action_header *)p; + size_t len = htons(ah->len); + + if (prev_port != -1) { + do_output(dp, ofpbuf_clone(buffer), in_port, max_len, + prev_port, ignore_no_fwd); + prev_port = -1; + } + + if (ah->type == htons(OFPAT_OUTPUT)) { + struct ofp_action_output *oa = (struct ofp_action_output *)p; + prev_port = ntohs(oa->port); + max_len = ntohs(oa->max_len); + } else { + uint16_t type = ntohs(ah->type); + + if (type < ARRAY_SIZE(of_actions)) { + execute_ofpat(buffer, key, ah, type); + } else if (type == OFPAT_VENDOR) { + execute_vendor(buffer, key, ah); + } + } + + p += len; + actions_len -= len; + } + if (prev_port != -1) { + do_output(dp, buffer, in_port, max_len, prev_port, ignore_no_fwd); + } else { + ofpbuf_delete(buffer); + } +} diff --git a/udatapath/dp_act.h b/udatapath/dp_act.h new file mode 100644 index 000000000..e0181fadc --- /dev/null +++ b/udatapath/dp_act.h @@ -0,0 +1,49 @@ +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. 
However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. 
+ */ + +#ifndef DP_ACT_H +#define DP_ACT_H 1 + +#include "openflow/openflow.h" +#include "switch-flow.h" +#include "datapath.h" + +#define ACT_VALIDATION_OK ((uint16_t)-1) + +uint16_t validate_actions(struct datapath *, const struct sw_flow_key *, + const struct ofp_action_header *, size_t); +void execute_actions(struct datapath *, struct ofpbuf *, + struct sw_flow_key *, const struct ofp_action_header *, + size_t action_len, int ignore_no_fwd); + +#endif /* dp_act.h */ diff --git a/udatapath/nx_act.c b/udatapath/nx_act.c new file mode 100644 index 000000000..e2a6d4f62 --- /dev/null +++ b/udatapath/nx_act.c @@ -0,0 +1,52 @@ +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. + */ + +/* Functions for Nicira-extended actions. */ +#include "openflow/nicira-ext.h" +#include "nx_act.h" + +uint16_t +nx_validate_act(struct datapath *dp, const struct sw_flow_key *key, + const struct ofp_action_vendor_header *avh, uint16_t len) +{ + /* Nothing to validate yet */ + return OFPBAC_BAD_VENDOR_TYPE; +} + +void +nx_execute_act(struct ofpbuf *buffer, const struct sw_flow_key *key, + const struct ofp_action_vendor_header *avh) +{ + /* Nothing to execute yet */ +} + diff --git a/udatapath/nx_act.h b/udatapath/nx_act.h new file mode 100644 index 000000000..92d106544 --- /dev/null +++ b/udatapath/nx_act.h @@ -0,0 +1,48 @@ +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. 
However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. 
#ifndef NX_ACT_H
#define NX_ACT_H 1

#include "switch-flow.h"
#include "datapath.h"


/* Validates the Nicira vendor action 'avh'.  The implementation in nx_act.c
 * currently rejects every action with OFPBAC_BAD_VENDOR_TYPE. */
uint16_t nx_validate_act(struct datapath *dp, const struct sw_flow_key *key,
        const struct ofp_action_vendor_header *avh, uint16_t len);

/* Executes the Nicira vendor action 'avh' on 'buffer'.  The implementation
 * in nx_act.c is currently a no-op. */
void nx_execute_act(struct ofpbuf *buffer,
        const struct sw_flow_key *key,
        const struct ofp_action_vendor_header *avh);

#endif /* nx_act.h */
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. + */ + +#include +#include "switch-flow.h" +#include +#include +#include +#include +#include "ofpbuf.h" +#include "openflow/openflow.h" +#include "packets.h" +#include "timeval.h" + +/* Internal function used to compare fields in flow. */ +static inline int +flow_fields_match(const struct flow *a, const struct flow *b, uint16_t w, + uint32_t src_mask, uint32_t dst_mask) +{ + return ((w & OFPFW_IN_PORT || a->in_port == b->in_port) + && (w & OFPFW_DL_VLAN || a->dl_vlan == b->dl_vlan) + && (w & OFPFW_DL_SRC || eth_addr_equals(a->dl_src, b->dl_src)) + && (w & OFPFW_DL_DST || eth_addr_equals(a->dl_dst, b->dl_dst)) + && (w & OFPFW_DL_TYPE || a->dl_type == b->dl_type) + && !((a->nw_src ^ b->nw_src) & src_mask) + && !((a->nw_dst ^ b->nw_dst) & dst_mask) + && (w & OFPFW_NW_PROTO || a->nw_proto == b->nw_proto) + && (w & OFPFW_TP_SRC || a->tp_src == b->tp_src) + && (w & OFPFW_TP_DST || a->tp_dst == b->tp_dst)); +} + +static uint32_t make_nw_mask(int n_wild_bits) +{ + n_wild_bits &= (1u << OFPFW_NW_SRC_BITS) - 1; + return n_wild_bits < 32 ? htonl(~((1u << n_wild_bits) - 1)) : 0; +} + +/* Returns nonzero if 'a' and 'b' match, that is, if their fields are equal + * modulo wildcards in 'b', zero otherwise. 
*/ +inline int +flow_matches_1wild(const struct sw_flow_key *a, const struct sw_flow_key *b) +{ + return flow_fields_match(&a->flow, &b->flow, b->wildcards, + b->nw_src_mask, b->nw_dst_mask); +} + +/* Returns nonzero if 'a' and 'b' match, that is, if their fields are equal + * modulo wildcards in 'a' or 'b', zero otherwise. */ +inline int +flow_matches_2wild(const struct sw_flow_key *a, const struct sw_flow_key *b) +{ + return flow_fields_match(&a->flow, &b->flow, a->wildcards | b->wildcards, + a->nw_src_mask & b->nw_src_mask, + a->nw_dst_mask & b->nw_dst_mask); +} + +/* Returns nonzero if 't' (the table entry's key) and 'd' (the key + * describing the match) match, that is, if their fields are + * equal modulo wildcards, zero otherwise. If 'strict' is nonzero, the + * wildcards must match in both 't_key' and 'd_key'. Note that the + * table's wildcards are ignored unless 'strict' is set. */ +int +flow_matches_desc(const struct sw_flow_key *t, const struct sw_flow_key *d, + int strict) +{ + if (strict && d->wildcards != t->wildcards) { + return 0; + } + return flow_matches_1wild(t, d); +} + +void +flow_extract_match(struct sw_flow_key* to, const struct ofp_match* from) +{ + to->wildcards = ntohl(from->wildcards) & OFPFW_ALL; + to->flow.reserved = 0; + to->flow.in_port = from->in_port; + to->flow.dl_vlan = from->dl_vlan; + memcpy(to->flow.dl_src, from->dl_src, ETH_ADDR_LEN); + memcpy(to->flow.dl_dst, from->dl_dst, ETH_ADDR_LEN); + to->flow.dl_type = from->dl_type; + + to->flow.nw_src = to->flow.nw_dst = to->flow.nw_proto = 0; + to->flow.tp_src = to->flow.tp_dst = 0; + +#define OFPFW_TP (OFPFW_TP_SRC | OFPFW_TP_DST) +#define OFPFW_NW (OFPFW_NW_SRC_MASK | OFPFW_NW_DST_MASK | OFPFW_NW_PROTO) + if (to->wildcards & OFPFW_DL_TYPE) { + /* Can't sensibly match on network or transport headers if the + * data link type is unknown. 
*/ + to->wildcards |= OFPFW_NW | OFPFW_TP; + } else if (from->dl_type == htons(ETH_TYPE_IP)) { + to->flow.nw_src = from->nw_src; + to->flow.nw_dst = from->nw_dst; + to->flow.nw_proto = from->nw_proto; + + if (to->wildcards & OFPFW_NW_PROTO) { + /* Can't sensibly match on transport headers if the network + * protocol is unknown. */ + to->wildcards |= OFPFW_TP; + } else if (from->nw_proto == IPPROTO_TCP + || from->nw_proto == IPPROTO_UDP + || from->nw_proto == IPPROTO_ICMP) { + to->flow.tp_src = from->tp_src; + to->flow.tp_dst = from->tp_dst; + } else { + /* Transport layer fields are undefined. Mark them as + * exact-match to allow such flows to reside in table-hash, + * instead of falling into table-linear. */ + to->wildcards &= ~OFPFW_TP; + } + } else { + /* Network and transport layer fields are undefined. Mark them + * as exact-match to allow such flows to reside in table-hash, + * instead of falling into table-linear. */ + to->wildcards &= ~(OFPFW_NW | OFPFW_TP); + } + + /* We set these late because code above adjusts to->wildcards. */ + to->nw_src_mask = make_nw_mask(to->wildcards >> OFPFW_NW_SRC_SHIFT); + to->nw_dst_mask = make_nw_mask(to->wildcards >> OFPFW_NW_DST_SHIFT); +} + +void +flow_fill_match(struct ofp_match* to, const struct sw_flow_key* from) +{ + to->wildcards = htonl(from->wildcards); + to->in_port = from->flow.in_port; + to->dl_vlan = from->flow.dl_vlan; + memcpy(to->dl_src, from->flow.dl_src, ETH_ADDR_LEN); + memcpy(to->dl_dst, from->flow.dl_dst, ETH_ADDR_LEN); + to->dl_type = from->flow.dl_type; + to->nw_src = from->flow.nw_src; + to->nw_dst = from->flow.nw_dst; + to->nw_proto = from->flow.nw_proto; + to->tp_src = from->flow.tp_src; + to->tp_dst = from->flow.tp_dst; + to->pad = 0; +} + +/* Allocates and returns a new flow with room for 'actions_len' actions. + * Returns the new flow or a null pointer on failure. 
*/ +struct sw_flow * +flow_alloc(size_t actions_len) +{ + struct sw_flow_actions *sfa; + size_t size = sizeof *sfa + actions_len; + struct sw_flow *flow = malloc(sizeof *flow); + if (!flow) + return NULL; + + sfa = malloc(size); + if (!sfa) { + free(flow); + return NULL; + } + sfa->actions_len = actions_len; + flow->sf_acts = sfa; + return flow; +} + +/* Frees 'flow' immediately. */ +void +flow_free(struct sw_flow *flow) +{ + if (!flow) { + return; + } + free(flow->sf_acts); + free(flow); +} + +/* Copies 'actions' into a newly allocated structure for use by 'flow' + * and frees the structure that defined the previous actions. */ +void flow_replace_acts(struct sw_flow *flow, + const struct ofp_action_header *actions, size_t actions_len) +{ + struct sw_flow_actions *sfa; + int size = sizeof *sfa + actions_len; + + sfa = malloc(size); + if (unlikely(!sfa)) + return; + + sfa->actions_len = actions_len; + memcpy(sfa->actions, actions, actions_len); + + free(flow->sf_acts); + flow->sf_acts = sfa; + + return; +} + +/* Prints a representation of 'key' to the kernel log. 
*/ +void +print_flow(const struct sw_flow_key *key) +{ + const struct flow *f = &key->flow; + printf("wild%08x port%04x:vlan%04x mac%02x:%02x:%02x:%02x:%02x:%02x" + "->%02x:%02x:%02x:%02x:%02x:%02x " + "proto%04x ip%u.%u.%u.%u->%u.%u.%u.%u port%d->%d\n", + key->wildcards, ntohs(f->in_port), ntohs(f->dl_vlan), + f->dl_src[0], f->dl_src[1], f->dl_src[2], + f->dl_src[3], f->dl_src[4], f->dl_src[5], + f->dl_dst[0], f->dl_dst[1], f->dl_dst[2], + f->dl_dst[3], f->dl_dst[4], f->dl_dst[5], + ntohs(f->dl_type), + ((unsigned char *)&f->nw_src)[0], + ((unsigned char *)&f->nw_src)[1], + ((unsigned char *)&f->nw_src)[2], + ((unsigned char *)&f->nw_src)[3], + ((unsigned char *)&f->nw_dst)[0], + ((unsigned char *)&f->nw_dst)[1], + ((unsigned char *)&f->nw_dst)[2], + ((unsigned char *)&f->nw_dst)[3], + ntohs(f->tp_src), ntohs(f->tp_dst)); +} + +bool flow_timeout(struct sw_flow *flow) +{ + time_t now = time_now(); + if (flow->idle_timeout != OFP_FLOW_PERMANENT + && now > flow->used + flow->idle_timeout) { + flow->reason = OFPER_IDLE_TIMEOUT; + return true; + } else if (flow->hard_timeout != OFP_FLOW_PERMANENT + && now > flow->created + flow->hard_timeout) { + flow->reason = OFPER_HARD_TIMEOUT; + return true; + } else { + return false; + } +} + +/* Returns nonzero if 'flow' contains an output action to 'out_port' or + * has the value OFPP_NONE. 'out_port' is in network-byte order. 
*/ +int flow_has_out_port(struct sw_flow *flow, uint16_t out_port) +{ + struct sw_flow_actions *sf_acts = flow->sf_acts; + size_t actions_len = sf_acts->actions_len; + uint8_t *p = (uint8_t *)sf_acts->actions; + + if (out_port == htons(OFPP_NONE)) + return 1; + + while (actions_len > 0) { + struct ofp_action_header *ah = (struct ofp_action_header *)p; + size_t len = ntohs(ah->len); + + if (ah->type == htons(OFPAT_OUTPUT)) { + struct ofp_action_output *oa = (struct ofp_action_output *)p; + if (oa->port == out_port) { + return 1; + } + } + p += len; + actions_len -= len; + } + + return 0; +} + +void flow_used(struct sw_flow *flow, struct ofpbuf *buffer) +{ + flow->used = time_now(); + flow->packet_count++; + flow->byte_count += buffer->size; +} diff --git a/udatapath/switch-flow.h b/udatapath/switch-flow.h new file mode 100644 index 000000000..ef0497c89 --- /dev/null +++ b/udatapath/switch-flow.h @@ -0,0 +1,95 @@ +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. + */ + +#ifndef SWITCH_FLOW_H +#define SWITCH_FLOW_H 1 + +#include +#include "openflow/openflow.h" +#include "flow.h" +#include "list.h" + +struct ofp_match; + +/* Identification data for a flow. */ +struct sw_flow_key { + struct flow flow; /* Flow data (in network byte order). */ + uint32_t wildcards; /* Wildcard fields (in host byte order). */ + uint32_t nw_src_mask; /* 1-bit in each significant nw_src bit. */ + uint32_t nw_dst_mask; /* 1-bit in each significant nw_dst bit. */ +}; + +struct sw_flow_actions { + size_t actions_len; + struct ofp_action_header actions[0]; +}; + +struct sw_flow { + struct sw_flow_key key; + + uint16_t priority; /* Only used on entries with wildcards. */ + uint16_t idle_timeout; /* Idle time before discarding (seconds). */ + uint16_t hard_timeout; /* Hard expiration time (seconds) */ + time_t used; /* Last used time. */ + time_t created; /* When the flow was created. */ + uint64_t packet_count; /* Number of packets seen. */ + uint64_t byte_count; /* Number of bytes seen. */ + uint8_t reason; /* Reason flow expired (one of OFPER_*). */ + + struct sw_flow_actions *sf_acts; + + /* Private to table implementations. 
*/ + struct list node; + struct list iter_node; + unsigned long int serial; +}; + +int flow_matches_1wild(const struct sw_flow_key *, const struct sw_flow_key *); +int flow_matches_2wild(const struct sw_flow_key *, const struct sw_flow_key *); +int flow_matches_desc(const struct sw_flow_key *, const struct sw_flow_key *, + int); +int flow_has_out_port(struct sw_flow *flow, uint16_t out_port); +struct sw_flow *flow_alloc(size_t); +void flow_free(struct sw_flow *); +void flow_deferred_free(struct sw_flow *); +void flow_deferred_free_acts(struct sw_flow_actions *); +void flow_replace_acts(struct sw_flow *, const struct ofp_action_header *, + size_t); +void flow_extract_match(struct sw_flow_key* to, const struct ofp_match* from); +void flow_fill_match(struct ofp_match* to, const struct sw_flow_key* from); + +void print_flow(const struct sw_flow_key *); +bool flow_timeout(struct sw_flow *flow); +void flow_used(struct sw_flow *flow, struct ofpbuf *buffer); + +#endif /* switch-flow.h */ diff --git a/udatapath/table-hash.c b/udatapath/table-hash.c new file mode 100644 index 000000000..fcdad0967 --- /dev/null +++ b/udatapath/table-hash.c @@ -0,0 +1,426 @@ +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. 
However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. + */ + +#include +#include "table.h" +#include +#include +#include +#include "crc32.h" +#include "datapath.h" +#include "flow.h" +#include "switch-flow.h" + +struct sw_table_hash { + struct sw_table swt; + struct crc32 crc32; + unsigned int n_flows; + unsigned int bucket_mask; /* Number of buckets minus 1. 
*/ + struct sw_flow **buckets; +}; + +static struct sw_flow **find_bucket(struct sw_table *swt, + const struct sw_flow_key *key) +{ + struct sw_table_hash *th = (struct sw_table_hash *) swt; + unsigned int crc = crc32_calculate(&th->crc32, key, + offsetof(struct sw_flow_key, wildcards)); + return &th->buckets[crc & th->bucket_mask]; +} + +static struct sw_flow *table_hash_lookup(struct sw_table *swt, + const struct sw_flow_key *key) +{ + struct sw_flow *flow = *find_bucket(swt, key); + return flow && !flow_compare(&flow->key.flow, &key->flow) ? flow : NULL; +} + +static int table_hash_insert(struct sw_table *swt, struct sw_flow *flow) +{ + struct sw_table_hash *th = (struct sw_table_hash *) swt; + struct sw_flow **bucket; + int retval; + + if (flow->key.wildcards != 0) + return 0; + + bucket = find_bucket(swt, &flow->key); + if (*bucket == NULL) { + th->n_flows++; + *bucket = flow; + retval = 1; + } else { + struct sw_flow *old_flow = *bucket; + if (!flow_compare(&old_flow->key.flow, &flow->key.flow)) { + *bucket = flow; + flow_free(old_flow); + retval = 1; + } else { + retval = 0; + } + } + return retval; +} + +static int table_hash_modify(struct sw_table *swt, + const struct sw_flow_key *key, uint16_t priority, int strict, + const struct ofp_action_header *actions, size_t actions_len) +{ + struct sw_table_hash *th = (struct sw_table_hash *) swt; + unsigned int count = 0; + + if (key->wildcards == 0) { + struct sw_flow **bucket = find_bucket(swt, key); + struct sw_flow *flow = *bucket; + if (flow && flow_matches_desc(&flow->key, key, strict) + && (!strict || (flow->priority == priority))) { + flow_replace_acts(flow, actions, actions_len); + count = 1; + } + } else { + unsigned int i; + + for (i = 0; i <= th->bucket_mask; i++) { + struct sw_flow **bucket = &th->buckets[i]; + struct sw_flow *flow = *bucket; + if (flow && flow_matches_desc(&flow->key, key, strict) + && (!strict || (flow->priority == priority))) { + flow_replace_acts(flow, actions, actions_len); + 
count++; + } + } + } + return count; +} + +/* Caller must update n_flows. */ +static void +do_delete(struct sw_flow **bucket) +{ + flow_free(*bucket); + *bucket = NULL; +} + +/* Returns number of deleted flows. We can igonre the priority + * argument, since all exact-match entries are the same (highest) + * priority. */ +static int table_hash_delete(struct sw_table *swt, + const struct sw_flow_key *key, + uint16_t out_port, + uint16_t priority, int strict) +{ + struct sw_table_hash *th = (struct sw_table_hash *) swt; + unsigned int count = 0; + + if (key->wildcards == 0) { + struct sw_flow **bucket = find_bucket(swt, key); + struct sw_flow *flow = *bucket; + if (flow && !flow_compare(&flow->key.flow, &key->flow) + && flow_has_out_port(flow, out_port)) { + do_delete(bucket); + count = 1; + } + } else { + unsigned int i; + + for (i = 0; i <= th->bucket_mask; i++) { + struct sw_flow **bucket = &th->buckets[i]; + struct sw_flow *flow = *bucket; + if (flow && flow_matches_desc(&flow->key, key, strict) + && flow_has_out_port(flow, out_port)) { + do_delete(bucket); + count++; + } + } + } + th->n_flows -= count; + return count; +} + +static void table_hash_timeout(struct sw_table *swt, struct list *deleted) +{ + struct sw_table_hash *th = (struct sw_table_hash *) swt; + unsigned int i; + + for (i = 0; i <= th->bucket_mask; i++) { + struct sw_flow **bucket = &th->buckets[i]; + struct sw_flow *flow = *bucket; + if (flow && flow_timeout(flow)) { + list_push_back(deleted, &flow->node); + *bucket = NULL; + th->n_flows--; + } + } +} + +static void table_hash_destroy(struct sw_table *swt) +{ + struct sw_table_hash *th = (struct sw_table_hash *) swt; + unsigned int i; + for (i = 0; i <= th->bucket_mask; i++) { + if (th->buckets[i]) { + flow_free(th->buckets[i]); + } + } + free(th->buckets); + free(th); +} + +static int table_hash_iterate(struct sw_table *swt, + const struct sw_flow_key *key, uint16_t out_port, + struct sw_table_position *position, + int (*callback)(struct sw_flow 
*, void *private), + void *private) +{ + struct sw_table_hash *th = (struct sw_table_hash *) swt; + + if (position->private[0] > th->bucket_mask) + return 0; + + if (key->wildcards == 0) { + struct sw_flow *flow = table_hash_lookup(swt, key); + position->private[0] = -1; + if (!flow || !flow_has_out_port(flow, out_port)) { + return 0; + } + return callback(flow, private); + } else { + int i; + + for (i = position->private[0]; i <= th->bucket_mask; i++) { + struct sw_flow *flow = th->buckets[i]; + if (flow && flow_matches_1wild(&flow->key, key) + && flow_has_out_port(flow, out_port)) { + int error = callback(flow, private); + if (error) { + position->private[0] = i + 1; + return error; + } + } + } + return 0; + } +} + +static void table_hash_stats(struct sw_table *swt, + struct sw_table_stats *stats) +{ + struct sw_table_hash *th = (struct sw_table_hash *) swt; + stats->name = "hash"; + stats->wildcards = 0; /* No wildcards are supported. */ + stats->n_flows = th->n_flows; + stats->max_flows = th->bucket_mask + 1; + stats->n_lookup = swt->n_lookup; + stats->n_matched = swt->n_matched; +} + +struct sw_table *table_hash_create(unsigned int polynomial, + unsigned int n_buckets) +{ + struct sw_table_hash *th; + struct sw_table *swt; + + th = malloc(sizeof *th); + if (th == NULL) + return NULL; + memset(th, '\0', sizeof *th); + + assert(!(n_buckets & (n_buckets - 1))); + th->buckets = calloc(n_buckets, sizeof *th->buckets); + if (th->buckets == NULL) { + printf("failed to allocate %u buckets\n", n_buckets); + free(th); + return NULL; + } + th->n_flows = 0; + th->bucket_mask = n_buckets - 1; + + swt = &th->swt; + swt->lookup = table_hash_lookup; + swt->insert = table_hash_insert; + swt->modify = table_hash_modify; + swt->delete = table_hash_delete; + swt->timeout = table_hash_timeout; + swt->destroy = table_hash_destroy; + swt->iterate = table_hash_iterate; + swt->stats = table_hash_stats; + + crc32_init(&th->crc32, polynomial); + + return swt; +} + +/* Double-hashing 
/* Double-hashing table. */

/* Two independent exact-match hash tables behind a single sw_table
 * interface.  A flow is stored in whichever subtable accepts it. */
struct sw_table_hash2 {
    struct sw_table swt;
    struct sw_table *subtable[2];
};

/* Probes subtable 0 and then subtable 1 for a flow whose fields compare
 * equal to those of 'key'; returns it, or a null pointer if neither bucket
 * holds such a flow. */
static struct sw_flow *table_hash2_lookup(struct sw_table *swt,
                                          const struct sw_flow_key *key)
{
    struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
    int i;

    for (i = 0; i < 2; i++) {
        struct sw_flow *flow = *find_bucket(t2->subtable[i], key);
        if (flow && !flow_compare(&flow->key.flow, &key->flow))
            return flow;
    }
    return NULL;
}

/* Offers 'flow' to subtable 0 and, only if that refuses it, to subtable 1.
 * Returns 1 if either subtable stored the flow, 0 otherwise. */
static int table_hash2_insert(struct sw_table *swt, struct sw_flow *flow)
{
    struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;

    if (table_hash_insert(t2->subtable[0], flow))
        return 1;
    return table_hash_insert(t2->subtable[1], flow);
}

/* Applies the modification to both subtables and returns the sum of their
 * counts. */
static int table_hash2_modify(struct sw_table *swt,
        const struct sw_flow_key *key, uint16_t priority, int strict,
        const struct ofp_action_header *actions, size_t actions_len)
{
    struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
    return (table_hash_modify(t2->subtable[0], key, priority, strict,
                              actions, actions_len)
            + table_hash_modify(t2->subtable[1], key, priority, strict,
                                actions, actions_len));
}

/* Applies the deletion to both subtables and returns the total number of
 * flows deleted. */
static int table_hash2_delete(struct sw_table *swt,
                              const struct sw_flow_key *key,
                              uint16_t out_port,
                              uint16_t priority, int strict)
{
    struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
    return (table_hash_delete(t2->subtable[0], key, out_port, priority, strict)
            + table_hash_delete(t2->subtable[1], key, out_port, priority,
                                strict));
}

/* Expires flows in both subtables, collecting them on 'deleted'. */
static void table_hash2_timeout(struct sw_table *swt, struct list *deleted)
{
    struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
    table_hash_timeout(t2->subtable[0], deleted);
    table_hash_timeout(t2->subtable[1], deleted);
}

/* Destroys both subtables and then the wrapper itself. */
static void table_hash2_destroy(struct sw_table *swt)
{
    struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
    table_hash_destroy(t2->subtable[0]);
    table_hash_destroy(t2->subtable[1]);
    free(t2);
}

/* Iterates over the subtables in order.  position->private[1] holds the
 * index of the subtable being walked and position->private[0] the bucket
 * cursor within it, which table_hash_iterate() maintains.  A nonzero result
 * from a subtable stops the walk and is returned unchanged; otherwise the
 * bucket cursor is reset and the walk moves on to the next subtable.
 * Returns 0 when both subtables are exhausted. */
static int table_hash2_iterate(struct sw_table *swt,
                               const struct sw_flow_key *key,
                               uint16_t out_port,
                               struct sw_table_position *position,
                               int (*callback)(struct sw_flow *, void *),
                               void *private)
{
    struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
    int i;

    for (i = position->private[1]; i < 2; i++) {
        int error = table_hash_iterate(t2->subtable[i], key, out_port,
                                       position, callback, private);
        if (error) {
            return error;
        }
        position->private[0] = 0;
        position->private[1]++;
    }
    return 0;
}

/* Fills in 'stats': flow count and capacity are the sums over the two
 * subtables, while the lookup counters are those of the wrapper table. */
static void table_hash2_stats(struct sw_table *swt,
                              struct sw_table_stats *stats)
{
    struct sw_table_hash2 *t2 = (struct sw_table_hash2 *) swt;
    struct sw_table_stats substats[2];
    int i;

    for (i = 0; i < 2; i++)
        table_hash_stats(t2->subtable[i], &substats[i]);
    stats->name = "hash2";
    stats->wildcards = 0;          /* No wildcards are supported. */
    stats->n_flows   = substats[0].n_flows + substats[1].n_flows;
    stats->max_flows = substats[0].max_flows + substats[1].max_flows;
    stats->n_lookup = swt->n_lookup;
    stats->n_matched = swt->n_matched;
}

/* Creates a double-hashing table from two hash tables built with
 * ('poly0', 'buckets0') and ('poly1', 'buckets1').  Returns the new table,
 * or a null pointer if any allocation fails, in which case everything
 * allocated so far is released. */
struct sw_table *table_hash2_create(unsigned int poly0, unsigned int buckets0,
                                    unsigned int poly1, unsigned int buckets1)

{
    struct sw_table_hash2 *t2;
    struct sw_table *swt;

    t2 = malloc(sizeof *t2);
    if (t2 == NULL)
        return NULL;
    memset(t2, '\0', sizeof *t2);

    t2->subtable[0] = table_hash_create(poly0, buckets0);
    if (t2->subtable[0] == NULL)
        goto out_free_t2;

    t2->subtable[1] = table_hash_create(poly1, buckets1);
    if (t2->subtable[1] == NULL)
        goto out_free_subtable0;

    swt = &t2->swt;
    swt->lookup = table_hash2_lookup;
    swt->insert = table_hash2_insert;
    swt->modify = table_hash2_modify;
    swt->delete = table_hash2_delete;
    swt->timeout = table_hash2_timeout;
    swt->destroy = table_hash2_destroy;
    swt->iterate = table_hash2_iterate;
    swt->stats = table_hash2_stats;

    return swt;

out_free_subtable0:
    table_hash_destroy(t2->subtable[0]);
out_free_t2:
    free(t2);
    return NULL;
}
+ */ + +#include +#include "table.h" +#include +#include "flow.h" +#include "list.h" +#include "openflow/openflow.h" +#include "switch-flow.h" +#include "datapath.h" + +struct sw_table_linear { + struct sw_table swt; + + unsigned int max_flows; + unsigned int n_flows; + struct list flows; + struct list iter_flows; + unsigned long int next_serial; +}; + +static struct sw_flow *table_linear_lookup(struct sw_table *swt, + const struct sw_flow_key *key) +{ + struct sw_table_linear *tl = (struct sw_table_linear *) swt; + struct sw_flow *flow; + LIST_FOR_EACH (flow, struct sw_flow, node, &tl->flows) { + if (flow_matches_1wild(key, &flow->key)) + return flow; + } + return NULL; +} + +static int table_linear_insert(struct sw_table *swt, struct sw_flow *flow) +{ + struct sw_table_linear *tl = (struct sw_table_linear *) swt; + struct sw_flow *f; + + /* Loop through the existing list of entries. New entries will + * always be placed behind those with equal priority. Just replace + * any flows that match exactly. + */ + LIST_FOR_EACH (f, struct sw_flow, node, &tl->flows) { + if (f->priority == flow->priority + && f->key.wildcards == flow->key.wildcards + && flow_matches_2wild(&f->key, &flow->key)) { + flow->serial = f->serial; + list_replace(&flow->node, &f->node); + list_replace(&flow->iter_node, &f->iter_node); + flow_free(f); + return 1; + } + + if (f->priority < flow->priority) + break; + } + + /* Make sure there's room in the table. */ + if (tl->n_flows >= tl->max_flows) { + return 0; + } + tl->n_flows++; + + /* Insert the entry immediately in front of where we're pointing. 
*/ + flow->serial = tl->next_serial++; + list_insert(&f->node, &flow->node); + list_push_front(&tl->iter_flows, &flow->iter_node); + + return 1; +} + +static int table_linear_modify(struct sw_table *swt, + const struct sw_flow_key *key, uint16_t priority, int strict, + const struct ofp_action_header *actions, size_t actions_len) +{ + struct sw_table_linear *tl = (struct sw_table_linear *) swt; + struct sw_flow *flow; + unsigned int count = 0; + + LIST_FOR_EACH (flow, struct sw_flow, node, &tl->flows) { + if (flow_matches_desc(&flow->key, key, strict) + && (!strict || (flow->priority == priority))) { + flow_replace_acts(flow, actions, actions_len); + count++; + } + } + return count; +} + +static void +do_delete(struct sw_flow *flow) +{ + list_remove(&flow->node); + list_remove(&flow->iter_node); + flow_free(flow); +} + +static int table_linear_delete(struct sw_table *swt, + const struct sw_flow_key *key, + uint16_t out_port, + uint16_t priority, int strict) +{ + struct sw_table_linear *tl = (struct sw_table_linear *) swt; + struct sw_flow *flow, *n; + unsigned int count = 0; + + LIST_FOR_EACH_SAFE (flow, n, struct sw_flow, node, &tl->flows) { + if (flow_matches_desc(&flow->key, key, strict) + && flow_has_out_port(flow, out_port) + && (!strict || (flow->priority == priority))) { + do_delete(flow); + count++; + } + } + tl->n_flows -= count; + return count; +} + +static void table_linear_timeout(struct sw_table *swt, struct list *deleted) +{ + struct sw_table_linear *tl = (struct sw_table_linear *) swt; + struct sw_flow *flow, *n; + + LIST_FOR_EACH_SAFE (flow, n, struct sw_flow, node, &tl->flows) { + if (flow_timeout(flow)) { + list_remove(&flow->node); + list_remove(&flow->iter_node); + list_push_back(deleted, &flow->node); + tl->n_flows--; + } + } +} + +static void table_linear_destroy(struct sw_table *swt) +{ + struct sw_table_linear *tl = (struct sw_table_linear *) swt; + + while (!list_is_empty(&tl->flows)) { + struct sw_flow *flow = 
CONTAINER_OF(list_front(&tl->flows), + struct sw_flow, node); + list_remove(&flow->node); + flow_free(flow); + } + free(tl); +} + +static int table_linear_iterate(struct sw_table *swt, + const struct sw_flow_key *key, + uint16_t out_port, + struct sw_table_position *position, + int (*callback)(struct sw_flow *, void *), + void *private) +{ + struct sw_table_linear *tl = (struct sw_table_linear *) swt; + struct sw_flow *flow; + unsigned long start; + + start = ~position->private[0]; + LIST_FOR_EACH (flow, struct sw_flow, iter_node, &tl->iter_flows) { + if (flow->serial <= start + && flow_matches_2wild(key, &flow->key) + && flow_has_out_port(flow, out_port)) { + int error = callback(flow, private); + if (error) { + position->private[0] = ~(flow->serial - 1); + return error; + } + } + } + return 0; +} + +static void table_linear_stats(struct sw_table *swt, + struct sw_table_stats *stats) +{ + struct sw_table_linear *tl = (struct sw_table_linear *) swt; + stats->name = "linear"; + stats->wildcards = OFPFW_ALL; + stats->n_flows = tl->n_flows; + stats->max_flows = tl->max_flows; + stats->n_lookup = swt->n_lookup; + stats->n_matched = swt->n_matched; +} + + +struct sw_table *table_linear_create(unsigned int max_flows) +{ + struct sw_table_linear *tl; + struct sw_table *swt; + + tl = calloc(1, sizeof *tl); + if (tl == NULL) + return NULL; + + swt = &tl->swt; + swt->lookup = table_linear_lookup; + swt->insert = table_linear_insert; + swt->modify = table_linear_modify; + swt->delete = table_linear_delete; + swt->timeout = table_linear_timeout; + swt->destroy = table_linear_destroy; + swt->iterate = table_linear_iterate; + swt->stats = table_linear_stats; + + tl->max_flows = max_flows; + tl->n_flows = 0; + list_init(&tl->flows); + list_init(&tl->iter_flows); + tl->next_serial = 1; /* 0 would make ~(serial - 1) collide with the all-zero start position. */ + + return swt; +} diff --git a/udatapath/table.h b/udatapath/table.h new file mode 100644 index 000000000..7681c0d38 --- /dev/null +++ b/udatapath/table.h @@ -0,0 +1,142 @@ +/* Copyright (c) 2008
The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission. + */ + +/* Individual switching tables. Generally grouped together in a chain (see + * chain.h). */ + +#ifndef TABLE_H +#define TABLE_H 1 + +#include +#include + +struct sw_flow; +struct sw_flow_key; +struct ofp_action_header; +struct list; + +/* Table statistics. 
*/ +struct sw_table_stats { + const char *name; /* Human-readable name. */ + uint32_t wildcards; /* Bitmap of OFPFW_* wildcards that are + supported by the table. */ + unsigned int n_flows; /* Number of active flows. */ + unsigned int max_flows; /* Flow capacity. */ + unsigned long int n_lookup; /* Number of packets looked up. */ + unsigned long int n_matched; /* Number of packets that have hit. */ +}; + +/* Position within an iteration of a sw_table. + * + * The contents are private to the table implementation, except that a position + * initialized to all-zero-bits represents the start of a table. */ +struct sw_table_position { + unsigned long private[4]; +}; + +/* A single table of flows. */ +struct sw_table { + /* The number of packets that have been looked up and matched, + * respectively. To make these 100% accurate, they should be atomic. + * However, we're primarily concerned about speed. */ + unsigned long long n_lookup; + unsigned long long n_matched; + + /* Searches 'table' for a flow matching 'key', which must not have any + * wildcard fields. Returns the flow if successful, a null pointer + * otherwise. */ + struct sw_flow *(*lookup)(struct sw_table *table, + const struct sw_flow_key *key); + + /* Inserts 'flow' into 'table', replacing any duplicate flow. Returns + * nonzero if successful or 0 on failure. Failure can be due to an + * over-capacity table or because the flow is not one of the kind that + * the table accepts. + * + * If successful, 'flow' becomes owned by 'table', otherwise it is + * retained by the caller. */ + int (*insert)(struct sw_table *table, struct sw_flow *flow); + + /* Modifies the actions in 'table' that match 'key'. If 'strict' is + * set, wildcards and priority must match. Returns the number of flows + * that were modified.
*/ + int (*modify)(struct sw_table *table, const struct sw_flow_key *key, + uint16_t priority, int strict, + const struct ofp_action_header *actions, size_t actions_len); + + /* Deletes from 'table' any and all flows that match 'key'. + * If 'out_port' is not OFPP_NONE, then matching entries + * must have that port as an argument for an output action. If + * 'strict' is set, wildcards and priority must match. Returns the + * number of flows that were deleted. */ + int (*delete)(struct sw_table *table, const struct sw_flow_key *key, + uint16_t out_port, uint16_t priority, int strict); + + /* Performs timeout processing on all the flow entries in 'table'. + * Appends all the flow entries removed from 'table' to 'deleted' for the + * caller to free. */ + void (*timeout)(struct sw_table *table, struct list *deleted); + + /* Destroys 'table', which must not have any users. */ + void (*destroy)(struct sw_table *table); + + /* Iterates through the flow entries in 'table', passing each one that + * matches 'key' and output port 'out_port' to 'callback'. The + * callback function should return 0 to continue iteration or a + * nonzero error code to stop. The iterator function returns either + * 0 if the table iteration completed or the value returned by the + * callback function otherwise. + * + * The iteration starts at 'position', which may be initialized to + * all-zero-bits to iterate from the beginning of the table. If the + * iteration terminates due to an error from the callback function, + * 'position' is updated to a value that can be passed back to the + * iterator function to resume iteration later with the following + * flow. */ + int (*iterate)(struct sw_table *table, + const struct sw_flow_key *key, uint16_t out_port, + struct sw_table_position *position, + int (*callback)(struct sw_flow *flow, void *private), + void *private); + + /* Dumps statistics for 'table' into 'stats'.
*/ + void (*stats)(struct sw_table *table, struct sw_table_stats *stats); +}; + +struct sw_table *table_hash_create(unsigned int polynomial, + unsigned int n_buckets); +struct sw_table *table_hash2_create(unsigned int poly0, unsigned int buckets0, + unsigned int poly1, unsigned int buckets1); +struct sw_table *table_linear_create(unsigned int max_flows); + +#endif /* table.h */ diff --git a/udatapath/udatapath.8.in b/udatapath/udatapath.8.in new file mode 100644 index 000000000..ad992c5d6 --- /dev/null +++ b/udatapath/udatapath.8.in @@ -0,0 +1,183 @@ +.TH udatapath 8 "May 2008" "OpenFlow" "OpenFlow Manual" + +.SH NAME +udatapath \- userspace implementation of OpenFlow switch + +.SH SYNOPSIS +.B udatapath +[\fIoptions\fR] +\fB-i\fR \fInetdev\fR[\fB,\fInetdev\fR].\|.\|. +\fImethod\fR [\fImethod\fR].\|.\|. + +.SH DESCRIPTION +The \fBudatapath\fR is a userspace implementation of an OpenFlow +datapath. It monitors one or more network device interfaces, +forwarding packets between them according to the entries in the flow +table that it maintains. When it is used with \fBsecchan\fR(8), to +connect the datapath to an OpenFlow controller, the combination is an +OpenFlow switch. + +For access to network devices, the udatapath program must normally run as +root. + +The mandatory \fImethod\fR argument specifies how \fBsecchan\fR(8) +communicates with \fBudatapath\fR, as a passive OpenFlow connection +method. Ordinarily \fImethod\fR takes the following form: + +.TP +\fBpunix:\fIfile\fR +Listens for connections on the Unix domain server socket named +\fIfile\fR. + +.PP +The following connection methods are also supported, but their use +would be unusual because \fBudatapath\fR and \fBsecchan\fR should run +on the same machine: + +.TP +\fBpssl:\fR[\fIport\fR] +Listens for SSL connections on \fIport\fR (default: 976). The +\fB--private-key\fR, \fB--certificate\fR, and \fB--ca-cert\fR options +are mandatory when this form is used.
(\fBofp\-pki\fR(8) does not set +up a suitable PKI for use with this option.) + +.TP +\fBptcp:\fR[\fIport\fR] +Listens for TCP connections from remote OpenFlow switches on +\fIport\fR (default: 975). + +.SH OPTIONS +.TP +\fB-i\fR, \fB--interfaces=\fR\fInetdev\fR[\fB,\fInetdev\fR].\|.\|. +Specifies each \fInetdev\fR (e.g., \fBeth0\fR) as a switch port. The +specified network devices should not have any configured IP addresses. +This option may be given any number of times to specify additional +network devices. + +.TP +\fB-L\fR, \fB--local-port=\fInetdev\fR +Specifies the network device to use as the userspace datapath's +``local port,'' which is a network device that \fBsecchan\fR(8) +bridges to the physical switch ports for use in in-band control. When +this option is not specified, the default is \fBtap:\fR, which causes +a new TAP virtual network device to be allocated with a default name +assigned by the kernel. To do the same, but assign a specific name +\fBname\fR to the TAP network device, specify the option as +\fB--local-port=tap:\fIname\fR. + +Either way, the existence of TAP devices created by \fBudatapath\fR is +temporary: they are destroyed when \fBudatapath\fR exits. If this is +undesirable, you may use \fBtunctl\fR(8) to create a persistent TAP +network device and then pass it to \fBudatapath\fR, like so: + +.RS +.IP 1. +Create a persistent TAP network device: \fBtunctl -t mytap\fR. (The +\fBtunctl\fR(8) utility is part of User Mode Linux. It is not +included with the OpenFlow reference implementation.) +.IP 2. +Invoke \fBudatapath\fR(8) using \fBmytap\fR, e.g. \fBudatapath +--local-port=mytap\fR .\|.\|. (Note the lack of \fBtap:\fR prefix on +the \fB--local-port\fR argument.) +.IP 3. +Invoke \fBsecchan\fR(8), etc., and use the switch as desired. +.IP 4. 
+When \fBsecchan\fR and \fBudatapath\fR have terminated and the TAP +network device is no longer needed, you may destroy it with: \fBtunctl +-d mytap\fR +.RE + +.IP +It does not ordinarily make sense to specify the name of a physical +network device on \fB-L\fR or \fB--local-port\fR. + +.TP +\fB--no-local-port\fR +Do not provide a local port as part of the datapath. When this option +is used, the switch will not support in-band control. + +.TP +\fB-d\fR, \fB--datapath-id=\fIdpid\fR +Specifies the OpenFlow datapath ID (a 48-bit number that uniquely +identifies a datapath) as \fIdpid\fR, which consists of exactly 12 +hex digits. Without this option, \fBudatapath\fR picks an ID randomly. + +.TP +\fB-p\fR, \fB--private-key=\fIprivkey.pem\fR +Specifies a PEM file containing the private key used as the datapath's +identity for SSL connections to \fBsecchan\fR(8). + +.TP +\fB-c\fR, \fB--certificate=\fIcert.pem\fR +Specifies a PEM file containing a certificate, signed by the +datapath's certificate authority (CA), that certifies the datapath's +private key to identify a trustworthy datapath. + +.TP +\fB-C\fR, \fB--ca-cert=\fIcacert.pem\fR +Specifies a PEM file containing the CA certificate used to verify that +the datapath is connected to a trustworthy secure channel. + +.TP +\fB-P\fR[\fIpidfile\fR], \fB--pidfile\fR[\fB=\fIpidfile\fR] +Causes a file (by default, \fBudatapath.pid\fR) to be created indicating +the PID of the running process. If \fIpidfile\fR is not specified, or +if it does not begin with \fB/\fR, then it is created in +\fB@RUNDIR@\fR. + +.TP +\fB-f\fR, \fB--force\fR +By default, when \fB-P\fR or \fB--pidfile\fR is specified and the +specified pidfile already exists and is locked by a running process, +\fBudatapath\fR refuses to start. Specify \fB-f\fR or \fB--force\fR +to cause it to instead overwrite the pidfile. + +When \fB-P\fR or \fB--pidfile\fR is not specified, this option has no +effect.
+ +.TP +\fB-D\fR, \fB--detach\fR +Causes \fBudatapath\fR to detach itself from the foreground session and +run as a background process. + +.TP +.BR \-h ", " \-\^\-help +Prints a brief help message to the console. + +.TP +\fB-v\fImodule\fR[\fB:\fIfacility\fR[\fB:\fIlevel\fR]], \fB--verbose=\fImodule\fR[\fB:\fIfacility\fR[\fB:\fIlevel\fR]] +Sets the logging level for \fImodule\fR in \fIfacility\fR to +\fIlevel\fR. The \fImodule\fR may be any valid module name (as +displayed by the \fB--list\fR action on \fBvlogconf\fR(8)), or the +special name \fBANY\fR to set the logging levels for all modules. The +\fIfacility\fR may be \fBsyslog\fR or \fBconsole\fR to set the levels +for logging to the system log or to the console, respectively, or +\fBANY\fR to set the logging levels for both facilities. If it is +omitted, \fIfacility\fR defaults to \fBANY\fR. The \fIlevel\fR must +be one of \fBemer\fR, \fBerr\fR, \fBwarn\fR, or \fBdbg\fR, designating +the minimum severity of a message for it to be logged. If it is +omitted, \fIlevel\fR defaults to \fBdbg\fR. + +.TP +\fB-v\fR, \fB--verbose\fR +Sets the maximum logging verbosity level, equivalent to +\fB--verbose=ANY:ANY:dbg\fR. + +.TP +.BR \-V ", " \-\^\-version +Prints version information to the console. + +.SH BUGS +The userspace datapath's performance lags significantly behind that of +the kernel-based switch. It should only be used when the kernel-based +switch cannot be. + +On Linux, general-purpose support for VLAN tag rewriting is precluded +by the Linux kernel AF_PACKET implementation. + +.SH "SEE ALSO" + +.BR secchan (8), +.BR dpctl (8), +.BR controller (8), +.BR vlogconf (8). 
diff --git a/udatapath/udatapath.c b/udatapath/udatapath.c new file mode 100644 index 000000000..46932daea --- /dev/null +++ b/udatapath/udatapath.c @@ -0,0 +1,311 @@ +/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford + * Junior University + * + * We are making the OpenFlow specification and associated documentation + * (Software) available for public use and benefit with the expectation + * that others will use, modify and enhance the Software and contribute + * those enhancements back to the community. However, since we would + * like to make the Software available for broadest use, with as few + * restrictions as possible permission is hereby granted, free of + * charge, to any person obtaining a copy of this Software to deal in + * the Software under the copyrights without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * The name and trademarks of copyright holder(s) may NOT be used in + * advertising or publicity pertaining to the Software or any + * derivatives without specific, written prior permission.
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "command-line.h" +#include "daemon.h" +#include "datapath.h" +#include "fault.h" +#include "openflow/openflow.h" +#include "poll-loop.h" +#include "queue.h" +#include "util.h" +#include "rconn.h" +#include "timeval.h" +#include "vconn.h" +#include "dirs.h" +#include "vconn-ssl.h" +#include "vlog-socket.h" + +#define THIS_MODULE VLM_udatapath +#include "vlog.h" + +/* Strings to describe the manufacturer, hardware, and software. This data + * is queriable through the switch description stats message. */ +char mfr_desc[DESC_STR_LEN] = "Nicira Networks"; +char hw_desc[DESC_STR_LEN] = "Reference User-Space Switch"; +char sw_desc[DESC_STR_LEN] = VERSION BUILDNR; +char serial_num[SERIAL_NUM_LEN] = "None"; + +static void parse_options(int argc, char *argv[]); +static void usage(void) NO_RETURN; + +static struct datapath *dp; +static uint64_t dpid = UINT64_MAX; +static char *port_list; +static char *local_port = "tap:"; + +static void add_ports(struct datapath *dp, char *port_list); + +int +main(int argc, char *argv[]) +{ + int n_listeners; + int error; + int i; + + set_program_name(argv[0]); + register_fault_handlers(); + time_init(); + vlog_init(); + parse_options(argc, argv); + signal(SIGPIPE, SIG_IGN); + + if (argc - optind < 1) { + ofp_fatal(0, "at least one listener argument is required; " + "use --help for usage"); + } + + error = dp_new(&dp, dpid); + if (error) { + ofp_fatal(error, "could not create datapath"); + } + + n_listeners = 0; + for (i = optind; i < argc; i++) { + const char *pvconn_name = argv[i]; + struct pvconn *pvconn; + int retval; + + retval = pvconn_open(pvconn_name, &pvconn); + if (!retval || retval == EAGAIN) { + dp_add_pvconn(dp, pvconn); + n_listeners++; + } else { + ofp_error(retval, "opening %s", pvconn_name); + } + } + if (!n_listeners) { + ofp_fatal(0, "could not listen for any connections"); + } + + if (port_list) { + add_ports(dp, port_list); + } + if (local_port) { + error = dp_add_local_port(dp, local_port);
+ if (error) { + ofp_fatal(error, "failed to add local port %s", local_port); + } + } + + error = vlog_server_listen(NULL, NULL); + if (error) { + ofp_fatal(error, "could not listen for vlog connections"); + } + + die_if_already_running(); + daemonize(); + + for (;;) { + dp_run(dp); + dp_wait(dp); + poll_block(); + } + + return 0; +} + +static void +add_ports(struct datapath *dp, char *port_list) +{ + char *port, *save_ptr; + + /* Glibc 2.7 has a bug in strtok_r when compiling with optimization that + * can cause segfaults here: + * http://sources.redhat.com/bugzilla/show_bug.cgi?id=5614. + * Using ",," instead of the obvious "," works around it. */ + for (port = strtok_r(port_list, ",,", &save_ptr); port; + port = strtok_r(NULL, ",,", &save_ptr)) { + int error = dp_add_port(dp, port); + if (error) { + ofp_fatal(error, "failed to add port %s", port); + } + } +} + +static void +parse_options(int argc, char *argv[]) +{ + enum { + OPT_MFR_DESC = UCHAR_MAX + 1, + OPT_HW_DESC, + OPT_SW_DESC, + OPT_SERIAL_NUM, + OPT_BOOTSTRAP_CA_CERT, + OPT_NO_LOCAL_PORT + }; + + static struct option long_options[] = { + {"interfaces", required_argument, 0, 'i'}, + {"local-port", required_argument, 0, 'L'}, + {"no-local-port", no_argument, 0, OPT_NO_LOCAL_PORT}, + {"datapath-id", required_argument, 0, 'd'}, + {"verbose", optional_argument, 0, 'v'}, + {"help", no_argument, 0, 'h'}, + {"version", no_argument, 0, 'V'}, + {"mfr-desc", required_argument, 0, OPT_MFR_DESC}, + {"hw-desc", required_argument, 0, OPT_HW_DESC}, + {"sw-desc", required_argument, 0, OPT_SW_DESC}, + {"serial_num", required_argument, 0, OPT_SERIAL_NUM}, + DAEMON_LONG_OPTIONS, +#ifdef HAVE_OPENSSL + VCONN_SSL_LONG_OPTIONS + {"bootstrap-ca-cert", required_argument, 0, OPT_BOOTSTRAP_CA_CERT}, +#endif + {0, 0, 0, 0}, + }; + char *short_options = long_options_to_short_options(long_options); + + for (;;) { + int indexptr; + int c; + + c = getopt_long(argc, argv, short_options, long_options, &indexptr); + if (c == -1) { + 
break; + } + + switch (c) { + case 'd': + if (strlen(optarg) != 12 + || strspn(optarg, "0123456789abcdefABCDEF") != 12) { + ofp_fatal(0, "argument to -d or --datapath-id must be " + "exactly 12 hex digits"); + } + dpid = strtoll(optarg, NULL, 16); + if (!dpid) { + ofp_fatal(0, "argument to -d or --datapath-id must " + "be nonzero"); + } + break; + + case 'h': + usage(); + + case 'V': + printf("%s %s compiled "__DATE__" "__TIME__"\n", + program_name, VERSION BUILDNR); + exit(EXIT_SUCCESS); + + case 'v': + vlog_set_verbosity(optarg); + break; + + case 'i': + if (!port_list) { + port_list = optarg; + } else { + port_list = xasprintf("%s,%s", port_list, optarg); + } + break; + + case 'L': + local_port = optarg; + break; + + case OPT_NO_LOCAL_PORT: + local_port = NULL; + break; + + case OPT_MFR_DESC: + strncpy(mfr_desc, optarg, sizeof mfr_desc - 1); /* Final byte is never written, so it stays NUL. */ + break; + + case OPT_HW_DESC: + strncpy(hw_desc, optarg, sizeof hw_desc - 1); + break; + + case OPT_SW_DESC: + strncpy(sw_desc, optarg, sizeof sw_desc - 1); + break; + + case OPT_SERIAL_NUM: + strncpy(serial_num, optarg, sizeof serial_num - 1); + break; + + DAEMON_OPTION_HANDLERS + +#ifdef HAVE_OPENSSL + VCONN_SSL_OPTION_HANDLERS + + case OPT_BOOTSTRAP_CA_CERT: + vconn_ssl_set_ca_cert_file(optarg, true); + break; +#endif + + case '?': + exit(EXIT_FAILURE); + + default: + abort(); + } + } + free(short_options); +} + +static void +usage(void) +{ + printf("%s: userspace OpenFlow datapath\n" + "usage: %s [OPTIONS] LISTEN...\n" + "where LISTEN is a passive OpenFlow connection method on which\n" + "to listen for incoming connections from the secure channel.\n", + program_name, program_name); + vconn_usage(false, true, false); + printf("\nConfiguration options:\n" + " -i, --interfaces=NETDEV[,NETDEV]...\n" + " add specified initial switch ports\n" + " -L, --local-port=NETDEV set network device for local port\n" + " --no-local-port disable local port\n" + " -d, --datapath-id=ID Use ID as the OpenFlow switch ID\n" + " (ID must consist of 12 hex
digits)\n" + "\nOther options:\n" + " -D, --detach run in background as daemon\n" + " -P, --pidfile[=FILE] create pidfile (default: %s/udatapath.pid)\n" + " -f, --force with -P, start even if already running\n" + " -v, --verbose=MODULE[:FACILITY[:LEVEL]] set logging levels\n" + " -v, --verbose set maximum verbosity level\n" + " -h, --help display this help message\n" + " -V, --version display version information\n", + ofp_rundir); + exit(EXIT_SUCCESS); +} -- 2.43.0