/*
 * Distributed under the terms of the GNU GPL version 2.
 * Copyright (c) 2007, 2008 The Board of Trustees of The Leland
 * Stanford Junior University
 */
8 #include <linux/netdevice.h>
9 #include <linux/etherdevice.h>
10 #include <linux/if_ether.h>
11 #include <linux/if_vlan.h>
12 #include <net/llc_pdu.h>
13 #include <linux/jiffies.h>
14 #include <linux/kernel.h>
15 #include <linux/llc.h>
16 #include <linux/module.h>
18 #include <linux/rcupdate.h>
20 #include "openflow/openflow.h"
21 #include "openflow/nicira-ext.h"
/* Cache from which every 'struct sw_flow' in this file is allocated
 * (kmem_cache_alloc) and to which it is returned (kmem_cache_free). */
struct kmem_cache *flow_cache;
26 /* Internal function used to compare fields in flow. */
28 int flow_fields_match(const struct sw_flow_key *a, const struct sw_flow_key *b,
29 uint32_t w, uint32_t src_mask, uint32_t dst_mask)
31 return ((w & OFPFW_IN_PORT || a->in_port == b->in_port)
32 && (w & OFPFW_DL_VLAN || a->dl_vlan == b->dl_vlan)
33 && (w & OFPFW_DL_SRC || !memcmp(a->dl_src, b->dl_src, ETH_ALEN))
34 && (w & OFPFW_DL_DST || !memcmp(a->dl_dst, b->dl_dst, ETH_ALEN))
35 && (w & OFPFW_DL_TYPE || a->dl_type == b->dl_type)
36 && !((a->nw_src ^ b->nw_src) & src_mask)
37 && !((a->nw_dst ^ b->nw_dst) & dst_mask)
38 && (w & OFPFW_NW_PROTO || a->nw_proto == b->nw_proto)
39 && (w & OFPFW_TP_SRC || a->tp_src == b->tp_src)
40 && (w & OFPFW_TP_DST || a->tp_dst == b->tp_dst));
43 /* Returns nonzero if 'a' and 'b' match, that is, if their fields are equal
44 * modulo wildcards in 'b', zero otherwise. */
45 int flow_matches_1wild(const struct sw_flow_key *a,
46 const struct sw_flow_key *b)
48 return flow_fields_match(a, b, b->wildcards,
49 b->nw_src_mask, b->nw_dst_mask);
51 EXPORT_SYMBOL(flow_matches_1wild);
53 /* Returns nonzero if 'a' and 'b' match, that is, if their fields are equal
54 * modulo wildcards in 'a' or 'b', zero otherwise. */
55 int flow_matches_2wild(const struct sw_flow_key *a,
56 const struct sw_flow_key *b)
58 return flow_fields_match(a, b,
59 a->wildcards | b->wildcards,
60 a->nw_src_mask & b->nw_src_mask,
61 a->nw_dst_mask & b->nw_dst_mask);
63 EXPORT_SYMBOL(flow_matches_2wild);
65 /* Returns nonzero if 't' (the table entry's key) and 'd' (the key
66 * describing the match) match, that is, if their fields are
67 * equal modulo wildcards, zero otherwise. If 'strict' is nonzero, the
68 * wildcards must match in both 't_key' and 'd_key'. Note that the
69 * table's wildcards are ignored unless 'strict' is set. */
70 int flow_matches_desc(const struct sw_flow_key *t, const struct sw_flow_key *d,
73 if (strict && d->wildcards != t->wildcards)
75 return flow_matches_1wild(t, d);
77 EXPORT_SYMBOL(flow_matches_desc);
79 static uint32_t make_nw_mask(int n_wild_bits)
81 n_wild_bits &= (1u << OFPFW_NW_SRC_BITS) - 1;
82 return n_wild_bits < 32 ? htonl(~((1u << n_wild_bits) - 1)) : 0;
85 void flow_extract_match(struct sw_flow_key* to, const struct ofp_match* from)
87 to->wildcards = ntohl(from->wildcards) & OFPFW_ALL;
89 to->in_port = from->in_port;
90 to->dl_vlan = from->dl_vlan;
91 memcpy(to->dl_src, from->dl_src, ETH_ALEN);
92 memcpy(to->dl_dst, from->dl_dst, ETH_ALEN);
93 to->dl_type = from->dl_type;
95 to->nw_src = to->nw_dst = to->nw_proto = 0;
96 to->tp_src = to->tp_dst = 0;
98 #define OFPFW_TP (OFPFW_TP_SRC | OFPFW_TP_DST)
99 #define OFPFW_NW (OFPFW_NW_SRC_MASK | OFPFW_NW_DST_MASK | OFPFW_NW_PROTO)
100 if (to->wildcards & OFPFW_DL_TYPE) {
101 /* Can't sensibly match on network or transport headers if the
102 * data link type is unknown. */
103 to->wildcards |= OFPFW_NW | OFPFW_TP;
104 } else if (from->dl_type == htons(ETH_P_IP)) {
105 to->nw_src = from->nw_src;
106 to->nw_dst = from->nw_dst;
107 to->nw_proto = from->nw_proto;
109 if (to->wildcards & OFPFW_NW_PROTO) {
110 /* Can't sensibly match on transport headers if the
111 * network protocol is unknown. */
112 to->wildcards |= OFPFW_TP;
113 } else if (from->nw_proto == IPPROTO_TCP
114 || from->nw_proto == IPPROTO_UDP
115 || from->nw_proto == IPPROTO_ICMP) {
116 to->tp_src = from->tp_src;
117 to->tp_dst = from->tp_dst;
119 /* Transport layer fields are undefined. Mark them as
120 * exact-match to allow such flows to reside in
121 * table-hash, instead of falling into table-linear. */
122 to->wildcards &= ~OFPFW_TP;
125 /* Network and transport layer fields are undefined. Mark them
126 * as exact-match to allow such flows to reside in table-hash,
127 * instead of falling into table-linear. */
128 to->wildcards &= ~(OFPFW_NW | OFPFW_TP);
131 /* We set these late because code above adjusts to->wildcards. */
132 to->nw_src_mask = make_nw_mask(to->wildcards >> OFPFW_NW_SRC_SHIFT);
133 to->nw_dst_mask = make_nw_mask(to->wildcards >> OFPFW_NW_DST_SHIFT);
/* Fills the OpenFlow match 'to' from flow key 'from'.
 *
 * 'wildcards' is converted with htonl(); every other field visible here is
 * copied as-is, with no byte-order conversion.
 *
 * NOTE(review): the embedded source numbering skips from 148 to 152 after
 * the tp_dst assignment, so a further statement (for instance one clearing
 * padding in 'to') may exist but is not visible here -- confirm against the
 * complete file before changing this function. */
136 void flow_fill_match(struct ofp_match* to, const struct sw_flow_key* from)
138 to->wildcards = htonl(from->wildcards);
139 to->in_port = from->in_port;
140 to->dl_vlan = from->dl_vlan;
141 memcpy(to->dl_src, from->dl_src, ETH_ALEN);
142 memcpy(to->dl_dst, from->dl_dst, ETH_ALEN);
143 to->dl_type = from->dl_type;
144 to->nw_src = from->nw_src;
145 to->nw_dst = from->nw_dst;
146 to->nw_proto = from->nw_proto;
147 to->tp_src = from->tp_src;
148 to->tp_dst = from->tp_dst;
/* Reports whether 'flow' has timed out.
 *
 * Returns NXFER_IDLE_TIMEOUT if idle_timeout is not OFP_FLOW_PERMANENT and
 * the current 64-bit jiffies count is after flow->used + idle_timeout * HZ.
 * Otherwise returns NXFER_HARD_TIMEOUT if hard_timeout is not
 * OFP_FLOW_PERMANENT and the current jiffies count is after
 * flow->created + hard_timeout * HZ.  The idle check is made first, so it
 * wins when both timeouts have expired.
 *
 * NOTE(review): the value returned when neither timeout has expired is not
 * visible in this listing (source lines 161-163 are absent) -- confirm it
 * is distinguishable from both NXFER_* codes. */
152 int flow_timeout(struct sw_flow *flow)
154 if (flow->idle_timeout != OFP_FLOW_PERMANENT
155 && time_after64(get_jiffies_64(), flow->used + flow->idle_timeout * HZ))
156 return NXFER_IDLE_TIMEOUT;
157 else if (flow->hard_timeout != OFP_FLOW_PERMANENT
158 && time_after64(get_jiffies_64(),
159 flow->created + flow->hard_timeout * HZ))
160 return NXFER_HARD_TIMEOUT;
164 EXPORT_SYMBOL(flow_timeout);
166 /* Returns nonzero if 'flow' contains an output action to 'out_port' or
167 * has the value OFPP_NONE. 'out_port' is in network-byte order. */
168 int flow_has_out_port(struct sw_flow *flow, uint16_t out_port)
170 struct sw_flow_actions *sf_acts;
174 if (out_port == htons(OFPP_NONE))
177 sf_acts = rcu_dereference(flow->sf_acts);
179 actions_len = sf_acts->actions_len;
180 p = (uint8_t *)sf_acts->actions;
182 while (actions_len > 0) {
183 struct ofp_action_header *ah = (struct ofp_action_header *)p;
184 size_t len = ntohs(ah->len);
186 if (ah->type == htons(OFPAT_OUTPUT)) {
187 struct ofp_action_output *oa = (struct ofp_action_output *)p;
188 if (oa->port == out_port)
198 EXPORT_SYMBOL(flow_has_out_port);
200 /* Allocates and returns a new flow with room for 'actions_len' actions,
201 * using allocation flags 'flags'. Returns the new flow or a null pointer
203 struct sw_flow *flow_alloc(size_t actions_len, gfp_t flags)
205 struct sw_flow_actions *sfa;
206 size_t size = sizeof *sfa + actions_len;
207 struct sw_flow *flow = kmem_cache_alloc(flow_cache, flags);
211 sfa = kmalloc(size, flags);
212 if (unlikely(!sfa)) {
213 kmem_cache_free(flow_cache, flow);
216 sfa->actions_len = actions_len;
222 /* Frees 'flow' immediately. */
223 void flow_free(struct sw_flow *flow)
227 kfree(flow->sf_acts);
228 kmem_cache_free(flow_cache, flow);
230 EXPORT_SYMBOL(flow_free);
232 /* RCU callback used by flow_deferred_free. */
233 static void rcu_free_flow_callback(struct rcu_head *rcu)
235 struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
239 /* Schedules 'flow' to be freed after the next RCU grace period.
240 * The caller must hold rcu_read_lock for this to be sensible. */
241 void flow_deferred_free(struct sw_flow *flow)
243 call_rcu(&flow->rcu, rcu_free_flow_callback);
245 EXPORT_SYMBOL(flow_deferred_free);
247 /* RCU callback used by flow_deferred_free_acts. */
248 static void rcu_free_acts_callback(struct rcu_head *rcu)
250 struct sw_flow_actions *sf_acts = container_of(rcu,
251 struct sw_flow_actions, rcu);
255 /* Schedules 'sf_acts' to be freed after the next RCU grace period.
256 * The caller must hold rcu_read_lock for this to be sensible. */
257 void flow_deferred_free_acts(struct sw_flow_actions *sf_acts)
259 call_rcu(&sf_acts->rcu, rcu_free_acts_callback);
261 EXPORT_SYMBOL(flow_deferred_free_acts);
263 /* Copies 'actions' into a newly allocated structure for use by 'flow'
264 * and safely frees the structure that defined the previous actions. */
265 void flow_replace_acts(struct sw_flow *flow,
266 const struct ofp_action_header *actions, size_t actions_len)
268 struct sw_flow_actions *sfa;
269 struct sw_flow_actions *orig_sfa = flow->sf_acts;
270 size_t size = sizeof *sfa + actions_len;
272 sfa = kmalloc(size, GFP_ATOMIC);
276 sfa->actions_len = actions_len;
277 memcpy(sfa->actions, actions, actions_len);
279 rcu_assign_pointer(flow->sf_acts, sfa);
280 flow_deferred_free_acts(orig_sfa);
284 EXPORT_SYMBOL(flow_replace_acts);
286 /* Prints a representation of 'key' to the kernel log. */
287 void print_flow(const struct sw_flow_key *key)
289 printk("wild%08x port%04x:vlan%04x mac%02x:%02x:%02x:%02x:%02x:%02x"
290 "->%02x:%02x:%02x:%02x:%02x:%02x "
291 "proto%04x ip%u.%u.%u.%u->%u.%u.%u.%u port%d->%d\n",
292 key->wildcards, ntohs(key->in_port), ntohs(key->dl_vlan),
293 key->dl_src[0], key->dl_src[1], key->dl_src[2],
294 key->dl_src[3], key->dl_src[4], key->dl_src[5],
295 key->dl_dst[0], key->dl_dst[1], key->dl_dst[2],
296 key->dl_dst[3], key->dl_dst[4], key->dl_dst[5],
298 ((unsigned char *)&key->nw_src)[0],
299 ((unsigned char *)&key->nw_src)[1],
300 ((unsigned char *)&key->nw_src)[2],
301 ((unsigned char *)&key->nw_src)[3],
302 ((unsigned char *)&key->nw_dst)[0],
303 ((unsigned char *)&key->nw_dst)[1],
304 ((unsigned char *)&key->nw_dst)[2],
305 ((unsigned char *)&key->nw_dst)[3],
306 ntohs(key->tp_src), ntohs(key->tp_dst));
308 EXPORT_SYMBOL(print_flow);
310 #define SNAP_OUI_LEN 3
315 uint8_t dsap; /* Always 0xAA */
316 uint8_t ssap; /* Always 0xAA */
318 uint8_t oui[SNAP_OUI_LEN];
320 } __attribute__ ((packed));
322 static int is_snap(const struct eth_snap_hdr *esh)
324 return (esh->dsap == LLC_SAP_SNAP
325 && esh->ssap == LLC_SAP_SNAP
326 && !memcmp(esh->oui, "\0\0\0", 3));
/* Overview of the steps visible below:
 *
 *  - 'key' is zeroed, then dl_vlan is preset to htons(OFP_VLAN_NONE) and
 *    in_port to htons(in_port).
 *  - Up to the first 64 bytes of the frame (or all of it, if shorter) are
 *    requested in the linear area with pskb_may_pull().
 *  - dl_type comes from the Ethernet header when its value is at least
 *    OFP_DL_TYPE_ETH2_CUTOFF, or from the SNAP header when is_snap() accepts
 *    it; OFP_DL_TYPE_NOT_ETH_TYPE is also assigned on one path, with the
 *    offset advanced past a struct llc_pdu_un when the frame is long enough.
 *  - An 802.1Q tag, if present and complete, replaces dl_type with the
 *    encapsulated type and stores the VLAN ID bits of the TCI in dl_vlan.
 *  - The MAC addresses are copied and the network header offset is set.
 *  - For IP frames accepted by iphdr_ok(), nw_src/nw_dst/nw_proto are
 *    recorded and the transport header is placed 'ihl * 4' bytes in.  When
 *    neither IP_MF nor an IP_OFFSET bit is set, TCP or UDP ports, or the
 *    ICMP type and code (widened to 16 bits with htons()), are recorded
 *    provided the matching *hdr_ok() check passes.
 *
 * NOTE(review): several short lines of this function are not visible in
 * this listing (local declarations, the bodies of the early-exit checks,
 * the 'else' arms that follow the "Avoid tricking other code" comments, the
 * fragment handling and the final return).  Confirm against the complete
 * file before relying on anything not listed above. */
329 /* Parses the Ethernet frame in 'skb', which was received on 'in_port',
330 * and initializes 'key' to match. Returns 1 if 'skb' contains an IP
331 * fragment, 0 otherwise. */
332 int flow_extract(struct sk_buff *skb, uint16_t in_port,
333 struct sw_flow_key *key)
336 struct eth_snap_hdr *esh;
340 memset(key, 0, sizeof *key);
341 key->dl_vlan = htons(OFP_VLAN_NONE);
342 key->in_port = htons(in_port);
344 if (skb->len < sizeof *eth)
346 if (!pskb_may_pull(skb, skb->len >= 64 ? 64 : skb->len)) {
350 skb_reset_mac_header(skb);
352 esh = (struct eth_snap_hdr *) eth;
353 nh_ofs = sizeof *eth;
354 if (likely(ntohs(eth->h_proto) >= OFP_DL_TYPE_ETH2_CUTOFF))
355 key->dl_type = eth->h_proto;
356 else if (skb->len >= sizeof *esh && is_snap(esh)) {
357 key->dl_type = esh->ethertype;
358 nh_ofs = sizeof *esh;
360 key->dl_type = htons(OFP_DL_TYPE_NOT_ETH_TYPE);
361 if (skb->len >= nh_ofs + sizeof(struct llc_pdu_un)) {
362 nh_ofs += sizeof(struct llc_pdu_un);
366 /* Check for a VLAN tag */
367 if (key->dl_type == htons(ETH_P_8021Q) &&
368 skb->len >= nh_ofs + sizeof(struct vlan_hdr)) {
369 struct vlan_hdr *vh = (struct vlan_hdr*)(skb->data + nh_ofs);
370 key->dl_type = vh->h_vlan_encapsulated_proto;
371 key->dl_vlan = vh->h_vlan_TCI & htons(VLAN_VID_MASK);
372 nh_ofs += sizeof(struct vlan_hdr);
374 memcpy(key->dl_src, eth->h_source, ETH_ALEN);
375 memcpy(key->dl_dst, eth->h_dest, ETH_ALEN);
376 skb_set_network_header(skb, nh_ofs);
379 if (key->dl_type == htons(ETH_P_IP) && iphdr_ok(skb)) {
380 struct iphdr *nh = ip_hdr(skb);
381 int th_ofs = nh_ofs + nh->ihl * 4;
382 key->nw_src = nh->saddr;
383 key->nw_dst = nh->daddr;
384 key->nw_proto = nh->protocol;
385 skb_set_transport_header(skb, th_ofs);
387 /* Transport layer. */
388 if (!(nh->frag_off & htons(IP_MF | IP_OFFSET))) {
389 if (key->nw_proto == IPPROTO_TCP) {
390 if (tcphdr_ok(skb)) {
391 struct tcphdr *tcp = tcp_hdr(skb);
392 key->tp_src = tcp->source;
393 key->tp_dst = tcp->dest;
395 /* Avoid tricking other code into
396 * thinking that this packet has an L4
400 } else if (key->nw_proto == IPPROTO_UDP) {
401 if (udphdr_ok(skb)) {
402 struct udphdr *udp = udp_hdr(skb);
403 key->tp_src = udp->source;
404 key->tp_dst = udp->dest;
406 /* Avoid tricking other code into
407 * thinking that this packet has an L4
411 } else if (key->nw_proto == IPPROTO_ICMP) {
412 if (icmphdr_ok(skb)) {
413 struct icmphdr *icmp = icmp_hdr(skb);
414 /* The ICMP type and code fields use the 16-bit
415 * transport port fields, so we need to store them
416 * in 16-bit network byte order. */
417 key->icmp_type = htons(icmp->type);
418 key->icmp_code = htons(icmp->code);
420 /* Avoid tricking other code into
421 * thinking that this packet has an L4
430 skb_reset_transport_header(skb);
435 /* Initializes the flow module.
436 * Returns zero if successful or a negative error code. */
439 flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
441 if (flow_cache == NULL)
447 /* Uninitializes the flow module. */
450 kmem_cache_destroy(flow_cache);