Verify in fwd_port_input() that we are not passed packets that are shared.
[sliver-openvswitch.git] / datapath / forward.c
1 /*
2  * Distributed under the terms of the GNU GPL version 2.
3  * Copyright (c) 2007, 2008 The Board of Trustees of The Leland 
4  * Stanford Junior University
5  */
6
7 #include <linux/netdevice.h>
8 #include <linux/etherdevice.h>
9 #include <linux/if_ether.h>
10 #include <linux/if_vlan.h>
11 #include <asm/uaccess.h>
12 #include <linux/types.h>
13 #include "forward.h"
14 #include "datapath.h"
15 #include "openflow/nicira-ext.h"
16 #include "dp_act.h"
17 #include "nx_msg.h"
18 #include "chain.h"
19 #include "flow.h"
20
21 /* FIXME: do we need to use GFP_ATOMIC everywhere here? */
22
23
24 static struct sk_buff *retrieve_skb(uint32_t id);
25 static void discard_skb(uint32_t id);
26
27 /* 'skb' was received on port 'p', which may be a physical switch port, the
28  * local port, or a null pointer.  Process it according to 'chain'.  Returns 0
29  * if successful, in which case 'skb' is destroyed, or -ESRCH if there is no
30  * matching flow, in which case 'skb' still belongs to the caller. */
31 int run_flow_through_tables(struct sw_chain *chain, struct sk_buff *skb,
32                             struct net_bridge_port *p)
33 {
34         /* Ethernet address used as the destination for STP frames. */
35         static const uint8_t stp_eth_addr[ETH_ALEN]
36                 = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x01 };
37         struct sw_flow_key key;
38         struct sw_flow *flow;
39
40         if (flow_extract(skb, p ? p->port_no : OFPP_NONE, &key)
41             && (chain->dp->flags & OFPC_FRAG_MASK) == OFPC_FRAG_DROP) {
42                 /* Drop fragment. */
43                 kfree_skb(skb);
44                 return 0;
45         }
46         if (p && p->config & (OFPPC_NO_RECV | OFPPC_NO_RECV_STP) &&
47             p->config & (compare_ether_addr(key.dl_dst, stp_eth_addr)
48                         ? OFPPC_NO_RECV : OFPPC_NO_RECV_STP)) {
49                 kfree_skb(skb);
50                 return 0;
51         }
52
53         flow = chain_lookup(chain, &key);
54         if (likely(flow != NULL)) {
55                 struct sw_flow_actions *sf_acts = rcu_dereference(flow->sf_acts);
56                 flow_used(flow, skb);
57                 execute_actions(chain->dp, skb, &key,
58                                 sf_acts->actions, sf_acts->actions_len, 0);
59                 return 0;
60         } else {
61                 return -ESRCH;
62         }
63 }
64
65 /* 'skb' was received on port 'p', which may be a physical switch port, the
66  * local port, or a null pointer.  Process it according to 'chain', sending it
67  * up to the controller if no flow matches.  Takes ownership of 'skb'. */
68 void fwd_port_input(struct sw_chain *chain, struct sk_buff *skb,
69                     struct net_bridge_port *p)
70 {
71         WARN_ON_ONCE(skb_shared(skb));
72         if (run_flow_through_tables(chain, skb, p))
73                 dp_output_control(chain->dp, skb, fwd_save_skb(skb), 
74                                   chain->dp->miss_send_len,
75                                   OFPR_NO_MATCH);
76 }
77
78 static int
79 recv_hello(struct sw_chain *chain, const struct sender *sender,
80            const void *msg)
81 {
82         return dp_send_hello(chain->dp, sender, msg);
83 }
84
85 static int
86 recv_features_request(struct sw_chain *chain, const struct sender *sender,
87                       const void *msg) 
88 {
89         return dp_send_features_reply(chain->dp, sender);
90 }
91
92 static int
93 recv_get_config_request(struct sw_chain *chain, const struct sender *sender,
94                         const void *msg)
95 {
96         return dp_send_config_reply(chain->dp, sender);
97 }
98
99 static int
100 recv_set_config(struct sw_chain *chain, const struct sender *sender,
101                 const void *msg)
102 {
103         const struct ofp_switch_config *osc = msg;
104         int flags;
105
106         flags = ntohs(osc->flags) & (OFPC_SEND_FLOW_EXP | OFPC_FRAG_MASK);
107         if ((flags & OFPC_FRAG_MASK) != OFPC_FRAG_NORMAL
108             && (flags & OFPC_FRAG_MASK) != OFPC_FRAG_DROP) {
109                 flags = (flags & ~OFPC_FRAG_MASK) | OFPC_FRAG_DROP;
110         }
111         chain->dp->flags = flags;
112
113         chain->dp->miss_send_len = ntohs(osc->miss_send_len);
114
115         return 0;
116 }
117
118 static int
119 recv_packet_out(struct sw_chain *chain, const struct sender *sender,
120                 const void *msg)
121 {
122         const struct ofp_packet_out *opo = msg;
123         struct sk_buff *skb;
124         struct vlan_ethhdr *mac;
125         int nh_ofs;
126         uint16_t v_code;
127         struct sw_flow_key key;
128         size_t actions_len = ntohs(opo->actions_len);
129
130         if (actions_len > (ntohs(opo->header.length) - sizeof *opo)) {
131                 if (net_ratelimit()) 
132                         printk("message too short for number of actions\n");
133                 return -EINVAL;
134         }
135
136         if (ntohl(opo->buffer_id) == (uint32_t) -1) {
137                 int data_len = ntohs(opo->header.length) - sizeof *opo - actions_len;
138
139                 /* FIXME: there is likely a way to reuse the data in msg. */
140                 skb = alloc_skb(data_len, GFP_ATOMIC);
141                 if (!skb)
142                         return -ENOMEM;
143
144                 /* FIXME?  We don't reserve NET_IP_ALIGN or NET_SKB_PAD since
145                  * we're just transmitting this raw without examining anything
146                  * at those layers. */
147                 memcpy(skb_put(skb, data_len), (uint8_t *)opo->actions + actions_len, 
148                                 data_len);
149
150                 skb_set_mac_header(skb, 0);
151                 mac = vlan_eth_hdr(skb);
152                 if (likely(mac->h_vlan_proto != htons(ETH_P_8021Q)))
153                         nh_ofs = sizeof(struct ethhdr);
154                 else
155                         nh_ofs = sizeof(struct vlan_ethhdr);
156                 skb_set_network_header(skb, nh_ofs);
157         } else {
158                 skb = retrieve_skb(ntohl(opo->buffer_id));
159                 if (!skb)
160                         return -ESRCH;
161         }
162
163         dp_set_origin(chain->dp, ntohs(opo->in_port), skb);
164
165         flow_extract(skb, ntohs(opo->in_port), &key);
166
167         v_code = validate_actions(chain->dp, &key, opo->actions, actions_len);
168         if (v_code != ACT_VALIDATION_OK) {
169                 dp_send_error_msg(chain->dp, sender, OFPET_BAD_ACTION, v_code,
170                                   msg, ntohs(opo->header.length));
171                 goto error;
172         }
173
174         execute_actions(chain->dp, skb, &key, opo->actions, actions_len, 1);
175
176         return 0;
177
178 error:
179         kfree_skb(skb);
180         return -EINVAL;
181 }
182
183 static int
184 recv_port_mod(struct sw_chain *chain, const struct sender *sender,
185               const void *msg)
186 {
187         const struct ofp_port_mod *opm = msg;
188
189         dp_update_port_flags(chain->dp, opm);
190
191         return 0;
192 }
193
194 static int
195 recv_echo_request(struct sw_chain *chain, const struct sender *sender,
196                   const void *msg) 
197 {
198         return dp_send_echo_reply(chain->dp, sender, msg);
199 }
200
/* Handler for OFPT_ECHO_REPLY: the message is accepted and ignored.  Always
 * returns 0. */
static int recv_echo_reply(struct sw_chain *chain, const struct sender *sender,
                           const void *msg)
{
        return 0;
}
207
208 static int
209 add_flow(struct sw_chain *chain, const struct sender *sender, 
210                 const struct ofp_flow_mod *ofm)
211 {
212         int error = -ENOMEM;
213         uint16_t v_code;
214         struct sw_flow *flow;
215         size_t actions_len = ntohs(ofm->header.length) - sizeof *ofm;
216
217         /* Allocate memory. */
218         flow = flow_alloc(actions_len, GFP_ATOMIC);
219         if (flow == NULL)
220                 goto error;
221
222         flow_extract_match(&flow->key, &ofm->match);
223
224         v_code = validate_actions(chain->dp, &flow->key, ofm->actions, actions_len);
225         if (v_code != ACT_VALIDATION_OK) {
226                 dp_send_error_msg(chain->dp, sender, OFPET_BAD_ACTION, v_code,
227                                   ofm, ntohs(ofm->header.length));
228                 goto error_free_flow;
229         }
230
231         /* Fill out flow. */
232         flow->priority = flow->key.wildcards ? ntohs(ofm->priority) : -1;
233         flow->idle_timeout = ntohs(ofm->idle_timeout);
234         flow->hard_timeout = ntohs(ofm->hard_timeout);
235         flow->used = jiffies;
236         flow->init_time = jiffies;
237         flow->byte_count = 0;
238         flow->packet_count = 0;
239         spin_lock_init(&flow->lock);
240         memcpy(flow->sf_acts->actions, ofm->actions, actions_len);
241
242         /* Act. */
243         error = chain_insert(chain, flow);
244         if (error == -ENOBUFS) {
245                 dp_send_error_msg(chain->dp, sender, OFPET_FLOW_MOD_FAILED, 
246                                 OFPFMFC_ALL_TABLES_FULL, ofm, ntohs(ofm->header.length));
247                 goto error_free_flow;
248         } else if (error)
249                 goto error_free_flow;
250         error = 0;
251         if (ntohl(ofm->buffer_id) != (uint32_t) -1) {
252                 struct sk_buff *skb = retrieve_skb(ntohl(ofm->buffer_id));
253                 if (skb) {
254                         struct sw_flow_key key;
255                         flow_used(flow, skb);
256                         dp_set_origin(chain->dp, ntohs(ofm->match.in_port), skb);
257                         flow_extract(skb, ntohs(ofm->match.in_port), &key);
258                         execute_actions(chain->dp, skb, &key, ofm->actions, actions_len, 0);
259                 }
260                 else
261                         error = -ESRCH;
262         }
263         return error;
264
265 error_free_flow:
266         flow_free(flow);
267 error:
268         if (ntohl(ofm->buffer_id) != (uint32_t) -1)
269                 discard_skb(ntohl(ofm->buffer_id));
270         return error;
271 }
272
273 static int
274 mod_flow(struct sw_chain *chain, const struct sender *sender,
275                 const struct ofp_flow_mod *ofm)
276 {
277         int error = -ENOMEM;
278         uint16_t v_code;
279         size_t actions_len;
280         struct sw_flow_key key;
281         uint16_t priority;
282         int strict;
283
284         flow_extract_match(&key, &ofm->match);
285
286         actions_len = ntohs(ofm->header.length) - sizeof *ofm;
287
288         v_code = validate_actions(chain->dp, &key, ofm->actions, actions_len);
289         if (v_code != ACT_VALIDATION_OK) {
290                 dp_send_error_msg(chain->dp, sender, OFPET_BAD_ACTION, v_code,
291                                   ofm, ntohs(ofm->header.length));
292                 goto error;
293         }
294
295         priority = key.wildcards ? ntohs(ofm->priority) : -1;
296         strict = (ofm->command == htons(OFPFC_MODIFY_STRICT)) ? 1 : 0;
297         chain_modify(chain, &key, priority, strict, ofm->actions, actions_len);
298
299         if (ntohl(ofm->buffer_id) != (uint32_t) -1) {
300                 struct sk_buff *skb = retrieve_skb(ntohl(ofm->buffer_id));
301                 if (skb) {
302                         struct sw_flow_key skb_key;
303                         flow_extract(skb, ntohs(ofm->match.in_port), &skb_key);
304                         execute_actions(chain->dp, skb, &skb_key, 
305                                         ofm->actions, actions_len, 0);
306                 }
307                 else
308                         error = -ESRCH;
309         }
310         return error;
311
312 error:
313         if (ntohl(ofm->buffer_id) != (uint32_t) -1)
314                 discard_skb(ntohl(ofm->buffer_id));
315         return error;
316 }
317
318 static int
319 recv_flow(struct sw_chain *chain, const struct sender *sender, const void *msg)
320 {
321         const struct ofp_flow_mod *ofm = msg;
322         uint16_t command = ntohs(ofm->command);
323
324         if (command == OFPFC_ADD) {
325                 return add_flow(chain, sender, ofm);
326         } else if ((command == OFPFC_MODIFY) || (command == OFPFC_MODIFY_STRICT)) {
327                 return mod_flow(chain, sender, ofm);
328         }  else if (command == OFPFC_DELETE) {
329                 struct sw_flow_key key;
330                 flow_extract_match(&key, &ofm->match);
331                 return chain_delete(chain, &key, 0, 0) ? 0 : -ESRCH;
332         } else if (command == OFPFC_DELETE_STRICT) {
333                 struct sw_flow_key key;
334                 uint16_t priority;
335                 flow_extract_match(&key, &ofm->match);
336                 priority = key.wildcards ? ntohs(ofm->priority) : -1;
337                 return chain_delete(chain, &key, priority, 1) ? 0 : -ESRCH;
338         } else {
339                 return -ENOTSUPP;
340         }
341 }
342
343 static int
344 recv_vendor(struct sw_chain *chain, const struct sender *sender, 
345                 const void *msg)
346 {
347         const struct ofp_vendor_header *ovh = msg;
348
349         switch(ntohl(ovh->vendor))
350         {
351         case NX_VENDOR_ID:
352                 return nx_recv_msg(chain, sender, msg);
353         default:
354                 if (net_ratelimit())
355                         printk("Uknown vendor: %#x\n", ntohl(ovh->vendor));
356                 dp_send_error_msg(chain->dp, sender, OFPET_BAD_REQUEST,
357                                   OFPBRC_BAD_VENDOR, msg, ntohs(ovh->header.length));
358                 return -EINVAL;
359         }
360 }
361
362 /* 'msg', which is 'length' bytes long, was received across Netlink from
363  * 'sender'.  Apply it to 'chain'. */
364 int
365 fwd_control_input(struct sw_chain *chain, const struct sender *sender,
366                   const void *msg, size_t length)
367 {
368
369         struct openflow_packet {
370                 size_t min_size;
371                 int (*handler)(struct sw_chain *, const struct sender *,
372                                const void *);
373         };
374
375         static const struct openflow_packet packets[] = {
376                 [OFPT_HELLO] = {
377                         sizeof (struct ofp_header),
378                         recv_hello,
379                 },
380                 [OFPT_ECHO_REQUEST] = {
381                         sizeof (struct ofp_header),
382                         recv_echo_request,
383                 },
384                 [OFPT_ECHO_REPLY] = {
385                         sizeof (struct ofp_header),
386                         recv_echo_reply,
387                 },
388                 [OFPT_VENDOR] = {
389                         sizeof (struct ofp_vendor_header),
390                         recv_vendor,
391                 },
392                 [OFPT_FEATURES_REQUEST] = {
393                         sizeof (struct ofp_header),
394                         recv_features_request,
395                 },
396                 [OFPT_GET_CONFIG_REQUEST] = {
397                         sizeof (struct ofp_header),
398                         recv_get_config_request,
399                 },
400                 [OFPT_SET_CONFIG] = {
401                         sizeof (struct ofp_switch_config),
402                         recv_set_config,
403                 },
404                 [OFPT_PACKET_OUT] = {
405                         sizeof (struct ofp_packet_out),
406                         recv_packet_out,
407                 },
408                 [OFPT_FLOW_MOD] = {
409                         sizeof (struct ofp_flow_mod),
410                         recv_flow,
411                 },
412                 [OFPT_PORT_MOD] = {
413                         sizeof (struct ofp_port_mod),
414                         recv_port_mod,
415                 }
416         };
417
418         struct ofp_header *oh;
419
420         oh = (struct ofp_header *) msg;
421         if (oh->version != OFP_VERSION
422             && oh->type != OFPT_HELLO
423             && oh->type != OFPT_ERROR
424             && oh->type != OFPT_ECHO_REQUEST
425             && oh->type != OFPT_ECHO_REPLY
426             && oh->type != OFPT_VENDOR)
427         {
428                 dp_send_error_msg(chain->dp, sender, OFPET_BAD_REQUEST,
429                                   OFPBRC_BAD_VERSION, msg, length);
430                 return -EINVAL;
431         }
432         if (ntohs(oh->length) != length) {
433                 if (net_ratelimit())
434                         printk("received message length wrong: %d/%d\n", 
435                                 ntohs(oh->length), length);
436                 return -EINVAL;
437         }
438
439         if (oh->type < ARRAY_SIZE(packets)) {
440                 const struct openflow_packet *pkt = &packets[oh->type];
441                 if (pkt->handler) {
442                         if (length < pkt->min_size)
443                                 return -EFAULT;
444                         return pkt->handler(chain, sender, msg);
445                 }
446         }
447         dp_send_error_msg(chain->dp, sender, OFPET_BAD_REQUEST,
448                           OFPBRC_BAD_TYPE, msg, length);
449         return -EINVAL;
450 }
451
452 /* Packet buffering. */
453
454 #define OVERWRITE_SECS  1
455 #define OVERWRITE_JIFFIES (OVERWRITE_SECS * HZ)
456
457 struct packet_buffer {
458         struct sk_buff *skb;
459         uint32_t cookie;
460         unsigned long exp_jiffies;
461 };
462
463 static struct packet_buffer buffers[N_PKT_BUFFERS];
464 static unsigned int buffer_idx;
465 static DEFINE_SPINLOCK(buffer_lock);
466
467 uint32_t fwd_save_skb(struct sk_buff *skb)
468 {
469         struct sk_buff *old_skb = NULL;
470         struct packet_buffer *p;
471         unsigned long int flags;
472         uint32_t id;
473
474         spin_lock_irqsave(&buffer_lock, flags);
475         buffer_idx = (buffer_idx + 1) & PKT_BUFFER_MASK;
476         p = &buffers[buffer_idx];
477         if (p->skb) {
478                 /* Don't buffer packet if existing entry is less than
479                  * OVERWRITE_SECS old. */
480                 if (time_before(jiffies, p->exp_jiffies)) {
481                         spin_unlock_irqrestore(&buffer_lock, flags);
482                         return -1;
483                 } else {
484                         /* Defer kfree_skb() until interrupts re-enabled. */
485                         old_skb = p->skb;
486                 }
487         }
488         /* Don't use maximum cookie value since the all-bits-1 id is
489          * special. */
490         if (++p->cookie >= (1u << PKT_COOKIE_BITS) - 1)
491                 p->cookie = 0;
492         skb_get(skb);
493         p->skb = skb;
494         p->exp_jiffies = jiffies + OVERWRITE_JIFFIES;
495         id = buffer_idx | (p->cookie << PKT_BUFFER_BITS);
496         spin_unlock_irqrestore(&buffer_lock, flags);
497
498         if (old_skb)
499                 kfree_skb(old_skb);
500
501         return id;
502 }
503
504 static struct sk_buff *retrieve_skb(uint32_t id)
505 {
506         unsigned long int flags;
507         struct sk_buff *skb = NULL;
508         struct packet_buffer *p;
509
510         spin_lock_irqsave(&buffer_lock, flags);
511         p = &buffers[id & PKT_BUFFER_MASK];
512         if (p->cookie == id >> PKT_BUFFER_BITS) {
513                 skb = p->skb;
514                 p->skb = NULL;
515         } else {
516                 printk("cookie mismatch: %x != %x\n",
517                                 id >> PKT_BUFFER_BITS, p->cookie);
518         }
519         spin_unlock_irqrestore(&buffer_lock, flags);
520
521         return skb;
522 }
523
524 void fwd_discard_all(void) 
525 {
526         int i;
527
528         for (i = 0; i < N_PKT_BUFFERS; i++) {
529                 struct sk_buff *skb;
530                 unsigned long int flags;
531
532                 /* Defer kfree_skb() until interrupts re-enabled. */
533                 spin_lock_irqsave(&buffer_lock, flags);
534                 skb = buffers[i].skb;
535                 buffers[i].skb = NULL;
536                 spin_unlock_irqrestore(&buffer_lock, flags);
537
538                 kfree_skb(skb);
539         }
540 }
541
542 static void discard_skb(uint32_t id)
543 {
544         struct sk_buff *old_skb = NULL;
545         unsigned long int flags;
546         struct packet_buffer *p;
547
548         spin_lock_irqsave(&buffer_lock, flags);
549         p = &buffers[id & PKT_BUFFER_MASK];
550         if (p->cookie == id >> PKT_BUFFER_BITS) {
551                 /* Defer kfree_skb() until interrupts re-enabled. */
552                 old_skb = p->skb;
553                 p->skb = NULL;
554         }
555         spin_unlock_irqrestore(&buffer_lock, flags);
556
557         if (old_skb)
558                 kfree_skb(old_skb);
559 }
560
/* Teardown hook for the forwarding code: releases every buffered packet. */
void fwd_exit(void)
{
        fwd_discard_all();
}