For SNAT, don't store the pre-fragment L2 header before actions are applied.
[sliver-openvswitch.git] / datapath / dp_act.c
1 /*
2  * Distributed under the terms of the GNU GPL version 2.
3  * Copyright (c) 2007, 2008 The Board of Trustees of The Leland 
4  * Stanford Junior University
5  */
6
7 /* Functions for executing OpenFlow actions. */
8
9 #include <linux/skbuff.h>
10 #include <linux/in.h>
11 #include <linux/ip.h>
12 #include <linux/tcp.h>
13 #include <linux/udp.h>
14 #include <linux/in6.h>
15 #include <linux/if_vlan.h>
16 #include <net/checksum.h>
17 #include "forward.h"
18 #include "dp_act.h"
19 #include "openflow/nicira-ext.h"
20 #include "nx_act.h"
21
22
23 static uint16_t
24 validate_output(struct datapath *dp, const struct sw_flow_key *key, 
25                 const struct ofp_action_header *ah) 
26 {
27         struct ofp_action_output *oa = (struct ofp_action_output *)ah;
28
29         if (oa->port == htons(OFPP_NONE) || 
30                         (!(key->wildcards & OFPFW_IN_PORT) && oa->port == key->in_port)) 
31                 return OFPBAC_BAD_OUT_PORT;
32
33         return ACT_VALIDATION_OK;
34 }
35
36 static int 
37 do_output(struct datapath *dp, struct sk_buff *skb, size_t max_len,
38                 int out_port, int ignore_no_fwd)
39 {
40         if (!skb)
41                 return -ENOMEM;
42         return (likely(out_port != OFPP_CONTROLLER)
43                 ? dp_output_port(dp, skb, out_port, ignore_no_fwd)
44                 : dp_output_control(dp, skb, fwd_save_skb(skb),
45                                          max_len, OFPR_ACTION));
46 }
47
48
49 static struct sk_buff *
50 vlan_pull_tag(struct sk_buff *skb)
51 {
52         struct vlan_ethhdr *vh = vlan_eth_hdr(skb);
53         struct ethhdr *eh;
54
55
56         /* Verify we were given a vlan packet */
57         if (vh->h_vlan_proto != htons(ETH_P_8021Q))
58                 return skb;
59
60         memmove(skb->data + VLAN_HLEN, skb->data, 2 * VLAN_ETH_ALEN);
61
62         eh = (struct ethhdr *)skb_pull(skb, VLAN_HLEN);
63
64         skb->protocol = eh->h_proto;
65         skb->mac_header += VLAN_HLEN;
66
67         return skb;
68 }
69
70
71 static struct sk_buff *
72 modify_vlan_tci(struct sk_buff *skb, struct sw_flow_key *key, 
73                 uint16_t tci, uint16_t mask)
74 {
75         struct vlan_ethhdr *vh = vlan_eth_hdr(skb);
76
77         if (key->dl_vlan != htons(OFP_VLAN_NONE)) {
78                 /* Modify vlan id, but maintain other TCI values */
79                 vh->h_vlan_TCI = (vh->h_vlan_TCI & ~(htons(mask))) | htons(tci);
80         } else  {
81                 /* Add vlan header */
82
83                 /* xxx The vlan_put_tag function, doesn't seem to work
84                  * xxx reliably when it attempts to use the hardware-accelerated
85                  * xxx version.  We'll directly use the software version
86                  * xxx until the problem can be diagnosed.
87                  */
88                 skb = __vlan_put_tag(skb, tci);
89                 vh = vlan_eth_hdr(skb);
90         }
91         key->dl_vlan = vh->h_vlan_TCI & htons(VLAN_VID_MASK);
92
93         return skb;
94 }
95
96 static struct sk_buff *
97 set_vlan_vid(struct sk_buff *skb, struct sw_flow_key *key, 
98                 const struct ofp_action_header *ah)
99 {
100         struct ofp_action_vlan_vid *va = (struct ofp_action_vlan_vid *)ah;
101         uint16_t tci = ntohs(va->vlan_vid);
102
103         return modify_vlan_tci(skb, key, tci, VLAN_VID_MASK);
104 }
105
106 /* Mask for the priority bits in a vlan header.  The kernel doesn't
107  * define this like it does for VID. */
108 #define VLAN_PCP_MASK 0xe000
109
110 static struct sk_buff *
111 set_vlan_pcp(struct sk_buff *skb, struct sw_flow_key *key, 
112                 const struct ofp_action_header *ah)
113 {
114         struct ofp_action_vlan_pcp *va = (struct ofp_action_vlan_pcp *)ah;
115         uint16_t tci = (uint16_t)va->vlan_pcp << 13;
116
117         return modify_vlan_tci(skb, key, tci, VLAN_PCP_MASK);
118 }
119
120 static struct sk_buff *
121 strip_vlan(struct sk_buff *skb, struct sw_flow_key *key, 
122                 const struct ofp_action_header *ah)
123 {
124         vlan_pull_tag(skb);
125         key->dl_vlan = htons(OFP_VLAN_NONE);
126
127         return skb;
128 }
129
130 static struct sk_buff *
131 set_dl_addr(struct sk_buff *skb, struct sw_flow_key *key, 
132                 const struct ofp_action_header *ah)
133 {
134         struct ofp_action_dl_addr *da = (struct ofp_action_dl_addr *)ah;
135         struct ethhdr *eh = eth_hdr(skb);
136
137         if (da->type == htons(OFPAT_SET_DL_SRC))
138                 memcpy(eh->h_source, da->dl_addr, sizeof eh->h_source);
139         else 
140                 memcpy(eh->h_dest, da->dl_addr, sizeof eh->h_dest);
141
142         return skb;
143 }
144
145 /* Updates 'sum', which is a field in 'skb''s data, given that a 4-byte field
146  * covered by the sum has been changed from 'from' to 'to'.  If set,
147  * 'pseudohdr' indicates that the field is in the TCP or UDP pseudo-header.
148  * Based on nf_proto_csum_replace4. */
149 static void update_csum(__sum16 *sum, struct sk_buff *skb,
150                         __be32 from, __be32 to, int pseudohdr)
151 {
152         __be32 diff[] = { ~from, to };
153         if (skb->ip_summed != CHECKSUM_PARTIAL) {
154                 *sum = csum_fold(csum_partial((char *)diff, sizeof(diff),
155                                 ~csum_unfold(*sum)));
156                 if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
157                         skb->csum = ~csum_partial((char *)diff, sizeof(diff),
158                                                 ~skb->csum);
159         } else if (pseudohdr)
160                 *sum = ~csum_fold(csum_partial((char *)diff, sizeof(diff),
161                                 csum_unfold(*sum)));
162 }
163
164 static struct sk_buff * 
165 set_nw_addr(struct sk_buff *skb, struct sw_flow_key *key, 
166                 const struct ofp_action_header *ah)
167 {
168         struct ofp_action_nw_addr *na = (struct ofp_action_nw_addr *)ah;
169         uint16_t eth_proto = ntohs(key->dl_type);
170
171         if (eth_proto == ETH_P_IP) {
172                 struct iphdr *nh = ip_hdr(skb);
173                 uint32_t new, *field;
174
175                 new = na->nw_addr;
176
177                 if (ah->type == htons(OFPAT_SET_NW_SRC))
178                         field = &nh->saddr;
179                 else
180                         field = &nh->daddr;
181
182                 if (key->nw_proto == IPPROTO_TCP) {
183                         struct tcphdr *th = tcp_hdr(skb);
184                         update_csum(&th->check, skb, *field, new, 1);
185                 } else if (key->nw_proto == IPPROTO_UDP) {
186                         struct udphdr *th = udp_hdr(skb);
187                         update_csum(&th->check, skb, *field, new, 1);
188                 }
189                 update_csum(&nh->check, skb, *field, new, 0);
190                 *field = new;
191         }
192
193         return skb;
194 }
195
196 static struct sk_buff *
197 set_tp_port(struct sk_buff *skb, struct sw_flow_key *key, 
198                 const struct ofp_action_header *ah)
199 {
200         struct ofp_action_tp_port *ta = (struct ofp_action_tp_port *)ah;
201         uint16_t eth_proto = ntohs(key->dl_type);
202
203         if (eth_proto == ETH_P_IP) {
204                 uint16_t new, *field;
205
206                 new = ta->tp_port;
207
208                 if (key->nw_proto == IPPROTO_TCP) {
209                         struct tcphdr *th = tcp_hdr(skb);
210
211                         if (ah->type == htons(OFPAT_SET_TP_SRC))
212                                 field = &th->source;
213                         else
214                                 field = &th->dest;
215
216                         update_csum(&th->check, skb, *field, new, 1);
217                         *field = new;
218                 } else if (key->nw_proto == IPPROTO_UDP) {
219                         struct udphdr *th = udp_hdr(skb);
220
221                         if (ah->type == htons(OFPAT_SET_TP_SRC))
222                                 field = &th->source;
223                         else
224                                 field = &th->dest;
225
226                         update_csum(&th->check, skb, *field, new, 1);
227                         *field = new;
228                 }
229         }
230
231         return skb;
232 }
233
234 struct openflow_action {
235         size_t min_size;
236         size_t max_size;
237         uint16_t (*validate)(struct datapath *dp, 
238                         const struct sw_flow_key *key,
239                         const struct ofp_action_header *ah);
240         struct sk_buff *(*execute)(struct sk_buff *skb, 
241                         struct sw_flow_key *key, 
242                         const struct ofp_action_header *ah);
243 };
244
245 static const struct openflow_action of_actions[] = {
246         [OFPAT_OUTPUT] = {
247                 sizeof(struct ofp_action_output),
248                 sizeof(struct ofp_action_output),
249                 validate_output,
250                 NULL                   /* This is optimized into execute_actions */
251         },
252         [OFPAT_SET_VLAN_VID] = {
253                 sizeof(struct ofp_action_vlan_vid),
254                 sizeof(struct ofp_action_vlan_vid),
255                 NULL,
256                 set_vlan_vid
257         },
258         [OFPAT_SET_VLAN_PCP] = {
259                 sizeof(struct ofp_action_vlan_pcp),
260                 sizeof(struct ofp_action_vlan_pcp),
261                 NULL,
262                 set_vlan_pcp
263         },
264         [OFPAT_STRIP_VLAN] = {
265                 sizeof(struct ofp_action_header),
266                 sizeof(struct ofp_action_header),
267                 NULL,
268                 strip_vlan
269         },
270         [OFPAT_SET_DL_SRC] = {
271                 sizeof(struct ofp_action_dl_addr),
272                 sizeof(struct ofp_action_dl_addr),
273                 NULL,
274                 set_dl_addr
275         },
276         [OFPAT_SET_DL_DST] = {
277                 sizeof(struct ofp_action_dl_addr),
278                 sizeof(struct ofp_action_dl_addr),
279                 NULL,
280                 set_dl_addr
281         },
282         [OFPAT_SET_NW_SRC] = {
283                 sizeof(struct ofp_action_nw_addr),
284                 sizeof(struct ofp_action_nw_addr),
285                 NULL,
286                 set_nw_addr
287         },
288         [OFPAT_SET_NW_DST] = {
289                 sizeof(struct ofp_action_nw_addr),
290                 sizeof(struct ofp_action_nw_addr),
291                 NULL,
292                 set_nw_addr
293         },
294         [OFPAT_SET_TP_SRC] = {
295                 sizeof(struct ofp_action_tp_port),
296                 sizeof(struct ofp_action_tp_port),
297                 NULL,
298                 set_tp_port
299         },
300         [OFPAT_SET_TP_DST] = {
301                 sizeof(struct ofp_action_tp_port),
302                 sizeof(struct ofp_action_tp_port),
303                 NULL,
304                 set_tp_port
305         }
306         /* OFPAT_VENDOR is not here, since it would blow up the array size. */
307 };
308
309 /* Validate built-in OpenFlow actions.  Either returns ACT_VALIDATION_OK
310  * or an OFPET_BAD_ACTION error code. */
311 static uint16_t 
312 validate_ofpat(struct datapath *dp, const struct sw_flow_key *key, 
313                 const struct ofp_action_header *ah, uint16_t type, uint16_t len)
314 {
315         int ret = ACT_VALIDATION_OK;
316         const struct openflow_action *act = &of_actions[type];
317
318         if ((len < act->min_size) || (len > act->max_size)) 
319                 return OFPBAC_BAD_LEN;
320
321         if (act->validate) 
322                 ret = act->validate(dp, key, ah);
323
324         return ret;
325 }
326
327 /* Validate vendor-defined actions.  Either returns ACT_VALIDATION_OK
328  * or an OFPET_BAD_ACTION error code. */
329 static uint16_t 
330 validate_vendor(struct datapath *dp, const struct sw_flow_key *key, 
331                 const struct ofp_action_header *ah, uint16_t len)
332 {
333         struct ofp_action_vendor_header *avh;
334         int ret = ACT_VALIDATION_OK;
335
336         if (len < sizeof(struct ofp_action_vendor_header))
337                 return OFPBAC_BAD_LEN;
338
339         avh = (struct ofp_action_vendor_header *)ah;
340
341         switch(ntohl(avh->vendor)) {
342         case NX_VENDOR_ID: 
343                 ret = nx_validate_act(dp, key, (struct nx_action_header *)avh, len);
344                 break;
345
346         default:
347                 return OFPBAC_BAD_VENDOR;
348         }
349
350         return ret;
351 }
352
353 /* Validates a list of actions.  If a problem is found, a code for the
354  * OFPET_BAD_ACTION error type is returned.  If the action list validates, 
355  * ACT_VALIDATION_OK is returned. */
356 uint16_t 
357 validate_actions(struct datapath *dp, const struct sw_flow_key *key,
358                 const struct ofp_action_header *actions, size_t actions_len)
359 {
360         uint8_t *p = (uint8_t *)actions;
361         int err;
362
363         while (actions_len >= sizeof(struct ofp_action_header)) {
364                 struct ofp_action_header *ah = (struct ofp_action_header *)p;
365                 size_t len = ntohs(ah->len);
366                 uint16_t type;
367
368                 /* Make there's enough remaining data for the specified length
369                  * and that the action length is a multiple of 64 bits. */
370                 if ((actions_len < len) || (len % 8) != 0)
371                         return OFPBAC_BAD_LEN;
372
373                 type = ntohs(ah->type);
374                 if (type < ARRAY_SIZE(of_actions)) {
375                         err = validate_ofpat(dp, key, ah, type, len);
376                         if (err != ACT_VALIDATION_OK)
377                                 return err;
378                 } else if (type == OFPAT_VENDOR) {
379                         err = validate_vendor(dp, key, ah, len);
380                         if (err != ACT_VALIDATION_OK)
381                                 return err;
382                 } else 
383                         return OFPBAC_BAD_TYPE;
384
385                 p += len;
386                 actions_len -= len;
387         }
388
389         /* Check if there's any trailing garbage. */
390         if (actions_len != 0) 
391                 return OFPBAC_BAD_LEN;
392
393         return ACT_VALIDATION_OK;
394 }
395
396 /* Execute a built-in OpenFlow action against 'skb'. */
397 static struct sk_buff *
398 execute_ofpat(struct sk_buff *skb, struct sw_flow_key *key, 
399                 const struct ofp_action_header *ah, uint16_t type)
400 {
401         const struct openflow_action *act = &of_actions[type];
402
403         if (act->execute)  {
404                 if (!make_writable(&skb)) {
405                         if (net_ratelimit())
406                                 printk("make_writable failed\n");
407                         return skb;
408                 }
409                 skb = act->execute(skb, key, ah);
410         }
411
412         return skb;
413 }
414
415 /* Execute a vendor-defined action against 'skb'. */
416 static struct sk_buff *
417 execute_vendor(struct sk_buff *skb, const struct sw_flow_key *key, 
418                 const struct ofp_action_header *ah)
419 {
420         struct ofp_action_vendor_header *avh 
421                         = (struct ofp_action_vendor_header *)ah;
422
423         /* NB: If changes need to be made to the packet, a call should be
424          * made to make_writable or its equivalent first. */
425
426         switch(ntohl(avh->vendor)) {
427         case NX_VENDOR_ID: 
428                 skb = nx_execute_act(skb, key, (struct nx_action_header *)avh);
429                 break;
430
431         default:
432                 /* This should not be possible due to prior validation. */
433                 if (net_ratelimit())
434                         printk("attempt to execute action with unknown vendor: %#x\n", 
435                                         ntohl(avh->vendor));
436                 break;
437         }
438
439         return skb;
440 }
441
442 /* Execute a list of actions against 'skb'. */
443 void execute_actions(struct datapath *dp, struct sk_buff *skb,
444                      struct sw_flow_key *key,
445                      const struct ofp_action_header *actions, size_t actions_len,
446                      int ignore_no_fwd)
447 {
448         /* Every output action needs a separate clone of 'skb', but the common
449          * case is just a single output action, so that doing a clone and
450          * then freeing the original skbuff is wasteful.  So the following code
451          * is slightly obscure just to avoid that. */
452         int prev_port;
453         size_t max_len=0;        /* Initialze to make compiler happy */
454         uint8_t *p = (uint8_t *)actions;
455
456         prev_port = -1;
457
458         /* The action list was already validated, so we can be a bit looser
459          * in our sanity-checking. */
460         while (actions_len > 0) {
461                 struct ofp_action_header *ah = (struct ofp_action_header *)p;
462                 size_t len = htons(ah->len);
463
464                 WARN_ON_ONCE(skb_shared(skb));
465                 if (prev_port != -1) {
466                         do_output(dp, skb_clone(skb, GFP_ATOMIC),
467                                   max_len, prev_port, ignore_no_fwd);
468                         prev_port = -1;
469                 }
470
471                 if (likely(ah->type == htons(OFPAT_OUTPUT))) {
472                         struct ofp_action_output *oa = (struct ofp_action_output *)p;
473                         prev_port = ntohs(oa->port);
474                         max_len = ntohs(oa->max_len);
475                 } else {
476                         uint16_t type = ntohs(ah->type);
477
478                         if (type < ARRAY_SIZE(of_actions)) 
479                                 skb = execute_ofpat(skb, key, ah, type);
480                         else if (type == OFPAT_VENDOR) 
481                                 skb = execute_vendor(skb, key, ah);
482
483                         if (!skb) {
484                                 if (net_ratelimit())
485                                         printk("execute_actions lost skb\n");
486                                 return;
487                         }
488                 }
489
490                 p += len;
491                 actions_len -= len;
492         }
493         if (prev_port != -1)
494                 do_output(dp, skb, max_len, prev_port, ignore_no_fwd);
495         else
496                 kfree_skb(skb);
497 }
498
499 /* Utility functions. */
500
501 /* Makes '*pskb' writable, possibly copying it and setting '*pskb' to point to
502  * the copy.
503  * Returns 1 if successful, 0 on failure. */
504 int
505 make_writable(struct sk_buff **pskb)
506 {
507         struct sk_buff *skb = *pskb;
508         if (skb_shared(skb) || skb_cloned(skb)) {
509                 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
510                 if (!nskb)
511                         return 0;
512                 kfree_skb(skb);
513                 *pskb = nskb;
514                 return 1;
515         } else {
516                 unsigned int hdr_len = (skb_transport_offset(skb)
517                                         + sizeof(struct tcphdr));
518                 return pskb_may_pull(skb, min(hdr_len, skb->len));
519         }
520 }