Merge branch 'mainstream'
[sliver-openvswitch.git] / datapath / flow_netlink.c
1 /*
2  * Copyright (c) 2007-2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #include "flow.h"
20 #include "datapath.h"
21 #include <linux/uaccess.h>
22 #include <linux/netdevice.h>
23 #include <linux/etherdevice.h>
24 #include <linux/if_ether.h>
25 #include <linux/if_vlan.h>
26 #include <net/llc_pdu.h>
27 #include <linux/kernel.h>
28 #include <linux/jhash.h>
29 #include <linux/jiffies.h>
30 #include <linux/llc.h>
31 #include <linux/module.h>
32 #include <linux/in.h>
33 #include <linux/rcupdate.h>
34 #include <linux/if_arp.h>
35 #include <linux/ip.h>
36 #include <linux/ipv6.h>
37 #include <linux/sctp.h>
38 #include <linux/tcp.h>
39 #include <linux/udp.h>
40 #include <linux/icmp.h>
41 #include <linux/icmpv6.h>
42 #include <linux/rculist.h>
43 #include <net/ip.h>
44 #include <net/ipv6.h>
45 #include <net/ndisc.h>
46
47 #include "flow_netlink.h"
48
49 static void update_range__(struct sw_flow_match *match,
50                            size_t offset, size_t size, bool is_mask)
51 {
52         struct sw_flow_key_range *range = NULL;
53         size_t start = rounddown(offset, sizeof(long));
54         size_t end = roundup(offset + size, sizeof(long));
55
56         if (!is_mask)
57                 range = &match->range;
58         else if (match->mask)
59                 range = &match->mask->range;
60
61         if (!range)
62                 return;
63
64         if (range->start == range->end) {
65                 range->start = start;
66                 range->end = end;
67                 return;
68         }
69
70         if (range->start > start)
71                 range->start = start;
72
73         if (range->end < end)
74                 range->end = end;
75 }
76
/*
 * Store 'value' into 'field' of either the match's key (is_mask false) or
 * the match's mask key (is_mask true), after recording the field's byte
 * range via update_range__().  With is_mask set and no mask attached, the
 * store is skipped.
 */
#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
	do { \
		update_range__(match, offsetof(struct sw_flow_key, field),  \
			       sizeof((match)->key->field), is_mask);       \
		if (!(is_mask))                                             \
			(match)->key->field = value;                        \
		else if ((match)->mask)                                     \
			(match)->mask->key.field = value;                   \
	} while (0)
88
/*
 * Copy 'len' bytes from 'value_p' into 'field' of either the match's key
 * (is_mask false) or the match's mask key (is_mask true), after recording
 * the byte range [offsetof(field), offsetof(field) + len) via
 * update_range__().  With is_mask set and no mask attached, the copy is
 * skipped.
 */
#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
	do { \
		update_range__(match, offsetof(struct sw_flow_key, field),  \
			       len, is_mask);                               \
		if (!(is_mask))                                             \
			memcpy(&(match)->key->field, value_p, len);         \
		else if ((match)->mask)                                     \
			memcpy(&(match)->mask->key.field, value_p, len);    \
	} while (0)
100
101 static u16 range_n_bytes(const struct sw_flow_key_range *range)
102 {
103         return range->end - range->start;
104 }
105
106 static bool match_validate(const struct sw_flow_match *match,
107                            u64 key_attrs, u64 mask_attrs)
108 {
109         u64 key_expected = 1ULL << OVS_KEY_ATTR_ETHERNET;
110         u64 mask_allowed = key_attrs;  /* At most allow all key attributes */
111
112         /* The following mask attributes allowed only if they
113          * pass the validation tests. */
114         mask_allowed &= ~((1ULL << OVS_KEY_ATTR_IPV4)
115                         | (1ULL << OVS_KEY_ATTR_IPV6)
116                         | (1ULL << OVS_KEY_ATTR_TCP)
117                         | (1ULL << OVS_KEY_ATTR_TCP_FLAGS)
118                         | (1ULL << OVS_KEY_ATTR_UDP)
119                         | (1ULL << OVS_KEY_ATTR_SCTP)
120                         | (1ULL << OVS_KEY_ATTR_ICMP)
121                         | (1ULL << OVS_KEY_ATTR_ICMPV6)
122                         | (1ULL << OVS_KEY_ATTR_ARP)
123                         | (1ULL << OVS_KEY_ATTR_ND));
124
125         /* Always allowed mask fields. */
126         mask_allowed |= ((1ULL << OVS_KEY_ATTR_TUNNEL)
127                        | (1ULL << OVS_KEY_ATTR_IN_PORT)
128                        | (1ULL << OVS_KEY_ATTR_ETHERTYPE));
129
130         /* Check key attributes. */
131         if (match->key->eth.type == htons(ETH_P_ARP)
132                         || match->key->eth.type == htons(ETH_P_RARP)) {
133                 key_expected |= 1ULL << OVS_KEY_ATTR_ARP;
134                 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
135                         mask_allowed |= 1ULL << OVS_KEY_ATTR_ARP;
136         }
137
138         if (match->key->eth.type == htons(ETH_P_IP)) {
139                 key_expected |= 1ULL << OVS_KEY_ATTR_IPV4;
140                 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
141                         mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV4;
142
143                 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
144                         if (match->key->ip.proto == IPPROTO_UDP) {
145                                 key_expected |= 1ULL << OVS_KEY_ATTR_UDP;
146                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
147                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_UDP;
148                         }
149
150                         if (match->key->ip.proto == IPPROTO_SCTP) {
151                                 key_expected |= 1ULL << OVS_KEY_ATTR_SCTP;
152                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
153                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_SCTP;
154                         }
155
156                         if (match->key->ip.proto == IPPROTO_TCP) {
157                                 key_expected |= 1ULL << OVS_KEY_ATTR_TCP;
158                                 key_expected |= 1ULL << OVS_KEY_ATTR_TCP_FLAGS;
159                                 if (match->mask && (match->mask->key.ip.proto == 0xff)) {
160                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP;
161                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP_FLAGS;
162                                 }
163                         }
164
165                         if (match->key->ip.proto == IPPROTO_ICMP) {
166                                 key_expected |= 1ULL << OVS_KEY_ATTR_ICMP;
167                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
168                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_ICMP;
169                         }
170                 }
171         }
172
173         if (match->key->eth.type == htons(ETH_P_IPV6)) {
174                 key_expected |= 1ULL << OVS_KEY_ATTR_IPV6;
175                 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
176                         mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV6;
177
178                 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
179                         if (match->key->ip.proto == IPPROTO_UDP) {
180                                 key_expected |= 1ULL << OVS_KEY_ATTR_UDP;
181                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
182                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_UDP;
183                         }
184
185                         if (match->key->ip.proto == IPPROTO_SCTP) {
186                                 key_expected |= 1ULL << OVS_KEY_ATTR_SCTP;
187                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
188                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_SCTP;
189                         }
190
191                         if (match->key->ip.proto == IPPROTO_TCP) {
192                                 key_expected |= 1ULL << OVS_KEY_ATTR_TCP;
193                                 key_expected |= 1ULL << OVS_KEY_ATTR_TCP_FLAGS;
194                                 if (match->mask && (match->mask->key.ip.proto == 0xff)) {
195                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP;
196                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP_FLAGS;
197                                 }
198                         }
199
200                         if (match->key->ip.proto == IPPROTO_ICMPV6) {
201                                 key_expected |= 1ULL << OVS_KEY_ATTR_ICMPV6;
202                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
203                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_ICMPV6;
204
205                                 if (match->key->ipv6.tp.src ==
206                                                 htons(NDISC_NEIGHBOUR_SOLICITATION) ||
207                                     match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
208                                         key_expected |= 1ULL << OVS_KEY_ATTR_ND;
209                                         if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff)))
210                                                 mask_allowed |= 1ULL << OVS_KEY_ATTR_ND;
211                                 }
212                         }
213                 }
214         }
215
216         if ((key_attrs & key_expected) != key_expected) {
217                 /* Key attributes check failed. */
218                 OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n",
219                                 key_attrs, key_expected);
220                 return false;
221         }
222
223         if ((mask_attrs & mask_allowed) != mask_attrs) {
224                 /* Mask attributes check failed. */
225                 OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n",
226                                 mask_attrs, mask_allowed);
227                 return false;
228         }
229
230         return true;
231 }
232
233 /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
234 static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
235         [OVS_KEY_ATTR_ENCAP] = -1,
236         [OVS_KEY_ATTR_PRIORITY] = sizeof(u32),
237         [OVS_KEY_ATTR_IN_PORT] = sizeof(u32),
238         [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32),
239         [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
240         [OVS_KEY_ATTR_VLAN] = sizeof(__be16),
241         [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
242         [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4),
243         [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
244         [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
245         [OVS_KEY_ATTR_TCP_FLAGS] = sizeof(__be16),
246         [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp),
247         [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp),
248         [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp),
249         [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
250         [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
251         [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
252         [OVS_KEY_ATTR_TUNNEL] = -1,
253 };
254
255 static bool is_all_zero(const u8 *fp, size_t size)
256 {
257         int i;
258
259         if (!fp)
260                 return false;
261
262         for (i = 0; i < size; i++)
263                 if (fp[i])
264                         return false;
265
266         return true;
267 }
268
269 static int __parse_flow_nlattrs(const struct nlattr *attr,
270                                 const struct nlattr *a[],
271                                 u64 *attrsp, bool nz)
272 {
273         const struct nlattr *nla;
274         u64 attrs;
275         int rem;
276
277         attrs = *attrsp;
278         nla_for_each_nested(nla, attr, rem) {
279                 u16 type = nla_type(nla);
280                 int expected_len;
281
282                 if (type > OVS_KEY_ATTR_MAX) {
283                         OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n",
284                                   type, OVS_KEY_ATTR_MAX);
285                         return -EINVAL;
286                 }
287
288                 if (attrs & (1ULL << type)) {
289                         OVS_NLERR("Duplicate key attribute (type %d).\n", type);
290                         return -EINVAL;
291                 }
292
293                 expected_len = ovs_key_lens[type];
294                 if (nla_len(nla) != expected_len && expected_len != -1) {
295                         OVS_NLERR("Key attribute has unexpected length (type=%d"
296                                   ", length=%d, expected=%d).\n", type,
297                                   nla_len(nla), expected_len);
298                         return -EINVAL;
299                 }
300
301                 if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
302                         attrs |= 1ULL << type;
303                         a[type] = nla;
304                 }
305         }
306         if (rem) {
307                 OVS_NLERR("Message has %d unknown bytes.\n", rem);
308                 return -EINVAL;
309         }
310
311         *attrsp = attrs;
312         return 0;
313 }
314
315 static int parse_flow_mask_nlattrs(const struct nlattr *attr,
316                                    const struct nlattr *a[], u64 *attrsp)
317 {
318         return __parse_flow_nlattrs(attr, a, attrsp, true);
319 }
320
321 static int parse_flow_nlattrs(const struct nlattr *attr,
322                               const struct nlattr *a[], u64 *attrsp)
323 {
324         return __parse_flow_nlattrs(attr, a, attrsp, false);
325 }
326
327 static int ipv4_tun_from_nlattr(const struct nlattr *attr,
328                                 struct sw_flow_match *match, bool is_mask)
329 {
330         struct nlattr *a;
331         int rem;
332         bool ttl = false;
333         __be16 tun_flags = 0;
334
335         nla_for_each_nested(a, attr, rem) {
336                 int type = nla_type(a);
337                 static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
338                         [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64),
339                         [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32),
340                         [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32),
341                         [OVS_TUNNEL_KEY_ATTR_TOS] = 1,
342                         [OVS_TUNNEL_KEY_ATTR_TTL] = 1,
343                         [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
344                         [OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
345                 };
346
347                 if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
348                         OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n",
349                         type, OVS_TUNNEL_KEY_ATTR_MAX);
350                         return -EINVAL;
351                 }
352
353                 if (ovs_tunnel_key_lens[type] != nla_len(a)) {
354                         OVS_NLERR("IPv4 tunnel attribute type has unexpected "
355                                   " length (type=%d, length=%d, expected=%d).\n",
356                                   type, nla_len(a), ovs_tunnel_key_lens[type]);
357                         return -EINVAL;
358                 }
359
360                 switch (type) {
361                 case OVS_TUNNEL_KEY_ATTR_ID:
362                         SW_FLOW_KEY_PUT(match, tun_key.tun_id,
363                                         nla_get_be64(a), is_mask);
364                         tun_flags |= TUNNEL_KEY;
365                         break;
366                 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
367                         SW_FLOW_KEY_PUT(match, tun_key.ipv4_src,
368                                         nla_get_be32(a), is_mask);
369                         break;
370                 case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
371                         SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst,
372                                         nla_get_be32(a), is_mask);
373                         break;
374                 case OVS_TUNNEL_KEY_ATTR_TOS:
375                         SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos,
376                                         nla_get_u8(a), is_mask);
377                         break;
378                 case OVS_TUNNEL_KEY_ATTR_TTL:
379                         SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl,
380                                         nla_get_u8(a), is_mask);
381                         ttl = true;
382                         break;
383                 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
384                         tun_flags |= TUNNEL_DONT_FRAGMENT;
385                         break;
386                 case OVS_TUNNEL_KEY_ATTR_CSUM:
387                         tun_flags |= TUNNEL_CSUM;
388                         break;
389                 default:
390                         return -EINVAL;
391                 }
392         }
393
394         SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
395
396         if (rem > 0) {
397                 OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem);
398                 return -EINVAL;
399         }
400
401         if (!is_mask) {
402                 if (!match->key->tun_key.ipv4_dst) {
403                         OVS_NLERR("IPv4 tunnel destination address is zero.\n");
404                         return -EINVAL;
405                 }
406
407                 if (!ttl) {
408                         OVS_NLERR("IPv4 tunnel TTL not specified.\n");
409                         return -EINVAL;
410                 }
411         }
412
413         return 0;
414 }
415
416 static int ipv4_tun_to_nlattr(struct sk_buff *skb,
417                               const struct ovs_key_ipv4_tunnel *tun_key,
418                               const struct ovs_key_ipv4_tunnel *output)
419 {
420         struct nlattr *nla;
421
422         nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
423         if (!nla)
424                 return -EMSGSIZE;
425
426         if (output->tun_flags & TUNNEL_KEY &&
427             nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
428                 return -EMSGSIZE;
429         if (output->ipv4_src &&
430                 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
431                 return -EMSGSIZE;
432         if (output->ipv4_dst &&
433                 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
434                 return -EMSGSIZE;
435         if (output->ipv4_tos &&
436                 nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
437                 return -EMSGSIZE;
438         if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
439                 return -EMSGSIZE;
440         if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
441                 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
442                 return -EMSGSIZE;
443         if ((output->tun_flags & TUNNEL_CSUM) &&
444                 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
445                 return -EMSGSIZE;
446
447         nla_nest_end(skb, nla);
448         return 0;
449 }
450
451
452 static int metadata_from_nlattrs(struct sw_flow_match *match,  u64 *attrs,
453                                  const struct nlattr **a, bool is_mask)
454 {
455         if (*attrs & (1ULL << OVS_KEY_ATTR_PRIORITY)) {
456                 SW_FLOW_KEY_PUT(match, phy.priority,
457                           nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
458                 *attrs &= ~(1ULL << OVS_KEY_ATTR_PRIORITY);
459         }
460
461         if (*attrs & (1ULL << OVS_KEY_ATTR_IN_PORT)) {
462                 u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
463
464                 if (is_mask)
465                         in_port = 0xffffffff; /* Always exact match in_port. */
466                 else if (in_port >= DP_MAX_PORTS)
467                         return -EINVAL;
468
469                 SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
470                 *attrs &= ~(1ULL << OVS_KEY_ATTR_IN_PORT);
471         } else if (!is_mask) {
472                 SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
473         }
474
475         if (*attrs & (1ULL << OVS_KEY_ATTR_SKB_MARK)) {
476                 uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
477
478                 SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
479                 *attrs &= ~(1ULL << OVS_KEY_ATTR_SKB_MARK);
480         }
481         if (*attrs & (1ULL << OVS_KEY_ATTR_TUNNEL)) {
482                 if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
483                                          is_mask))
484                         return -EINVAL;
485                 *attrs &= ~(1ULL << OVS_KEY_ATTR_TUNNEL);
486         }
487         return 0;
488 }
489
490 static int ovs_key_from_nlattrs(struct sw_flow_match *match,  u64 attrs,
491                                 const struct nlattr **a, bool is_mask)
492 {
493         int err;
494         u64 orig_attrs = attrs;
495
496         err = metadata_from_nlattrs(match, &attrs, a, is_mask);
497         if (err)
498                 return err;
499
500         if (attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) {
501                 const struct ovs_key_ethernet *eth_key;
502
503                 eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
504                 SW_FLOW_KEY_MEMCPY(match, eth.src,
505                                 eth_key->eth_src, ETH_ALEN, is_mask);
506                 SW_FLOW_KEY_MEMCPY(match, eth.dst,
507                                 eth_key->eth_dst, ETH_ALEN, is_mask);
508                 attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERNET);
509         }
510
511         if (attrs & (1ULL << OVS_KEY_ATTR_VLAN)) {
512                 __be16 tci;
513
514                 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
515                 if (!(tci & htons(VLAN_TAG_PRESENT))) {
516                         if (is_mask)
517                                 OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n");
518                         else
519                                 OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n");
520
521                         return -EINVAL;
522                 }
523
524                 SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
525                 attrs &= ~(1ULL << OVS_KEY_ATTR_VLAN);
526         } else if (!is_mask)
527                 SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
528
529         if (attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)) {
530                 __be16 eth_type;
531
532                 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
533                 if (is_mask) {
534                         /* Always exact match EtherType. */
535                         eth_type = htons(0xffff);
536                 } else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
537                         OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n",
538                                         ntohs(eth_type), ETH_P_802_3_MIN);
539                         return -EINVAL;
540                 }
541
542                 SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
543                 attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
544         } else if (!is_mask) {
545                 SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
546         }
547
548         if (attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
549                 const struct ovs_key_ipv4 *ipv4_key;
550
551                 ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
552                 if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
553                         OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n",
554                                 ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
555                         return -EINVAL;
556                 }
557                 SW_FLOW_KEY_PUT(match, ip.proto,
558                                 ipv4_key->ipv4_proto, is_mask);
559                 SW_FLOW_KEY_PUT(match, ip.tos,
560                                 ipv4_key->ipv4_tos, is_mask);
561                 SW_FLOW_KEY_PUT(match, ip.ttl,
562                                 ipv4_key->ipv4_ttl, is_mask);
563                 SW_FLOW_KEY_PUT(match, ip.frag,
564                                 ipv4_key->ipv4_frag, is_mask);
565                 SW_FLOW_KEY_PUT(match, ipv4.addr.src,
566                                 ipv4_key->ipv4_src, is_mask);
567                 SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
568                                 ipv4_key->ipv4_dst, is_mask);
569                 attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4);
570         }
571
572         if (attrs & (1ULL << OVS_KEY_ATTR_IPV6)) {
573                 const struct ovs_key_ipv6 *ipv6_key;
574
575                 ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
576                 if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
577                         OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n",
578                                 ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
579                         return -EINVAL;
580                 }
581                 SW_FLOW_KEY_PUT(match, ipv6.label,
582                                 ipv6_key->ipv6_label, is_mask);
583                 SW_FLOW_KEY_PUT(match, ip.proto,
584                                 ipv6_key->ipv6_proto, is_mask);
585                 SW_FLOW_KEY_PUT(match, ip.tos,
586                                 ipv6_key->ipv6_tclass, is_mask);
587                 SW_FLOW_KEY_PUT(match, ip.ttl,
588                                 ipv6_key->ipv6_hlimit, is_mask);
589                 SW_FLOW_KEY_PUT(match, ip.frag,
590                                 ipv6_key->ipv6_frag, is_mask);
591                 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
592                                 ipv6_key->ipv6_src,
593                                 sizeof(match->key->ipv6.addr.src),
594                                 is_mask);
595                 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
596                                 ipv6_key->ipv6_dst,
597                                 sizeof(match->key->ipv6.addr.dst),
598                                 is_mask);
599
600                 attrs &= ~(1ULL << OVS_KEY_ATTR_IPV6);
601         }
602
603         if (attrs & (1ULL << OVS_KEY_ATTR_ARP)) {
604                 const struct ovs_key_arp *arp_key;
605
606                 arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
607                 if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
608                         OVS_NLERR("Unknown ARP opcode (opcode=%d).\n",
609                                   arp_key->arp_op);
610                         return -EINVAL;
611                 }
612
613                 SW_FLOW_KEY_PUT(match, ipv4.addr.src,
614                                 arp_key->arp_sip, is_mask);
615                 SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
616                         arp_key->arp_tip, is_mask);
617                 SW_FLOW_KEY_PUT(match, ip.proto,
618                                 ntohs(arp_key->arp_op), is_mask);
619                 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
620                                 arp_key->arp_sha, ETH_ALEN, is_mask);
621                 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
622                                 arp_key->arp_tha, ETH_ALEN, is_mask);
623
624                 attrs &= ~(1ULL << OVS_KEY_ATTR_ARP);
625         }
626
627         if (attrs & (1ULL << OVS_KEY_ATTR_TCP)) {
628                 const struct ovs_key_tcp *tcp_key;
629
630                 tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
631                 if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
632                         SW_FLOW_KEY_PUT(match, ipv4.tp.src,
633                                         tcp_key->tcp_src, is_mask);
634                         SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
635                                         tcp_key->tcp_dst, is_mask);
636                 } else {
637                         SW_FLOW_KEY_PUT(match, ipv6.tp.src,
638                                         tcp_key->tcp_src, is_mask);
639                         SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
640                                         tcp_key->tcp_dst, is_mask);
641                 }
642                 attrs &= ~(1ULL << OVS_KEY_ATTR_TCP);
643         }
644
645         if (attrs & (1ULL << OVS_KEY_ATTR_TCP_FLAGS)) {
646                 if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
647                         SW_FLOW_KEY_PUT(match, ipv4.tp.flags,
648                                         nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
649                                         is_mask);
650                 } else {
651                         SW_FLOW_KEY_PUT(match, ipv6.tp.flags,
652                                         nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
653                                         is_mask);
654                 }
655                 attrs &= ~(1ULL << OVS_KEY_ATTR_TCP_FLAGS);
656         }
657
658         if (attrs & (1ULL << OVS_KEY_ATTR_UDP)) {
659                 const struct ovs_key_udp *udp_key;
660
661                 udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
662                 if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
663                         SW_FLOW_KEY_PUT(match, ipv4.tp.src,
664                                         udp_key->udp_src, is_mask);
665                         SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
666                                         udp_key->udp_dst, is_mask);
667                 } else {
668                         SW_FLOW_KEY_PUT(match, ipv6.tp.src,
669                                         udp_key->udp_src, is_mask);
670                         SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
671                                         udp_key->udp_dst, is_mask);
672                 }
673                 attrs &= ~(1ULL << OVS_KEY_ATTR_UDP);
674         }
675
676         if (attrs & (1ULL << OVS_KEY_ATTR_SCTP)) {
677                 const struct ovs_key_sctp *sctp_key;
678
679                 sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
680                 if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
681                         SW_FLOW_KEY_PUT(match, ipv4.tp.src,
682                                         sctp_key->sctp_src, is_mask);
683                         SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
684                                         sctp_key->sctp_dst, is_mask);
685                 } else {
686                         SW_FLOW_KEY_PUT(match, ipv6.tp.src,
687                                         sctp_key->sctp_src, is_mask);
688                         SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
689                                         sctp_key->sctp_dst, is_mask);
690                 }
691                 attrs &= ~(1ULL << OVS_KEY_ATTR_SCTP);
692         }
693
694         if (attrs & (1ULL << OVS_KEY_ATTR_ICMP)) {
695                 const struct ovs_key_icmp *icmp_key;
696
697                 icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
698                 SW_FLOW_KEY_PUT(match, ipv4.tp.src,
699                                 htons(icmp_key->icmp_type), is_mask);
700                 SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
701                                 htons(icmp_key->icmp_code), is_mask);
702                 attrs &= ~(1ULL << OVS_KEY_ATTR_ICMP);
703         }
704
705         if (attrs & (1ULL << OVS_KEY_ATTR_ICMPV6)) {
706                 const struct ovs_key_icmpv6 *icmpv6_key;
707
708                 icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
709                 SW_FLOW_KEY_PUT(match, ipv6.tp.src,
710                                 htons(icmpv6_key->icmpv6_type), is_mask);
711                 SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
712                                 htons(icmpv6_key->icmpv6_code), is_mask);
713                 attrs &= ~(1ULL << OVS_KEY_ATTR_ICMPV6);
714         }
715
716         if (attrs & (1ULL << OVS_KEY_ATTR_ND)) {
717                 const struct ovs_key_nd *nd_key;
718
719                 nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
720                 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
721                         nd_key->nd_target,
722                         sizeof(match->key->ipv6.nd.target),
723                         is_mask);
724                 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
725                         nd_key->nd_sll, ETH_ALEN, is_mask);
726                 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
727                                 nd_key->nd_tll, ETH_ALEN, is_mask);
728                 attrs &= ~(1ULL << OVS_KEY_ATTR_ND);
729         }
730
731         if (attrs != 0)
732                 return -EINVAL;
733
734         return 0;
735 }
736
737 static void sw_flow_mask_set(struct sw_flow_mask *mask,
738                              struct sw_flow_key_range *range, u8 val)
739 {
740         u8 *m = (u8 *)&mask->key + range->start;
741
742         mask->range = *range;
743         memset(m, val, range_n_bytes(range));
744 }
745
746 /**
747  * ovs_nla_get_match - parses Netlink attributes into a flow key and
748  * mask. In case the 'mask' is NULL, the flow is treated as exact match
749  * flow. Otherwise, it is treated as a wildcarded flow, except the mask
750  * does not include any don't care bit.
751  * @match: receives the extracted flow match information.
752  * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
753  * sequence. The fields should of the packet that triggered the creation
754  * of this flow.
755  * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
756  * attribute specifies the mask field of the wildcarded flow.
757  */
758 int ovs_nla_get_match(struct sw_flow_match *match,
759                       const struct nlattr *key,
760                       const struct nlattr *mask)
761 {
762         const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
763         const struct nlattr *encap;
764         u64 key_attrs = 0;
765         u64 mask_attrs = 0;
766         bool encap_valid = false;
767         int err;
768
769         err = parse_flow_nlattrs(key, a, &key_attrs);
770         if (err)
771                 return err;
772
773         if ((key_attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) &&
774             (key_attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)) &&
775             (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
776                 __be16 tci;
777
778                 if (!((key_attrs & (1ULL << OVS_KEY_ATTR_VLAN)) &&
779                       (key_attrs & (1ULL << OVS_KEY_ATTR_ENCAP)))) {
780                         OVS_NLERR("Invalid Vlan frame.\n");
781                         return -EINVAL;
782                 }
783
784                 key_attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
785                 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
786                 encap = a[OVS_KEY_ATTR_ENCAP];
787                 key_attrs &= ~(1ULL << OVS_KEY_ATTR_ENCAP);
788                 encap_valid = true;
789
790                 if (tci & htons(VLAN_TAG_PRESENT)) {
791                         err = parse_flow_nlattrs(encap, a, &key_attrs);
792                         if (err)
793                                 return err;
794                 } else if (!tci) {
795                         /* Corner case for truncated 802.1Q header. */
796                         if (nla_len(encap)) {
797                                 OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n");
798                                 return -EINVAL;
799                         }
800                 } else {
801                         OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n");
802                         return  -EINVAL;
803                 }
804         }
805
806         err = ovs_key_from_nlattrs(match, key_attrs, a, false);
807         if (err)
808                 return err;
809
810         if (mask) {
811                 err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
812                 if (err)
813                         return err;
814
815                 if (mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP)  {
816                         __be16 eth_type = 0;
817                         __be16 tci = 0;
818
819                         if (!encap_valid) {
820                                 OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n");
821                                 return  -EINVAL;
822                         }
823
824                         mask_attrs &= ~(1ULL << OVS_KEY_ATTR_ENCAP);
825                         if (a[OVS_KEY_ATTR_ETHERTYPE])
826                                 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
827
828                         if (eth_type == htons(0xffff)) {
829                                 mask_attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
830                                 encap = a[OVS_KEY_ATTR_ENCAP];
831                                 err = parse_flow_mask_nlattrs(encap, a, &mask_attrs);
832                         } else {
833                                 OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n",
834                                                 ntohs(eth_type));
835                                 return -EINVAL;
836                         }
837
838                         if (a[OVS_KEY_ATTR_VLAN])
839                                 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
840
841                         if (!(tci & htons(VLAN_TAG_PRESENT))) {
842                                 OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci));
843                                 return -EINVAL;
844                         }
845                 }
846
847                 err = ovs_key_from_nlattrs(match, mask_attrs, a, true);
848                 if (err)
849                         return err;
850         } else {
851                 /* Populate exact match flow's key mask. */
852                 if (match->mask)
853                         sw_flow_mask_set(match->mask, &match->range, 0xff);
854         }
855
856         if (!match_validate(match, key_attrs, mask_attrs))
857                 return -EINVAL;
858
859         return 0;
860 }
861
862 /**
863  * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
864  * @flow: Receives extracted in_port, priority, tun_key and skb_mark.
865  * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
866  * sequence.
867  *
868  * This parses a series of Netlink attributes that form a flow key, which must
869  * take the same form accepted by flow_from_nlattrs(), but only enough of it to
870  * get the metadata, that is, the parts of the flow key that cannot be
871  * extracted from the packet itself.
872  */
873
874 int ovs_nla_get_flow_metadata(struct sw_flow *flow,
875                               const struct nlattr *attr)
876 {
877         struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key;
878         const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
879         u64 attrs = 0;
880         int err;
881         struct sw_flow_match match;
882
883         flow->key.phy.in_port = DP_MAX_PORTS;
884         flow->key.phy.priority = 0;
885         flow->key.phy.skb_mark = 0;
886         memset(tun_key, 0, sizeof(flow->key.tun_key));
887
888         err = parse_flow_nlattrs(attr, a, &attrs);
889         if (err)
890                 return -EINVAL;
891
892         memset(&match, 0, sizeof(match));
893         match.key = &flow->key;
894
895         err = metadata_from_nlattrs(&match, &attrs, a, false);
896         if (err)
897                 return err;
898
899         return 0;
900 }
901
902 int ovs_nla_put_flow(const struct sw_flow_key *swkey,
903                      const struct sw_flow_key *output, struct sk_buff *skb)
904 {
905         struct ovs_key_ethernet *eth_key;
906         struct nlattr *nla, *encap;
907         bool is_mask = (swkey != output);
908
909         if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
910                 goto nla_put_failure;
911
912         if ((swkey->tun_key.ipv4_dst || is_mask) &&
913             ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key))
914                 goto nla_put_failure;
915
916         if (swkey->phy.in_port == DP_MAX_PORTS) {
917                 if (is_mask && (output->phy.in_port == 0xffff))
918                         if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
919                                 goto nla_put_failure;
920         } else {
921                 u16 upper_u16;
922                 upper_u16 = !is_mask ? 0 : 0xffff;
923
924                 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
925                                 (upper_u16 << 16) | output->phy.in_port))
926                         goto nla_put_failure;
927         }
928
929         if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
930                 goto nla_put_failure;
931
932         nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
933         if (!nla)
934                 goto nla_put_failure;
935
936         eth_key = nla_data(nla);
937         memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN);
938         memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN);
939
940         if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
941                 __be16 eth_type;
942                 eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff);
943                 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
944                     nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
945                         goto nla_put_failure;
946                 encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
947                 if (!swkey->eth.tci)
948                         goto unencap;
949         } else
950                 encap = NULL;
951
952         if (swkey->eth.type == htons(ETH_P_802_2)) {
953                 /*
954                  * Ethertype 802.2 is represented in the netlink with omitted
955                  * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
956                  * 0xffff in the mask attribute.  Ethertype can also
957                  * be wildcarded.
958                  */
959                 if (is_mask && output->eth.type)
960                         if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
961                                                 output->eth.type))
962                                 goto nla_put_failure;
963                 goto unencap;
964         }
965
966         if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
967                 goto nla_put_failure;
968
969         if (swkey->eth.type == htons(ETH_P_IP)) {
970                 struct ovs_key_ipv4 *ipv4_key;
971
972                 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
973                 if (!nla)
974                         goto nla_put_failure;
975                 ipv4_key = nla_data(nla);
976                 ipv4_key->ipv4_src = output->ipv4.addr.src;
977                 ipv4_key->ipv4_dst = output->ipv4.addr.dst;
978                 ipv4_key->ipv4_proto = output->ip.proto;
979                 ipv4_key->ipv4_tos = output->ip.tos;
980                 ipv4_key->ipv4_ttl = output->ip.ttl;
981                 ipv4_key->ipv4_frag = output->ip.frag;
982         } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
983                 struct ovs_key_ipv6 *ipv6_key;
984
985                 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
986                 if (!nla)
987                         goto nla_put_failure;
988                 ipv6_key = nla_data(nla);
989                 memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
990                                 sizeof(ipv6_key->ipv6_src));
991                 memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
992                                 sizeof(ipv6_key->ipv6_dst));
993                 ipv6_key->ipv6_label = output->ipv6.label;
994                 ipv6_key->ipv6_proto = output->ip.proto;
995                 ipv6_key->ipv6_tclass = output->ip.tos;
996                 ipv6_key->ipv6_hlimit = output->ip.ttl;
997                 ipv6_key->ipv6_frag = output->ip.frag;
998         } else if (swkey->eth.type == htons(ETH_P_ARP) ||
999                    swkey->eth.type == htons(ETH_P_RARP)) {
1000                 struct ovs_key_arp *arp_key;
1001
1002                 nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
1003                 if (!nla)
1004                         goto nla_put_failure;
1005                 arp_key = nla_data(nla);
1006                 memset(arp_key, 0, sizeof(struct ovs_key_arp));
1007                 arp_key->arp_sip = output->ipv4.addr.src;
1008                 arp_key->arp_tip = output->ipv4.addr.dst;
1009                 arp_key->arp_op = htons(output->ip.proto);
1010                 memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN);
1011                 memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN);
1012         }
1013
1014         if ((swkey->eth.type == htons(ETH_P_IP) ||
1015              swkey->eth.type == htons(ETH_P_IPV6)) &&
1016              swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
1017
1018                 if (swkey->ip.proto == IPPROTO_TCP) {
1019                         struct ovs_key_tcp *tcp_key;
1020
1021                         nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
1022                         if (!nla)
1023                                 goto nla_put_failure;
1024                         tcp_key = nla_data(nla);
1025                         if (swkey->eth.type == htons(ETH_P_IP)) {
1026                                 tcp_key->tcp_src = output->ipv4.tp.src;
1027                                 tcp_key->tcp_dst = output->ipv4.tp.dst;
1028                                 if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
1029                                                  output->ipv4.tp.flags))
1030                                         goto nla_put_failure;
1031                         } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
1032                                 tcp_key->tcp_src = output->ipv6.tp.src;
1033                                 tcp_key->tcp_dst = output->ipv6.tp.dst;
1034                                 if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
1035                                                  output->ipv6.tp.flags))
1036                                         goto nla_put_failure;
1037                         }
1038                 } else if (swkey->ip.proto == IPPROTO_UDP) {
1039                         struct ovs_key_udp *udp_key;
1040
1041                         nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
1042                         if (!nla)
1043                                 goto nla_put_failure;
1044                         udp_key = nla_data(nla);
1045                         if (swkey->eth.type == htons(ETH_P_IP)) {
1046                                 udp_key->udp_src = output->ipv4.tp.src;
1047                                 udp_key->udp_dst = output->ipv4.tp.dst;
1048                         } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
1049                                 udp_key->udp_src = output->ipv6.tp.src;
1050                                 udp_key->udp_dst = output->ipv6.tp.dst;
1051                         }
1052                 } else if (swkey->ip.proto == IPPROTO_SCTP) {
1053                         struct ovs_key_sctp *sctp_key;
1054
1055                         nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
1056                         if (!nla)
1057                                 goto nla_put_failure;
1058                         sctp_key = nla_data(nla);
1059                         if (swkey->eth.type == htons(ETH_P_IP)) {
1060                                 sctp_key->sctp_src = swkey->ipv4.tp.src;
1061                                 sctp_key->sctp_dst = swkey->ipv4.tp.dst;
1062                         } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
1063                                 sctp_key->sctp_src = swkey->ipv6.tp.src;
1064                                 sctp_key->sctp_dst = swkey->ipv6.tp.dst;
1065                         }
1066                 } else if (swkey->eth.type == htons(ETH_P_IP) &&
1067                            swkey->ip.proto == IPPROTO_ICMP) {
1068                         struct ovs_key_icmp *icmp_key;
1069
1070                         nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
1071                         if (!nla)
1072                                 goto nla_put_failure;
1073                         icmp_key = nla_data(nla);
1074                         icmp_key->icmp_type = ntohs(output->ipv4.tp.src);
1075                         icmp_key->icmp_code = ntohs(output->ipv4.tp.dst);
1076                 } else if (swkey->eth.type == htons(ETH_P_IPV6) &&
1077                            swkey->ip.proto == IPPROTO_ICMPV6) {
1078                         struct ovs_key_icmpv6 *icmpv6_key;
1079
1080                         nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
1081                                                 sizeof(*icmpv6_key));
1082                         if (!nla)
1083                                 goto nla_put_failure;
1084                         icmpv6_key = nla_data(nla);
1085                         icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src);
1086                         icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst);
1087
1088                         if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
1089                             icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
1090                                 struct ovs_key_nd *nd_key;
1091
1092                                 nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
1093                                 if (!nla)
1094                                         goto nla_put_failure;
1095                                 nd_key = nla_data(nla);
1096                                 memcpy(nd_key->nd_target, &output->ipv6.nd.target,
1097                                                         sizeof(nd_key->nd_target));
1098                                 memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN);
1099                                 memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN);
1100                         }
1101                 }
1102         }
1103
1104 unencap:
1105         if (encap)
1106                 nla_nest_end(skb, encap);
1107
1108         return 0;
1109
1110 nla_put_failure:
1111         return -EMSGSIZE;
1112 }
1113
1114 #define MAX_ACTIONS_BUFSIZE     (32 * 1024)
1115
1116 struct sw_flow_actions *ovs_nla_alloc_flow_actions(int size)
1117 {
1118         struct sw_flow_actions *sfa;
1119
1120         if (size > MAX_ACTIONS_BUFSIZE)
1121                 return ERR_PTR(-EINVAL);
1122
1123         sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
1124         if (!sfa)
1125                 return ERR_PTR(-ENOMEM);
1126
1127         sfa->actions_len = 0;
1128         return sfa;
1129 }
1130
1131 /* RCU callback used by ovs_nla_free_flow_actions. */
1132 static void rcu_free_acts_callback(struct rcu_head *rcu)
1133 {
1134         struct sw_flow_actions *sf_acts = container_of(rcu,
1135                         struct sw_flow_actions, rcu);
1136         kfree(sf_acts);
1137 }
1138
1139 /* Schedules 'sf_acts' to be freed after the next RCU grace period.
1140  * The caller must hold rcu_read_lock for this to be sensible. */
1141 void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
1142 {
1143         call_rcu(&sf_acts->rcu, rcu_free_acts_callback);
1144 }
1145
1146 static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
1147                                        int attr_len)
1148 {
1149
1150         struct sw_flow_actions *acts;
1151         int new_acts_size;
1152         int req_size = NLA_ALIGN(attr_len);
1153         int next_offset = offsetof(struct sw_flow_actions, actions) +
1154                                         (*sfa)->actions_len;
1155
1156         if (req_size <= (ksize(*sfa) - next_offset))
1157                 goto out;
1158
1159         new_acts_size = ksize(*sfa) * 2;
1160
1161         if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
1162                 if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
1163                         return ERR_PTR(-EMSGSIZE);
1164                 new_acts_size = MAX_ACTIONS_BUFSIZE;
1165         }
1166
1167         acts = ovs_nla_alloc_flow_actions(new_acts_size);
1168         if (IS_ERR(acts))
1169                 return (void *)acts;
1170
1171         memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
1172         acts->actions_len = (*sfa)->actions_len;
1173         kfree(*sfa);
1174         *sfa = acts;
1175
1176 out:
1177         (*sfa)->actions_len += req_size;
1178         return  (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
1179 }
1180
1181 static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len)
1182 {
1183         struct nlattr *a;
1184
1185         a = reserve_sfa_size(sfa, nla_attr_size(len));
1186         if (IS_ERR(a))
1187                 return PTR_ERR(a);
1188
1189         a->nla_type = attrtype;
1190         a->nla_len = nla_attr_size(len);
1191
1192         if (data)
1193                 memcpy(nla_data(a), data, len);
1194         memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
1195
1196         return 0;
1197 }
1198
1199 static inline int add_nested_action_start(struct sw_flow_actions **sfa,
1200                                           int attrtype)
1201 {
1202         int used = (*sfa)->actions_len;
1203         int err;
1204
1205         err = add_action(sfa, attrtype, NULL, 0);
1206         if (err)
1207                 return err;
1208
1209         return used;
1210 }
1211
1212 static inline void add_nested_action_end(struct sw_flow_actions *sfa,
1213                                          int st_offset)
1214 {
1215         struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions +
1216                                                                st_offset);
1217
1218         a->nla_len = sfa->actions_len - st_offset;
1219 }
1220
1221 static int validate_and_copy_sample(const struct nlattr *attr,
1222                                     const struct sw_flow_key *key, int depth,
1223                                     struct sw_flow_actions **sfa)
1224 {
1225         const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
1226         const struct nlattr *probability, *actions;
1227         const struct nlattr *a;
1228         int rem, start, err, st_acts;
1229
1230         memset(attrs, 0, sizeof(attrs));
1231         nla_for_each_nested(a, attr, rem) {
1232                 int type = nla_type(a);
1233                 if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
1234                         return -EINVAL;
1235                 attrs[type] = a;
1236         }
1237         if (rem)
1238                 return -EINVAL;
1239
1240         probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
1241         if (!probability || nla_len(probability) != sizeof(u32))
1242                 return -EINVAL;
1243
1244         actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
1245         if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
1246                 return -EINVAL;
1247
1248         /* validation done, copy sample action. */
1249         start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE);
1250         if (start < 0)
1251                 return start;
1252         err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
1253                          nla_data(probability), sizeof(u32));
1254         if (err)
1255                 return err;
1256         st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS);
1257         if (st_acts < 0)
1258                 return st_acts;
1259
1260         err = ovs_nla_copy_actions(actions, key, depth + 1, sfa);
1261         if (err)
1262                 return err;
1263
1264         add_nested_action_end(*sfa, st_acts);
1265         add_nested_action_end(*sfa, start);
1266
1267         return 0;
1268 }
1269
1270 static int validate_tp_port(const struct sw_flow_key *flow_key)
1271 {
1272         if (flow_key->eth.type == htons(ETH_P_IP)) {
1273                 if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
1274                         return 0;
1275         } else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
1276                 if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
1277                         return 0;
1278         }
1279
1280         return -EINVAL;
1281 }
1282
1283 void ovs_match_init(struct sw_flow_match *match,
1284                     struct sw_flow_key *key,
1285                     struct sw_flow_mask *mask)
1286 {
1287         memset(match, 0, sizeof(*match));
1288         match->key = key;
1289         match->mask = mask;
1290
1291         memset(key, 0, sizeof(*key));
1292
1293         if (mask) {
1294                 memset(&mask->key, 0, sizeof(mask->key));
1295                 mask->range.start = mask->range.end = 0;
1296         }
1297 }
1298
1299 static int validate_and_copy_set_tun(const struct nlattr *attr,
1300                                      struct sw_flow_actions **sfa)
1301 {
1302         struct sw_flow_match match;
1303         struct sw_flow_key key;
1304         int err, start;
1305
1306         ovs_match_init(&match, &key, NULL);
1307         err = ipv4_tun_from_nlattr(nla_data(attr), &match, false);
1308         if (err)
1309                 return err;
1310
1311         start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
1312         if (start < 0)
1313                 return start;
1314
1315         err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key,
1316                         sizeof(match.key->tun_key));
1317         add_nested_action_end(*sfa, start);
1318
1319         return err;
1320 }
1321
1322 static int validate_set(const struct nlattr *a,
1323                         const struct sw_flow_key *flow_key,
1324                         struct sw_flow_actions **sfa,
1325                         bool *set_tun)
1326 {
1327         const struct nlattr *ovs_key = nla_data(a);
1328         int key_type = nla_type(ovs_key);
1329
1330         /* There can be only one key in a action */
1331         if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
1332                 return -EINVAL;
1333
1334         if (key_type > OVS_KEY_ATTR_MAX ||
1335             (ovs_key_lens[key_type] != nla_len(ovs_key) &&
1336              ovs_key_lens[key_type] != -1))
1337                 return -EINVAL;
1338
1339         switch (key_type) {
1340         const struct ovs_key_ipv4 *ipv4_key;
1341         const struct ovs_key_ipv6 *ipv6_key;
1342         int err;
1343
1344         case OVS_KEY_ATTR_PRIORITY:
1345         case OVS_KEY_ATTR_SKB_MARK:
1346         case OVS_KEY_ATTR_ETHERNET:
1347                 break;
1348
1349         case OVS_KEY_ATTR_TUNNEL:
1350                 *set_tun = true;
1351                 err = validate_and_copy_set_tun(a, sfa);
1352                 if (err)
1353                         return err;
1354                 break;
1355
1356         case OVS_KEY_ATTR_IPV4:
1357                 if (flow_key->eth.type != htons(ETH_P_IP))
1358                         return -EINVAL;
1359
1360                 if (!flow_key->ip.proto)
1361                         return -EINVAL;
1362
1363                 ipv4_key = nla_data(ovs_key);
1364                 if (ipv4_key->ipv4_proto != flow_key->ip.proto)
1365                         return -EINVAL;
1366
1367                 if (ipv4_key->ipv4_frag != flow_key->ip.frag)
1368                         return -EINVAL;
1369
1370                 break;
1371
1372         case OVS_KEY_ATTR_IPV6:
1373                 if (flow_key->eth.type != htons(ETH_P_IPV6))
1374                         return -EINVAL;
1375
1376                 if (!flow_key->ip.proto)
1377                         return -EINVAL;
1378
1379                 ipv6_key = nla_data(ovs_key);
1380                 if (ipv6_key->ipv6_proto != flow_key->ip.proto)
1381                         return -EINVAL;
1382
1383                 if (ipv6_key->ipv6_frag != flow_key->ip.frag)
1384                         return -EINVAL;
1385
1386                 if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
1387                         return -EINVAL;
1388
1389                 break;
1390
1391         case OVS_KEY_ATTR_TCP:
1392                 if (flow_key->ip.proto != IPPROTO_TCP)
1393                         return -EINVAL;
1394
1395                 return validate_tp_port(flow_key);
1396
1397         case OVS_KEY_ATTR_UDP:
1398                 if (flow_key->ip.proto != IPPROTO_UDP)
1399                         return -EINVAL;
1400
1401                 return validate_tp_port(flow_key);
1402
1403         case OVS_KEY_ATTR_SCTP:
1404                 if (flow_key->ip.proto != IPPROTO_SCTP)
1405                         return -EINVAL;
1406
1407                 return validate_tp_port(flow_key);
1408
1409         default:
1410                 return -EINVAL;
1411         }
1412
1413         return 0;
1414 }
1415
1416 static int validate_userspace(const struct nlattr *attr)
1417 {
1418         static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
1419                 [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
1420                 [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
1421         };
1422         struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
1423         int error;
1424
1425         error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
1426                                  attr, userspace_policy);
1427         if (error)
1428                 return error;
1429
1430         if (!a[OVS_USERSPACE_ATTR_PID] ||
1431             !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
1432                 return -EINVAL;
1433
1434         return 0;
1435 }
1436
1437 static int copy_action(const struct nlattr *from,
1438                        struct sw_flow_actions **sfa)
1439 {
1440         int totlen = NLA_ALIGN(from->nla_len);
1441         struct nlattr *to;
1442
1443         to = reserve_sfa_size(sfa, from->nla_len);
1444         if (IS_ERR(to))
1445                 return PTR_ERR(to);
1446
1447         memcpy(to, from, totlen);
1448         return 0;
1449 }
1450
1451 int ovs_nla_copy_actions(const struct nlattr *attr,
1452                          const struct sw_flow_key *key,
1453                          int depth,
1454                          struct sw_flow_actions **sfa)
1455 {
1456         const struct nlattr *a;
1457         int rem, err;
1458
1459         if (depth >= SAMPLE_ACTION_DEPTH)
1460                 return -EOVERFLOW;
1461
1462         nla_for_each_nested(a, attr, rem) {
1463                 /* Expected argument lengths, (u32)-1 for variable length. */
1464                 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
1465                         [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
1466                         [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
1467                         [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
1468                         [OVS_ACTION_ATTR_POP_VLAN] = 0,
1469                         [OVS_ACTION_ATTR_SET] = (u32)-1,
1470                         [OVS_ACTION_ATTR_SAMPLE] = (u32)-1
1471                 };
1472                 const struct ovs_action_push_vlan *vlan;
1473                 int type = nla_type(a);
1474                 bool skip_copy;
1475
1476                 if (type > OVS_ACTION_ATTR_MAX ||
1477                     (action_lens[type] != nla_len(a) &&
1478                      action_lens[type] != (u32)-1))
1479                         return -EINVAL;
1480
1481                 skip_copy = false;
1482                 switch (type) {
1483                 case OVS_ACTION_ATTR_UNSPEC:
1484                         return -EINVAL;
1485
1486                 case OVS_ACTION_ATTR_USERSPACE:
1487                         err = validate_userspace(a);
1488                         if (err)
1489                                 return err;
1490                         break;
1491
1492                 case OVS_ACTION_ATTR_OUTPUT:
1493                         if (nla_get_u32(a) >= DP_MAX_PORTS)
1494                                 return -EINVAL;
1495                         break;
1496
1497
1498                 case OVS_ACTION_ATTR_POP_VLAN:
1499                         break;
1500
1501                 case OVS_ACTION_ATTR_PUSH_VLAN:
1502                         vlan = nla_data(a);
1503                         if (vlan->vlan_tpid != htons(ETH_P_8021Q))
1504                                 return -EINVAL;
1505                         if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
1506                                 return -EINVAL;
1507                         break;
1508
1509                 case OVS_ACTION_ATTR_SET:
1510                         err = validate_set(a, key, sfa, &skip_copy);
1511                         if (err)
1512                                 return err;
1513                         break;
1514
1515                 case OVS_ACTION_ATTR_SAMPLE:
1516                         err = validate_and_copy_sample(a, key, depth, sfa);
1517                         if (err)
1518                                 return err;
1519                         skip_copy = true;
1520                         break;
1521
1522                 default:
1523                         return -EINVAL;
1524                 }
1525                 if (!skip_copy) {
1526                         err = copy_action(a, sfa);
1527                         if (err)
1528                                 return err;
1529                 }
1530         }
1531
1532         if (rem > 0)
1533                 return -EINVAL;
1534
1535         return 0;
1536 }
1537
1538 static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
1539 {
1540         const struct nlattr *a;
1541         struct nlattr *start;
1542         int err = 0, rem;
1543
1544         start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
1545         if (!start)
1546                 return -EMSGSIZE;
1547
1548         nla_for_each_nested(a, attr, rem) {
1549                 int type = nla_type(a);
1550                 struct nlattr *st_sample;
1551
1552                 switch (type) {
1553                 case OVS_SAMPLE_ATTR_PROBABILITY:
1554                         if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY,
1555                                     sizeof(u32), nla_data(a)))
1556                                 return -EMSGSIZE;
1557                         break;
1558                 case OVS_SAMPLE_ATTR_ACTIONS:
1559                         st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
1560                         if (!st_sample)
1561                                 return -EMSGSIZE;
1562                         err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
1563                         if (err)
1564                                 return err;
1565                         nla_nest_end(skb, st_sample);
1566                         break;
1567                 }
1568         }
1569
1570         nla_nest_end(skb, start);
1571         return err;
1572 }
1573
1574 static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
1575 {
1576         const struct nlattr *ovs_key = nla_data(a);
1577         int key_type = nla_type(ovs_key);
1578         struct nlattr *start;
1579         int err;
1580
1581         switch (key_type) {
1582         case OVS_KEY_ATTR_IPV4_TUNNEL:
1583                 start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
1584                 if (!start)
1585                         return -EMSGSIZE;
1586
1587                 err = ipv4_tun_to_nlattr(skb, nla_data(ovs_key),
1588                                              nla_data(ovs_key));
1589                 if (err)
1590                         return err;
1591                 nla_nest_end(skb, start);
1592                 break;
1593         default:
1594                 if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
1595                         return -EMSGSIZE;
1596                 break;
1597         }
1598
1599         return 0;
1600 }
1601
1602 int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
1603 {
1604         const struct nlattr *a;
1605         int rem, err;
1606
1607         nla_for_each_attr(a, attr, len, rem) {
1608                 int type = nla_type(a);
1609
1610                 switch (type) {
1611                 case OVS_ACTION_ATTR_SET:
1612                         err = set_action_to_attr(a, skb);
1613                         if (err)
1614                                 return err;
1615                         break;
1616
1617                 case OVS_ACTION_ATTR_SAMPLE:
1618                         err = sample_action_to_attr(a, skb);
1619                         if (err)
1620                                 return err;
1621                         break;
1622                 default:
1623                         if (nla_put(skb, type, nla_len(a), nla_data(a)))
1624                                 return -EMSGSIZE;
1625                         break;
1626                 }
1627         }
1628
1629         return 0;
1630 }