datapath: Restructure datapath.c and flow.c
[sliver-openvswitch.git] / datapath / flow_netlink.c
1 /*
2  * Copyright (c) 2007-2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #include "flow.h"
20 #include "datapath.h"
21 #include <linux/uaccess.h>
22 #include <linux/netdevice.h>
23 #include <linux/etherdevice.h>
24 #include <linux/if_ether.h>
25 #include <linux/if_vlan.h>
26 #include <net/llc_pdu.h>
27 #include <linux/kernel.h>
28 #include <linux/jhash.h>
29 #include <linux/jiffies.h>
30 #include <linux/llc.h>
31 #include <linux/module.h>
32 #include <linux/in.h>
33 #include <linux/rcupdate.h>
34 #include <linux/if_arp.h>
35 #include <linux/ip.h>
36 #include <linux/ipv6.h>
37 #include <linux/sctp.h>
38 #include <linux/tcp.h>
39 #include <linux/udp.h>
40 #include <linux/icmp.h>
41 #include <linux/icmpv6.h>
42 #include <linux/rculist.h>
43 #include <net/ip.h>
44 #include <net/ipv6.h>
45 #include <net/ndisc.h>
46
47 #include "flow_netlink.h"
48
49 static void update_range__(struct sw_flow_match *match,
50                            size_t offset, size_t size, bool is_mask)
51 {
52         struct sw_flow_key_range *range = NULL;
53         size_t start = rounddown(offset, sizeof(long));
54         size_t end = roundup(offset + size, sizeof(long));
55
56         if (!is_mask)
57                 range = &match->range;
58         else if (match->mask)
59                 range = &match->mask->range;
60
61         if (!range)
62                 return;
63
64         if (range->start == range->end) {
65                 range->start = start;
66                 range->end = end;
67                 return;
68         }
69
70         if (range->start > start)
71                 range->start = start;
72
73         if (range->end < end)
74                 range->end = end;
75 }
76
/*
 * Assign 'value' to 'field' of the flow key, or of the mask's key when
 * 'is_mask' is true, after extending the matching range via
 * update_range__().  With 'is_mask' true and no mask attached to 'match',
 * nothing is stored and 'value' is not evaluated.
 */
#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
	do { \
		update_range__(match, offsetof(struct sw_flow_key, field), \
			       sizeof((match)->key->field), is_mask); \
		if (!(is_mask)) { \
			(match)->key->field = value; \
		} else if ((match)->mask) { \
			(match)->mask->key.field = value; \
		} \
	} while (0)
88
/*
 * Copy 'len' bytes from 'value_p' into 'field' of the flow key, or of the
 * mask's key when 'is_mask' is true, after extending the matching range by
 * 'len' bytes via update_range__().  With 'is_mask' true and no mask
 * attached to 'match', nothing is copied.
 */
#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
	do { \
		update_range__(match, offsetof(struct sw_flow_key, field), \
			       len, is_mask); \
		if (!(is_mask)) { \
			memcpy(&(match)->key->field, value_p, len); \
		} else if ((match)->mask) { \
			memcpy(&(match)->mask->key.field, value_p, len); \
		} \
	} while (0)
100
101 static u16 range_n_bytes(const struct sw_flow_key_range *range)
102 {
103         return range->end - range->start;
104 }
105
106 static bool match_validate(const struct sw_flow_match *match,
107                            u64 key_attrs, u64 mask_attrs)
108 {
109         u64 key_expected = 1ULL << OVS_KEY_ATTR_ETHERNET;
110         u64 mask_allowed = key_attrs;  /* At most allow all key attributes */
111
112         /* The following mask attributes allowed only if they
113          * pass the validation tests. */
114         mask_allowed &= ~((1ULL << OVS_KEY_ATTR_IPV4)
115                         | (1ULL << OVS_KEY_ATTR_IPV6)
116                         | (1ULL << OVS_KEY_ATTR_TCP)
117                         | (1ULL << OVS_KEY_ATTR_UDP)
118                         | (1ULL << OVS_KEY_ATTR_SCTP)
119                         | (1ULL << OVS_KEY_ATTR_ICMP)
120                         | (1ULL << OVS_KEY_ATTR_ICMPV6)
121                         | (1ULL << OVS_KEY_ATTR_ARP)
122                         | (1ULL << OVS_KEY_ATTR_ND));
123
124         /* Always allowed mask fields. */
125         mask_allowed |= ((1ULL << OVS_KEY_ATTR_TUNNEL)
126                        | (1ULL << OVS_KEY_ATTR_IN_PORT)
127                        | (1ULL << OVS_KEY_ATTR_ETHERTYPE));
128
129         /* Check key attributes. */
130         if (match->key->eth.type == htons(ETH_P_ARP)
131                         || match->key->eth.type == htons(ETH_P_RARP)) {
132                 key_expected |= 1ULL << OVS_KEY_ATTR_ARP;
133                 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
134                         mask_allowed |= 1ULL << OVS_KEY_ATTR_ARP;
135         }
136
137         if (match->key->eth.type == htons(ETH_P_IP)) {
138                 key_expected |= 1ULL << OVS_KEY_ATTR_IPV4;
139                 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
140                         mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV4;
141
142                 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
143                         if (match->key->ip.proto == IPPROTO_UDP) {
144                                 key_expected |= 1ULL << OVS_KEY_ATTR_UDP;
145                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
146                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_UDP;
147                         }
148
149                         if (match->key->ip.proto == IPPROTO_SCTP) {
150                                 key_expected |= 1ULL << OVS_KEY_ATTR_SCTP;
151                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
152                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_SCTP;
153                         }
154
155                         if (match->key->ip.proto == IPPROTO_TCP) {
156                                 key_expected |= 1ULL << OVS_KEY_ATTR_TCP;
157                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
158                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP;
159                         }
160
161                         if (match->key->ip.proto == IPPROTO_ICMP) {
162                                 key_expected |= 1ULL << OVS_KEY_ATTR_ICMP;
163                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
164                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_ICMP;
165                         }
166                 }
167         }
168
169         if (match->key->eth.type == htons(ETH_P_IPV6)) {
170                 key_expected |= 1ULL << OVS_KEY_ATTR_IPV6;
171                 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
172                         mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV6;
173
174                 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
175                         if (match->key->ip.proto == IPPROTO_UDP) {
176                                 key_expected |= 1ULL << OVS_KEY_ATTR_UDP;
177                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
178                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_UDP;
179                         }
180
181                         if (match->key->ip.proto == IPPROTO_SCTP) {
182                                 key_expected |= 1ULL << OVS_KEY_ATTR_SCTP;
183                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
184                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_SCTP;
185                         }
186
187                         if (match->key->ip.proto == IPPROTO_TCP) {
188                                 key_expected |= 1ULL << OVS_KEY_ATTR_TCP;
189                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
190                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP;
191                         }
192
193                         if (match->key->ip.proto == IPPROTO_ICMPV6) {
194                                 key_expected |= 1ULL << OVS_KEY_ATTR_ICMPV6;
195                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
196                                         mask_allowed |= 1ULL << OVS_KEY_ATTR_ICMPV6;
197
198                                 if (match->key->ipv6.tp.src ==
199                                                 htons(NDISC_NEIGHBOUR_SOLICITATION) ||
200                                     match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
201                                         key_expected |= 1ULL << OVS_KEY_ATTR_ND;
202                                         if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff)))
203                                                 mask_allowed |= 1ULL << OVS_KEY_ATTR_ND;
204                                 }
205                         }
206                 }
207         }
208
209         if ((key_attrs & key_expected) != key_expected) {
210                 /* Key attributes check failed. */
211                 OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n",
212                                 key_attrs, key_expected);
213                 return false;
214         }
215
216         if ((mask_attrs & mask_allowed) != mask_attrs) {
217                 /* Mask attributes check failed. */
218                 OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n",
219                                 mask_attrs, mask_allowed);
220                 return false;
221         }
222
223         return true;
224 }
225
226 /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
227 static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
228         [OVS_KEY_ATTR_ENCAP] = -1,
229         [OVS_KEY_ATTR_PRIORITY] = sizeof(u32),
230         [OVS_KEY_ATTR_IN_PORT] = sizeof(u32),
231         [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32),
232         [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
233         [OVS_KEY_ATTR_VLAN] = sizeof(__be16),
234         [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
235         [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4),
236         [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
237         [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
238         [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp),
239         [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp),
240         [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp),
241         [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
242         [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
243         [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
244         [OVS_KEY_ATTR_TUNNEL] = -1,
245 };
246
/*
 * Report whether the first 'size' bytes at 'fp' are all zero.
 *
 * @fp:   buffer to inspect; a NULL pointer yields false.
 * @size: number of bytes to inspect; zero bytes are trivially all-zero.
 *
 * The index is a size_t so that it has the same type as the bound: a
 * signed int index would be converted for the comparison and would
 * overflow for sizes above INT_MAX.
 */
static bool is_all_zero(const u8 *fp, size_t size)
{
	size_t i;

	if (!fp)
		return false;

	for (i = 0; i < size; i++) {
		if (fp[i])
			return false;
	}

	return true;
}
260
261 static int __parse_flow_nlattrs(const struct nlattr *attr,
262                                 const struct nlattr *a[],
263                                 u64 *attrsp, bool nz)
264 {
265         const struct nlattr *nla;
266         u64 attrs;
267         int rem;
268
269         attrs = *attrsp;
270         nla_for_each_nested(nla, attr, rem) {
271                 u16 type = nla_type(nla);
272                 int expected_len;
273
274                 if (type > OVS_KEY_ATTR_MAX) {
275                         OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n",
276                                   type, OVS_KEY_ATTR_MAX);
277                         return -EINVAL;
278                 }
279
280                 if (attrs & (1ULL << type)) {
281                         OVS_NLERR("Duplicate key attribute (type %d).\n", type);
282                         return -EINVAL;
283                 }
284
285                 expected_len = ovs_key_lens[type];
286                 if (nla_len(nla) != expected_len && expected_len != -1) {
287                         OVS_NLERR("Key attribute has unexpected length (type=%d"
288                                   ", length=%d, expected=%d).\n", type,
289                                   nla_len(nla), expected_len);
290                         return -EINVAL;
291                 }
292
293                 if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
294                         attrs |= 1ULL << type;
295                         a[type] = nla;
296                 }
297         }
298         if (rem) {
299                 OVS_NLERR("Message has %d unknown bytes.\n", rem);
300                 return -EINVAL;
301         }
302
303         *attrsp = attrs;
304         return 0;
305 }
306
307 static int parse_flow_mask_nlattrs(const struct nlattr *attr,
308                                    const struct nlattr *a[], u64 *attrsp)
309 {
310         return __parse_flow_nlattrs(attr, a, attrsp, true);
311 }
312
313 static int parse_flow_nlattrs(const struct nlattr *attr,
314                               const struct nlattr *a[], u64 *attrsp)
315 {
316         return __parse_flow_nlattrs(attr, a, attrsp, false);
317 }
318
319 static int ipv4_tun_from_nlattr(const struct nlattr *attr,
320                                 struct sw_flow_match *match, bool is_mask)
321 {
322         struct nlattr *a;
323         int rem;
324         bool ttl = false;
325         __be16 tun_flags = 0;
326
327         nla_for_each_nested(a, attr, rem) {
328                 int type = nla_type(a);
329                 static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
330                         [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64),
331                         [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32),
332                         [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32),
333                         [OVS_TUNNEL_KEY_ATTR_TOS] = 1,
334                         [OVS_TUNNEL_KEY_ATTR_TTL] = 1,
335                         [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
336                         [OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
337                 };
338
339                 if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
340                         OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n",
341                         type, OVS_TUNNEL_KEY_ATTR_MAX);
342                         return -EINVAL;
343                 }
344
345                 if (ovs_tunnel_key_lens[type] != nla_len(a)) {
346                         OVS_NLERR("IPv4 tunnel attribute type has unexpected "
347                                   " length (type=%d, length=%d, expected=%d).\n",
348                                   type, nla_len(a), ovs_tunnel_key_lens[type]);
349                         return -EINVAL;
350                 }
351
352                 switch (type) {
353                 case OVS_TUNNEL_KEY_ATTR_ID:
354                         SW_FLOW_KEY_PUT(match, tun_key.tun_id,
355                                         nla_get_be64(a), is_mask);
356                         tun_flags |= TUNNEL_KEY;
357                         break;
358                 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
359                         SW_FLOW_KEY_PUT(match, tun_key.ipv4_src,
360                                         nla_get_be32(a), is_mask);
361                         break;
362                 case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
363                         SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst,
364                                         nla_get_be32(a), is_mask);
365                         break;
366                 case OVS_TUNNEL_KEY_ATTR_TOS:
367                         SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos,
368                                         nla_get_u8(a), is_mask);
369                         break;
370                 case OVS_TUNNEL_KEY_ATTR_TTL:
371                         SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl,
372                                         nla_get_u8(a), is_mask);
373                         ttl = true;
374                         break;
375                 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
376                         tun_flags |= TUNNEL_DONT_FRAGMENT;
377                         break;
378                 case OVS_TUNNEL_KEY_ATTR_CSUM:
379                         tun_flags |= TUNNEL_CSUM;
380                         break;
381                 default:
382                         return -EINVAL;
383                 }
384         }
385
386         SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
387
388         if (rem > 0) {
389                 OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem);
390                 return -EINVAL;
391         }
392
393         if (!is_mask) {
394                 if (!match->key->tun_key.ipv4_dst) {
395                         OVS_NLERR("IPv4 tunnel destination address is zero.\n");
396                         return -EINVAL;
397                 }
398
399                 if (!ttl) {
400                         OVS_NLERR("IPv4 tunnel TTL not specified.\n");
401                         return -EINVAL;
402                 }
403         }
404
405         return 0;
406 }
407
408 static int ipv4_tun_to_nlattr(struct sk_buff *skb,
409                               const struct ovs_key_ipv4_tunnel *tun_key,
410                               const struct ovs_key_ipv4_tunnel *output)
411 {
412         struct nlattr *nla;
413
414         nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
415         if (!nla)
416                 return -EMSGSIZE;
417
418         if (output->tun_flags & TUNNEL_KEY &&
419             nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
420                 return -EMSGSIZE;
421         if (output->ipv4_src &&
422                 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
423                 return -EMSGSIZE;
424         if (output->ipv4_dst &&
425                 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
426                 return -EMSGSIZE;
427         if (output->ipv4_tos &&
428                 nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
429                 return -EMSGSIZE;
430         if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
431                 return -EMSGSIZE;
432         if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
433                 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
434                 return -EMSGSIZE;
435         if ((output->tun_flags & TUNNEL_CSUM) &&
436                 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
437                 return -EMSGSIZE;
438
439         nla_nest_end(skb, nla);
440         return 0;
441 }
442
443
444 static int metadata_from_nlattrs(struct sw_flow_match *match,  u64 *attrs,
445                                  const struct nlattr **a, bool is_mask)
446 {
447         if (*attrs & (1ULL << OVS_KEY_ATTR_PRIORITY)) {
448                 SW_FLOW_KEY_PUT(match, phy.priority,
449                           nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
450                 *attrs &= ~(1ULL << OVS_KEY_ATTR_PRIORITY);
451         }
452
453         if (*attrs & (1ULL << OVS_KEY_ATTR_IN_PORT)) {
454                 u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
455
456                 if (is_mask)
457                         in_port = 0xffffffff; /* Always exact match in_port. */
458                 else if (in_port >= DP_MAX_PORTS)
459                         return -EINVAL;
460
461                 SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
462                 *attrs &= ~(1ULL << OVS_KEY_ATTR_IN_PORT);
463         } else if (!is_mask) {
464                 SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
465         }
466
467         if (*attrs & (1ULL << OVS_KEY_ATTR_SKB_MARK)) {
468                 uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
469
470                 SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
471                 *attrs &= ~(1ULL << OVS_KEY_ATTR_SKB_MARK);
472         }
473         if (*attrs & (1ULL << OVS_KEY_ATTR_TUNNEL)) {
474                 if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
475                                          is_mask))
476                         return -EINVAL;
477                 *attrs &= ~(1ULL << OVS_KEY_ATTR_TUNNEL);
478         }
479         return 0;
480 }
481
482 static int ovs_key_from_nlattrs(struct sw_flow_match *match,  u64 attrs,
483                                 const struct nlattr **a, bool is_mask)
484 {
485         int err;
486         u64 orig_attrs = attrs;
487
488         err = metadata_from_nlattrs(match, &attrs, a, is_mask);
489         if (err)
490                 return err;
491
492         if (attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) {
493                 const struct ovs_key_ethernet *eth_key;
494
495                 eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
496                 SW_FLOW_KEY_MEMCPY(match, eth.src,
497                                 eth_key->eth_src, ETH_ALEN, is_mask);
498                 SW_FLOW_KEY_MEMCPY(match, eth.dst,
499                                 eth_key->eth_dst, ETH_ALEN, is_mask);
500                 attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERNET);
501         }
502
503         if (attrs & (1ULL << OVS_KEY_ATTR_VLAN)) {
504                 __be16 tci;
505
506                 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
507                 if (!(tci & htons(VLAN_TAG_PRESENT))) {
508                         if (is_mask)
509                                 OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n");
510                         else
511                                 OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n");
512
513                         return -EINVAL;
514                 }
515
516                 SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
517                 attrs &= ~(1ULL << OVS_KEY_ATTR_VLAN);
518         } else if (!is_mask)
519                 SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
520
521         if (attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)) {
522                 __be16 eth_type;
523
524                 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
525                 if (is_mask) {
526                         /* Always exact match EtherType. */
527                         eth_type = htons(0xffff);
528                 } else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
529                         OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n",
530                                         ntohs(eth_type), ETH_P_802_3_MIN);
531                         return -EINVAL;
532                 }
533
534                 SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
535                 attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
536         } else if (!is_mask) {
537                 SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
538         }
539
540         if (attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
541                 const struct ovs_key_ipv4 *ipv4_key;
542
543                 ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
544                 if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
545                         OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n",
546                                 ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
547                         return -EINVAL;
548                 }
549                 SW_FLOW_KEY_PUT(match, ip.proto,
550                                 ipv4_key->ipv4_proto, is_mask);
551                 SW_FLOW_KEY_PUT(match, ip.tos,
552                                 ipv4_key->ipv4_tos, is_mask);
553                 SW_FLOW_KEY_PUT(match, ip.ttl,
554                                 ipv4_key->ipv4_ttl, is_mask);
555                 SW_FLOW_KEY_PUT(match, ip.frag,
556                                 ipv4_key->ipv4_frag, is_mask);
557                 SW_FLOW_KEY_PUT(match, ipv4.addr.src,
558                                 ipv4_key->ipv4_src, is_mask);
559                 SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
560                                 ipv4_key->ipv4_dst, is_mask);
561                 attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4);
562         }
563
564         if (attrs & (1ULL << OVS_KEY_ATTR_IPV6)) {
565                 const struct ovs_key_ipv6 *ipv6_key;
566
567                 ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
568                 if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
569                         OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n",
570                                 ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
571                         return -EINVAL;
572                 }
573                 SW_FLOW_KEY_PUT(match, ipv6.label,
574                                 ipv6_key->ipv6_label, is_mask);
575                 SW_FLOW_KEY_PUT(match, ip.proto,
576                                 ipv6_key->ipv6_proto, is_mask);
577                 SW_FLOW_KEY_PUT(match, ip.tos,
578                                 ipv6_key->ipv6_tclass, is_mask);
579                 SW_FLOW_KEY_PUT(match, ip.ttl,
580                                 ipv6_key->ipv6_hlimit, is_mask);
581                 SW_FLOW_KEY_PUT(match, ip.frag,
582                                 ipv6_key->ipv6_frag, is_mask);
583                 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
584                                 ipv6_key->ipv6_src,
585                                 sizeof(match->key->ipv6.addr.src),
586                                 is_mask);
587                 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
588                                 ipv6_key->ipv6_dst,
589                                 sizeof(match->key->ipv6.addr.dst),
590                                 is_mask);
591
592                 attrs &= ~(1ULL << OVS_KEY_ATTR_IPV6);
593         }
594
595         if (attrs & (1ULL << OVS_KEY_ATTR_ARP)) {
596                 const struct ovs_key_arp *arp_key;
597
598                 arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
599                 if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
600                         OVS_NLERR("Unknown ARP opcode (opcode=%d).\n",
601                                   arp_key->arp_op);
602                         return -EINVAL;
603                 }
604
605                 SW_FLOW_KEY_PUT(match, ipv4.addr.src,
606                                 arp_key->arp_sip, is_mask);
607                 SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
608                         arp_key->arp_tip, is_mask);
609                 SW_FLOW_KEY_PUT(match, ip.proto,
610                                 ntohs(arp_key->arp_op), is_mask);
611                 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
612                                 arp_key->arp_sha, ETH_ALEN, is_mask);
613                 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
614                                 arp_key->arp_tha, ETH_ALEN, is_mask);
615
616                 attrs &= ~(1ULL << OVS_KEY_ATTR_ARP);
617         }
618
619         if (attrs & (1ULL << OVS_KEY_ATTR_TCP)) {
620                 const struct ovs_key_tcp *tcp_key;
621
622                 tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
623                 if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
624                         SW_FLOW_KEY_PUT(match, ipv4.tp.src,
625                                         tcp_key->tcp_src, is_mask);
626                         SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
627                                         tcp_key->tcp_dst, is_mask);
628                 } else {
629                         SW_FLOW_KEY_PUT(match, ipv6.tp.src,
630                                         tcp_key->tcp_src, is_mask);
631                         SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
632                                         tcp_key->tcp_dst, is_mask);
633                 }
634                 attrs &= ~(1ULL << OVS_KEY_ATTR_TCP);
635         }
636
637         if (attrs & (1ULL << OVS_KEY_ATTR_UDP)) {
638                 const struct ovs_key_udp *udp_key;
639
640                 udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
641                 if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
642                         SW_FLOW_KEY_PUT(match, ipv4.tp.src,
643                                         udp_key->udp_src, is_mask);
644                         SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
645                                         udp_key->udp_dst, is_mask);
646                 } else {
647                         SW_FLOW_KEY_PUT(match, ipv6.tp.src,
648                                         udp_key->udp_src, is_mask);
649                         SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
650                                         udp_key->udp_dst, is_mask);
651                 }
652                 attrs &= ~(1ULL << OVS_KEY_ATTR_UDP);
653         }
654
655         if (attrs & (1ULL << OVS_KEY_ATTR_SCTP)) {
656                 const struct ovs_key_sctp *sctp_key;
657
658                 sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
659                 if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
660                         SW_FLOW_KEY_PUT(match, ipv4.tp.src,
661                                         sctp_key->sctp_src, is_mask);
662                         SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
663                                         sctp_key->sctp_dst, is_mask);
664                 } else {
665                         SW_FLOW_KEY_PUT(match, ipv6.tp.src,
666                                         sctp_key->sctp_src, is_mask);
667                         SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
668                                         sctp_key->sctp_dst, is_mask);
669                 }
670                 attrs &= ~(1ULL << OVS_KEY_ATTR_SCTP);
671         }
672
673         if (attrs & (1ULL << OVS_KEY_ATTR_ICMP)) {
674                 const struct ovs_key_icmp *icmp_key;
675
676                 icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
677                 SW_FLOW_KEY_PUT(match, ipv4.tp.src,
678                                 htons(icmp_key->icmp_type), is_mask);
679                 SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
680                                 htons(icmp_key->icmp_code), is_mask);
681                 attrs &= ~(1ULL << OVS_KEY_ATTR_ICMP);
682         }
683
684         if (attrs & (1ULL << OVS_KEY_ATTR_ICMPV6)) {
685                 const struct ovs_key_icmpv6 *icmpv6_key;
686
687                 icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
688                 SW_FLOW_KEY_PUT(match, ipv6.tp.src,
689                                 htons(icmpv6_key->icmpv6_type), is_mask);
690                 SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
691                                 htons(icmpv6_key->icmpv6_code), is_mask);
692                 attrs &= ~(1ULL << OVS_KEY_ATTR_ICMPV6);
693         }
694
695         if (attrs & (1ULL << OVS_KEY_ATTR_ND)) {
696                 const struct ovs_key_nd *nd_key;
697
698                 nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
699                 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
700                         nd_key->nd_target,
701                         sizeof(match->key->ipv6.nd.target),
702                         is_mask);
703                 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
704                         nd_key->nd_sll, ETH_ALEN, is_mask);
705                 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
706                                 nd_key->nd_tll, ETH_ALEN, is_mask);
707                 attrs &= ~(1ULL << OVS_KEY_ATTR_ND);
708         }
709
710         if (attrs != 0)
711                 return -EINVAL;
712
713         return 0;
714 }
715
716 static void sw_flow_mask_set(struct sw_flow_mask *mask,
717                              struct sw_flow_key_range *range, u8 val)
718 {
719         u8 *m = (u8 *)&mask->key + range->start;
720
721         mask->range = *range;
722         memset(m, val, range_n_bytes(range));
723 }
724
725 /**
726  * ovs_nla_get_match - parses Netlink attributes into a flow key and
727  * mask. In case the 'mask' is NULL, the flow is treated as exact match
728  * flow. Otherwise, it is treated as a wildcarded flow, except the mask
729  * does not include any don't care bit.
730  * @match: receives the extracted flow match information.
731  * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
732  * sequence. The fields should of the packet that triggered the creation
733  * of this flow.
734  * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
735  * attribute specifies the mask field of the wildcarded flow.
736  */
737 int ovs_nla_get_match(struct sw_flow_match *match,
738                       const struct nlattr *key,
739                       const struct nlattr *mask)
740 {
741         const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
742         const struct nlattr *encap;
743         u64 key_attrs = 0;
744         u64 mask_attrs = 0;
745         bool encap_valid = false;
746         int err;
747
748         err = parse_flow_nlattrs(key, a, &key_attrs);
749         if (err)
750                 return err;
751
752         if ((key_attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) &&
753             (key_attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)) &&
754             (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
755                 __be16 tci;
756
757                 if (!((key_attrs & (1ULL << OVS_KEY_ATTR_VLAN)) &&
758                       (key_attrs & (1ULL << OVS_KEY_ATTR_ENCAP)))) {
759                         OVS_NLERR("Invalid Vlan frame.\n");
760                         return -EINVAL;
761                 }
762
763                 key_attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
764                 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
765                 encap = a[OVS_KEY_ATTR_ENCAP];
766                 key_attrs &= ~(1ULL << OVS_KEY_ATTR_ENCAP);
767                 encap_valid = true;
768
769                 if (tci & htons(VLAN_TAG_PRESENT)) {
770                         err = parse_flow_nlattrs(encap, a, &key_attrs);
771                         if (err)
772                                 return err;
773                 } else if (!tci) {
774                         /* Corner case for truncated 802.1Q header. */
775                         if (nla_len(encap)) {
776                                 OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n");
777                                 return -EINVAL;
778                         }
779                 } else {
780                         OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n");
781                         return  -EINVAL;
782                 }
783         }
784
785         err = ovs_key_from_nlattrs(match, key_attrs, a, false);
786         if (err)
787                 return err;
788
789         if (mask) {
790                 err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
791                 if (err)
792                         return err;
793
794                 if (mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP)  {
795                         __be16 eth_type = 0;
796                         __be16 tci = 0;
797
798                         if (!encap_valid) {
799                                 OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n");
800                                 return  -EINVAL;
801                         }
802
803                         mask_attrs &= ~(1ULL << OVS_KEY_ATTR_ENCAP);
804                         if (a[OVS_KEY_ATTR_ETHERTYPE])
805                                 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
806
807                         if (eth_type == htons(0xffff)) {
808                                 mask_attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
809                                 encap = a[OVS_KEY_ATTR_ENCAP];
810                                 err = parse_flow_mask_nlattrs(encap, a, &mask_attrs);
811                         } else {
812                                 OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n",
813                                                 ntohs(eth_type));
814                                 return -EINVAL;
815                         }
816
817                         if (a[OVS_KEY_ATTR_VLAN])
818                                 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
819
820                         if (!(tci & htons(VLAN_TAG_PRESENT))) {
821                                 OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci));
822                                 return -EINVAL;
823                         }
824                 }
825
826                 err = ovs_key_from_nlattrs(match, mask_attrs, a, true);
827                 if (err)
828                         return err;
829         } else {
830                 /* Populate exact match flow's key mask. */
831                 if (match->mask)
832                         sw_flow_mask_set(match->mask, &match->range, 0xff);
833         }
834
835         if (!match_validate(match, key_attrs, mask_attrs))
836                 return -EINVAL;
837
838         return 0;
839 }
840
841 /**
842  * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
843  * @flow: Receives extracted in_port, priority, tun_key and skb_mark.
844  * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
845  * sequence.
846  *
847  * This parses a series of Netlink attributes that form a flow key, which must
848  * take the same form accepted by flow_from_nlattrs(), but only enough of it to
849  * get the metadata, that is, the parts of the flow key that cannot be
850  * extracted from the packet itself.
851  */
852
853 int ovs_nla_get_flow_metadata(struct sw_flow *flow,
854                               const struct nlattr *attr)
855 {
856         struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key;
857         const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
858         u64 attrs = 0;
859         int err;
860         struct sw_flow_match match;
861
862         flow->key.phy.in_port = DP_MAX_PORTS;
863         flow->key.phy.priority = 0;
864         flow->key.phy.skb_mark = 0;
865         memset(tun_key, 0, sizeof(flow->key.tun_key));
866
867         err = parse_flow_nlattrs(attr, a, &attrs);
868         if (err)
869                 return -EINVAL;
870
871         memset(&match, 0, sizeof(match));
872         match.key = &flow->key;
873
874         err = metadata_from_nlattrs(&match, &attrs, a, false);
875         if (err)
876                 return err;
877
878         return 0;
879 }
880
881 int ovs_nla_put_flow(const struct sw_flow_key *swkey,
882                      const struct sw_flow_key *output, struct sk_buff *skb)
883 {
884         struct ovs_key_ethernet *eth_key;
885         struct nlattr *nla, *encap;
886         bool is_mask = (swkey != output);
887
888         if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
889                 goto nla_put_failure;
890
891         if ((swkey->tun_key.ipv4_dst || is_mask) &&
892             ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key))
893                 goto nla_put_failure;
894
895         if (swkey->phy.in_port == DP_MAX_PORTS) {
896                 if (is_mask && (output->phy.in_port == 0xffff))
897                         if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
898                                 goto nla_put_failure;
899         } else {
900                 u16 upper_u16;
901                 upper_u16 = !is_mask ? 0 : 0xffff;
902
903                 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
904                                 (upper_u16 << 16) | output->phy.in_port))
905                         goto nla_put_failure;
906         }
907
908         if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
909                 goto nla_put_failure;
910
911         nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
912         if (!nla)
913                 goto nla_put_failure;
914
915         eth_key = nla_data(nla);
916         memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN);
917         memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN);
918
919         if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
920                 __be16 eth_type;
921                 eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff);
922                 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
923                     nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
924                         goto nla_put_failure;
925                 encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
926                 if (!swkey->eth.tci)
927                         goto unencap;
928         } else
929                 encap = NULL;
930
931         if (swkey->eth.type == htons(ETH_P_802_2)) {
932                 /*
933                  * Ethertype 802.2 is represented in the netlink with omitted
934                  * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
935                  * 0xffff in the mask attribute.  Ethertype can also
936                  * be wildcarded.
937                  */
938                 if (is_mask && output->eth.type)
939                         if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
940                                                 output->eth.type))
941                                 goto nla_put_failure;
942                 goto unencap;
943         }
944
945         if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
946                 goto nla_put_failure;
947
948         if (swkey->eth.type == htons(ETH_P_IP)) {
949                 struct ovs_key_ipv4 *ipv4_key;
950
951                 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
952                 if (!nla)
953                         goto nla_put_failure;
954                 ipv4_key = nla_data(nla);
955                 ipv4_key->ipv4_src = output->ipv4.addr.src;
956                 ipv4_key->ipv4_dst = output->ipv4.addr.dst;
957                 ipv4_key->ipv4_proto = output->ip.proto;
958                 ipv4_key->ipv4_tos = output->ip.tos;
959                 ipv4_key->ipv4_ttl = output->ip.ttl;
960                 ipv4_key->ipv4_frag = output->ip.frag;
961         } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
962                 struct ovs_key_ipv6 *ipv6_key;
963
964                 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
965                 if (!nla)
966                         goto nla_put_failure;
967                 ipv6_key = nla_data(nla);
968                 memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
969                                 sizeof(ipv6_key->ipv6_src));
970                 memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
971                                 sizeof(ipv6_key->ipv6_dst));
972                 ipv6_key->ipv6_label = output->ipv6.label;
973                 ipv6_key->ipv6_proto = output->ip.proto;
974                 ipv6_key->ipv6_tclass = output->ip.tos;
975                 ipv6_key->ipv6_hlimit = output->ip.ttl;
976                 ipv6_key->ipv6_frag = output->ip.frag;
977         } else if (swkey->eth.type == htons(ETH_P_ARP) ||
978                    swkey->eth.type == htons(ETH_P_RARP)) {
979                 struct ovs_key_arp *arp_key;
980
981                 nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
982                 if (!nla)
983                         goto nla_put_failure;
984                 arp_key = nla_data(nla);
985                 memset(arp_key, 0, sizeof(struct ovs_key_arp));
986                 arp_key->arp_sip = output->ipv4.addr.src;
987                 arp_key->arp_tip = output->ipv4.addr.dst;
988                 arp_key->arp_op = htons(output->ip.proto);
989                 memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN);
990                 memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN);
991         }
992
993         if ((swkey->eth.type == htons(ETH_P_IP) ||
994              swkey->eth.type == htons(ETH_P_IPV6)) &&
995              swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
996
997                 if (swkey->ip.proto == IPPROTO_TCP) {
998                         struct ovs_key_tcp *tcp_key;
999
1000                         nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
1001                         if (!nla)
1002                                 goto nla_put_failure;
1003                         tcp_key = nla_data(nla);
1004                         if (swkey->eth.type == htons(ETH_P_IP)) {
1005                                 tcp_key->tcp_src = output->ipv4.tp.src;
1006                                 tcp_key->tcp_dst = output->ipv4.tp.dst;
1007                         } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
1008                                 tcp_key->tcp_src = output->ipv6.tp.src;
1009                                 tcp_key->tcp_dst = output->ipv6.tp.dst;
1010                         }
1011                 } else if (swkey->ip.proto == IPPROTO_UDP) {
1012                         struct ovs_key_udp *udp_key;
1013
1014                         nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
1015                         if (!nla)
1016                                 goto nla_put_failure;
1017                         udp_key = nla_data(nla);
1018                         if (swkey->eth.type == htons(ETH_P_IP)) {
1019                                 udp_key->udp_src = output->ipv4.tp.src;
1020                                 udp_key->udp_dst = output->ipv4.tp.dst;
1021                         } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
1022                                 udp_key->udp_src = output->ipv6.tp.src;
1023                                 udp_key->udp_dst = output->ipv6.tp.dst;
1024                         }
1025                 } else if (swkey->ip.proto == IPPROTO_SCTP) {
1026                         struct ovs_key_sctp *sctp_key;
1027
1028                         nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
1029                         if (!nla)
1030                                 goto nla_put_failure;
1031                         sctp_key = nla_data(nla);
1032                         if (swkey->eth.type == htons(ETH_P_IP)) {
1033                                 sctp_key->sctp_src = swkey->ipv4.tp.src;
1034                                 sctp_key->sctp_dst = swkey->ipv4.tp.dst;
1035                         } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
1036                                 sctp_key->sctp_src = swkey->ipv6.tp.src;
1037                                 sctp_key->sctp_dst = swkey->ipv6.tp.dst;
1038                         }
1039                 } else if (swkey->eth.type == htons(ETH_P_IP) &&
1040                            swkey->ip.proto == IPPROTO_ICMP) {
1041                         struct ovs_key_icmp *icmp_key;
1042
1043                         nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
1044                         if (!nla)
1045                                 goto nla_put_failure;
1046                         icmp_key = nla_data(nla);
1047                         icmp_key->icmp_type = ntohs(output->ipv4.tp.src);
1048                         icmp_key->icmp_code = ntohs(output->ipv4.tp.dst);
1049                 } else if (swkey->eth.type == htons(ETH_P_IPV6) &&
1050                            swkey->ip.proto == IPPROTO_ICMPV6) {
1051                         struct ovs_key_icmpv6 *icmpv6_key;
1052
1053                         nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
1054                                                 sizeof(*icmpv6_key));
1055                         if (!nla)
1056                                 goto nla_put_failure;
1057                         icmpv6_key = nla_data(nla);
1058                         icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src);
1059                         icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst);
1060
1061                         if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
1062                             icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
1063                                 struct ovs_key_nd *nd_key;
1064
1065                                 nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
1066                                 if (!nla)
1067                                         goto nla_put_failure;
1068                                 nd_key = nla_data(nla);
1069                                 memcpy(nd_key->nd_target, &output->ipv6.nd.target,
1070                                                         sizeof(nd_key->nd_target));
1071                                 memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN);
1072                                 memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN);
1073                         }
1074                 }
1075         }
1076
1077 unencap:
1078         if (encap)
1079                 nla_nest_end(skb, encap);
1080
1081         return 0;
1082
1083 nla_put_failure:
1084         return -EMSGSIZE;
1085 }
1086
1087 #define MAX_ACTIONS_BUFSIZE     (32 * 1024)
1088
1089 struct sw_flow_actions *ovs_nla_alloc_flow_actions(int size)
1090 {
1091         struct sw_flow_actions *sfa;
1092
1093         if (size > MAX_ACTIONS_BUFSIZE)
1094                 return ERR_PTR(-EINVAL);
1095
1096         sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
1097         if (!sfa)
1098                 return ERR_PTR(-ENOMEM);
1099
1100         sfa->actions_len = 0;
1101         return sfa;
1102 }
1103
1104 /* RCU callback used by ovs_nla_free_flow_actions. */
1105 static void rcu_free_acts_callback(struct rcu_head *rcu)
1106 {
1107         struct sw_flow_actions *sf_acts = container_of(rcu,
1108                         struct sw_flow_actions, rcu);
1109         kfree(sf_acts);
1110 }
1111
1112 /* Schedules 'sf_acts' to be freed after the next RCU grace period.
1113  * The caller must hold rcu_read_lock for this to be sensible. */
1114 void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
1115 {
1116         call_rcu(&sf_acts->rcu, rcu_free_acts_callback);
1117 }
1118
1119 static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
1120                                        int attr_len)
1121 {
1122
1123         struct sw_flow_actions *acts;
1124         int new_acts_size;
1125         int req_size = NLA_ALIGN(attr_len);
1126         int next_offset = offsetof(struct sw_flow_actions, actions) +
1127                                         (*sfa)->actions_len;
1128
1129         if (req_size <= (ksize(*sfa) - next_offset))
1130                 goto out;
1131
1132         new_acts_size = ksize(*sfa) * 2;
1133
1134         if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
1135                 if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
1136                         return ERR_PTR(-EMSGSIZE);
1137                 new_acts_size = MAX_ACTIONS_BUFSIZE;
1138         }
1139
1140         acts = ovs_nla_alloc_flow_actions(new_acts_size);
1141         if (IS_ERR(acts))
1142                 return (void *)acts;
1143
1144         memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
1145         acts->actions_len = (*sfa)->actions_len;
1146         kfree(*sfa);
1147         *sfa = acts;
1148
1149 out:
1150         (*sfa)->actions_len += req_size;
1151         return  (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
1152 }
1153
1154 static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len)
1155 {
1156         struct nlattr *a;
1157
1158         a = reserve_sfa_size(sfa, nla_attr_size(len));
1159         if (IS_ERR(a))
1160                 return PTR_ERR(a);
1161
1162         a->nla_type = attrtype;
1163         a->nla_len = nla_attr_size(len);
1164
1165         if (data)
1166                 memcpy(nla_data(a), data, len);
1167         memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
1168
1169         return 0;
1170 }
1171
1172 static inline int add_nested_action_start(struct sw_flow_actions **sfa,
1173                                           int attrtype)
1174 {
1175         int used = (*sfa)->actions_len;
1176         int err;
1177
1178         err = add_action(sfa, attrtype, NULL, 0);
1179         if (err)
1180                 return err;
1181
1182         return used;
1183 }
1184
1185 static inline void add_nested_action_end(struct sw_flow_actions *sfa,
1186                                          int st_offset)
1187 {
1188         struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions +
1189                                                                st_offset);
1190
1191         a->nla_len = sfa->actions_len - st_offset;
1192 }
1193
1194 static int validate_and_copy_sample(const struct nlattr *attr,
1195                                     const struct sw_flow_key *key, int depth,
1196                                     struct sw_flow_actions **sfa)
1197 {
1198         const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
1199         const struct nlattr *probability, *actions;
1200         const struct nlattr *a;
1201         int rem, start, err, st_acts;
1202
1203         memset(attrs, 0, sizeof(attrs));
1204         nla_for_each_nested(a, attr, rem) {
1205                 int type = nla_type(a);
1206                 if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
1207                         return -EINVAL;
1208                 attrs[type] = a;
1209         }
1210         if (rem)
1211                 return -EINVAL;
1212
1213         probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
1214         if (!probability || nla_len(probability) != sizeof(u32))
1215                 return -EINVAL;
1216
1217         actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
1218         if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
1219                 return -EINVAL;
1220
1221         /* validation done, copy sample action. */
1222         start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE);
1223         if (start < 0)
1224                 return start;
1225         err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
1226                          nla_data(probability), sizeof(u32));
1227         if (err)
1228                 return err;
1229         st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS);
1230         if (st_acts < 0)
1231                 return st_acts;
1232
1233         err = ovs_nla_copy_actions(actions, key, depth + 1, sfa);
1234         if (err)
1235                 return err;
1236
1237         add_nested_action_end(*sfa, st_acts);
1238         add_nested_action_end(*sfa, start);
1239
1240         return 0;
1241 }
1242
1243 static int validate_tp_port(const struct sw_flow_key *flow_key)
1244 {
1245         if (flow_key->eth.type == htons(ETH_P_IP)) {
1246                 if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
1247                         return 0;
1248         } else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
1249                 if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
1250                         return 0;
1251         }
1252
1253         return -EINVAL;
1254 }
1255
1256 void ovs_match_init(struct sw_flow_match *match,
1257                     struct sw_flow_key *key,
1258                     struct sw_flow_mask *mask)
1259 {
1260         memset(match, 0, sizeof(*match));
1261         match->key = key;
1262         match->mask = mask;
1263
1264         memset(key, 0, sizeof(*key));
1265
1266         if (mask) {
1267                 memset(&mask->key, 0, sizeof(mask->key));
1268                 mask->range.start = mask->range.end = 0;
1269         }
1270 }
1271
1272 static int validate_and_copy_set_tun(const struct nlattr *attr,
1273                                      struct sw_flow_actions **sfa)
1274 {
1275         struct sw_flow_match match;
1276         struct sw_flow_key key;
1277         int err, start;
1278
1279         ovs_match_init(&match, &key, NULL);
1280         err = ipv4_tun_from_nlattr(nla_data(attr), &match, false);
1281         if (err)
1282                 return err;
1283
1284         start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
1285         if (start < 0)
1286                 return start;
1287
1288         err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key,
1289                         sizeof(match.key->tun_key));
1290         add_nested_action_end(*sfa, start);
1291
1292         return err;
1293 }
1294
1295 static int validate_set(const struct nlattr *a,
1296                         const struct sw_flow_key *flow_key,
1297                         struct sw_flow_actions **sfa,
1298                         bool *set_tun)
1299 {
1300         const struct nlattr *ovs_key = nla_data(a);
1301         int key_type = nla_type(ovs_key);
1302
1303         /* There can be only one key in a action */
1304         if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
1305                 return -EINVAL;
1306
1307         if (key_type > OVS_KEY_ATTR_MAX ||
1308             (ovs_key_lens[key_type] != nla_len(ovs_key) &&
1309              ovs_key_lens[key_type] != -1))
1310                 return -EINVAL;
1311
1312         switch (key_type) {
1313         const struct ovs_key_ipv4 *ipv4_key;
1314         const struct ovs_key_ipv6 *ipv6_key;
1315         int err;
1316
1317         case OVS_KEY_ATTR_PRIORITY:
1318         case OVS_KEY_ATTR_SKB_MARK:
1319         case OVS_KEY_ATTR_ETHERNET:
1320                 break;
1321
1322         case OVS_KEY_ATTR_TUNNEL:
1323                 *set_tun = true;
1324                 err = validate_and_copy_set_tun(a, sfa);
1325                 if (err)
1326                         return err;
1327                 break;
1328
1329         case OVS_KEY_ATTR_IPV4:
1330                 if (flow_key->eth.type != htons(ETH_P_IP))
1331                         return -EINVAL;
1332
1333                 if (!flow_key->ip.proto)
1334                         return -EINVAL;
1335
1336                 ipv4_key = nla_data(ovs_key);
1337                 if (ipv4_key->ipv4_proto != flow_key->ip.proto)
1338                         return -EINVAL;
1339
1340                 if (ipv4_key->ipv4_frag != flow_key->ip.frag)
1341                         return -EINVAL;
1342
1343                 break;
1344
1345         case OVS_KEY_ATTR_IPV6:
1346                 if (flow_key->eth.type != htons(ETH_P_IPV6))
1347                         return -EINVAL;
1348
1349                 if (!flow_key->ip.proto)
1350                         return -EINVAL;
1351
1352                 ipv6_key = nla_data(ovs_key);
1353                 if (ipv6_key->ipv6_proto != flow_key->ip.proto)
1354                         return -EINVAL;
1355
1356                 if (ipv6_key->ipv6_frag != flow_key->ip.frag)
1357                         return -EINVAL;
1358
1359                 if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
1360                         return -EINVAL;
1361
1362                 break;
1363
1364         case OVS_KEY_ATTR_TCP:
1365                 if (flow_key->ip.proto != IPPROTO_TCP)
1366                         return -EINVAL;
1367
1368                 return validate_tp_port(flow_key);
1369
1370         case OVS_KEY_ATTR_UDP:
1371                 if (flow_key->ip.proto != IPPROTO_UDP)
1372                         return -EINVAL;
1373
1374                 return validate_tp_port(flow_key);
1375
1376         case OVS_KEY_ATTR_SCTP:
1377                 if (flow_key->ip.proto != IPPROTO_SCTP)
1378                         return -EINVAL;
1379
1380                 return validate_tp_port(flow_key);
1381
1382         default:
1383                 return -EINVAL;
1384         }
1385
1386         return 0;
1387 }
1388
1389 static int validate_userspace(const struct nlattr *attr)
1390 {
1391         static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
1392                 [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
1393                 [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
1394         };
1395         struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
1396         int error;
1397
1398         error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
1399                                  attr, userspace_policy);
1400         if (error)
1401                 return error;
1402
1403         if (!a[OVS_USERSPACE_ATTR_PID] ||
1404             !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
1405                 return -EINVAL;
1406
1407         return 0;
1408 }
1409
1410 static int copy_action(const struct nlattr *from,
1411                        struct sw_flow_actions **sfa)
1412 {
1413         int totlen = NLA_ALIGN(from->nla_len);
1414         struct nlattr *to;
1415
1416         to = reserve_sfa_size(sfa, from->nla_len);
1417         if (IS_ERR(to))
1418                 return PTR_ERR(to);
1419
1420         memcpy(to, from, totlen);
1421         return 0;
1422 }
1423
1424 int ovs_nla_copy_actions(const struct nlattr *attr,
1425                          const struct sw_flow_key *key,
1426                          int depth,
1427                          struct sw_flow_actions **sfa)
1428 {
1429         const struct nlattr *a;
1430         int rem, err;
1431
1432         if (depth >= SAMPLE_ACTION_DEPTH)
1433                 return -EOVERFLOW;
1434
1435         nla_for_each_nested(a, attr, rem) {
1436                 /* Expected argument lengths, (u32)-1 for variable length. */
1437                 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
1438                         [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
1439                         [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
1440                         [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
1441                         [OVS_ACTION_ATTR_POP_VLAN] = 0,
1442                         [OVS_ACTION_ATTR_SET] = (u32)-1,
1443                         [OVS_ACTION_ATTR_SAMPLE] = (u32)-1
1444                 };
1445                 const struct ovs_action_push_vlan *vlan;
1446                 int type = nla_type(a);
1447                 bool skip_copy;
1448
1449                 if (type > OVS_ACTION_ATTR_MAX ||
1450                     (action_lens[type] != nla_len(a) &&
1451                      action_lens[type] != (u32)-1))
1452                         return -EINVAL;
1453
1454                 skip_copy = false;
1455                 switch (type) {
1456                 case OVS_ACTION_ATTR_UNSPEC:
1457                         return -EINVAL;
1458
1459                 case OVS_ACTION_ATTR_USERSPACE:
1460                         err = validate_userspace(a);
1461                         if (err)
1462                                 return err;
1463                         break;
1464
1465                 case OVS_ACTION_ATTR_OUTPUT:
1466                         if (nla_get_u32(a) >= DP_MAX_PORTS)
1467                                 return -EINVAL;
1468                         break;
1469
1470
1471                 case OVS_ACTION_ATTR_POP_VLAN:
1472                         break;
1473
1474                 case OVS_ACTION_ATTR_PUSH_VLAN:
1475                         vlan = nla_data(a);
1476                         if (vlan->vlan_tpid != htons(ETH_P_8021Q))
1477                                 return -EINVAL;
1478                         if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
1479                                 return -EINVAL;
1480                         break;
1481
1482                 case OVS_ACTION_ATTR_SET:
1483                         err = validate_set(a, key, sfa, &skip_copy);
1484                         if (err)
1485                                 return err;
1486                         break;
1487
1488                 case OVS_ACTION_ATTR_SAMPLE:
1489                         err = validate_and_copy_sample(a, key, depth, sfa);
1490                         if (err)
1491                                 return err;
1492                         skip_copy = true;
1493                         break;
1494
1495                 default:
1496                         return -EINVAL;
1497                 }
1498                 if (!skip_copy) {
1499                         err = copy_action(a, sfa);
1500                         if (err)
1501                                 return err;
1502                 }
1503         }
1504
1505         if (rem > 0)
1506                 return -EINVAL;
1507
1508         return 0;
1509 }
1510
1511 static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
1512 {
1513         const struct nlattr *a;
1514         struct nlattr *start;
1515         int err = 0, rem;
1516
1517         start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
1518         if (!start)
1519                 return -EMSGSIZE;
1520
1521         nla_for_each_nested(a, attr, rem) {
1522                 int type = nla_type(a);
1523                 struct nlattr *st_sample;
1524
1525                 switch (type) {
1526                 case OVS_SAMPLE_ATTR_PROBABILITY:
1527                         if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY,
1528                                     sizeof(u32), nla_data(a)))
1529                                 return -EMSGSIZE;
1530                         break;
1531                 case OVS_SAMPLE_ATTR_ACTIONS:
1532                         st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
1533                         if (!st_sample)
1534                                 return -EMSGSIZE;
1535                         err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
1536                         if (err)
1537                                 return err;
1538                         nla_nest_end(skb, st_sample);
1539                         break;
1540                 }
1541         }
1542
1543         nla_nest_end(skb, start);
1544         return err;
1545 }
1546
1547 static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
1548 {
1549         const struct nlattr *ovs_key = nla_data(a);
1550         int key_type = nla_type(ovs_key);
1551         struct nlattr *start;
1552         int err;
1553
1554         switch (key_type) {
1555         case OVS_KEY_ATTR_IPV4_TUNNEL:
1556                 start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
1557                 if (!start)
1558                         return -EMSGSIZE;
1559
1560                 err = ipv4_tun_to_nlattr(skb, nla_data(ovs_key),
1561                                              nla_data(ovs_key));
1562                 if (err)
1563                         return err;
1564                 nla_nest_end(skb, start);
1565                 break;
1566         default:
1567                 if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
1568                         return -EMSGSIZE;
1569                 break;
1570         }
1571
1572         return 0;
1573 }
1574
1575 int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
1576 {
1577         const struct nlattr *a;
1578         int rem, err;
1579
1580         nla_for_each_attr(a, attr, len, rem) {
1581                 int type = nla_type(a);
1582
1583                 switch (type) {
1584                 case OVS_ACTION_ATTR_SET:
1585                         err = set_action_to_attr(a, skb);
1586                         if (err)
1587                                 return err;
1588                         break;
1589
1590                 case OVS_ACTION_ATTR_SAMPLE:
1591                         err = sample_action_to_attr(a, skb);
1592                         if (err)
1593                                 return err;
1594                         break;
1595                 default:
1596                         if (nla_put(skb, type, nla_len(a), nla_data(a)))
1597                                 return -EMSGSIZE;
1598                         break;
1599                 }
1600         }
1601
1602         return 0;
1603 }