Strip down vport interface: iflink
[sliver-openvswitch.git] datapath/datapath.c
/*
 * Copyright (c) 2007, 2008, 2009, 2010, 2011 Nicira Networks.
 * Distributed under the terms of the GNU GPL version 2.
 *
 * Significant portions of this file may be copied from parts of the Linux
 * kernel, by Linus Torvalds and others.
 */

/* Functions for managing the dp interface/device. */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/version.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/system.h>
#include <asm/div64.h>
#include <asm/bug.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/inet_ecn.h>
#include <net/genetlink.h>

#include "openvswitch/datapath-protocol.h"
#include "checksum.h"
#include "datapath.h"
#include "actions.h"
#include "flow.h"
#include "table.h"
#include "vlan.h"
#include "vport-internal_dev.h"

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) || \
    LINUX_VERSION_CODE >= KERNEL_VERSION(3,1,0)
#error Kernels before 2.6.18 or after 3.0 are not supported by this version of Open vSwitch.
#endif

int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
EXPORT_SYMBOL(dp_ioctl_hook);

/**
 * DOC: Locking:
 *
 * Writes to device state (add/remove datapath, port, set operations on vports,
 * etc.) are protected by RTNL.
 *
 * Writes to other state (flow table modifications, set miscellaneous datapath
 * parameters such as drop frags, etc.) are protected by genl_mutex.  The RTNL
 * lock nests inside genl_mutex.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of above and don't interact with
 * each other.
 */
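
/* A minimal sketch of the read-side pattern these rules imply (not part of
 * the original code; see get_dp() and dp_process_received_packet() for real
 * users):
 *
 *	rcu_read_lock();
 *	dp = get_dp(dp_ifindex);
 *	if (dp)
 *		use(dp);	(must not sleep)
 *	rcu_read_unlock();
 *
 * Writers instead take RTNL and/or genl_mutex, publish updates with
 * rcu_assign_pointer(), and defer frees past a grace period via call_rcu()
 * or tbl_deferred_destroy().
 */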

/* Global list of datapaths to enable dumping them all out.
 * Protected by genl_mutex.
 */
static LIST_HEAD(dps);

static struct vport *new_vport(const struct vport_parms *);
static int queue_userspace_packets(struct datapath *, struct sk_buff *,
				   const struct dp_upcall_info *);

/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
struct datapath *get_dp(int dp_ifindex)
{
	struct datapath *dp = NULL;
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(&init_net, dp_ifindex);
	if (dev) {
		struct vport *vport = internal_dev_get_vport(dev);
		if (vport)
			dp = vport->dp;
	}
	rcu_read_unlock();

	return dp;
}
EXPORT_SYMBOL_GPL(get_dp);

/* Must be called with genl_mutex. */
static struct tbl *get_table_protected(struct datapath *dp)
{
	return rcu_dereference_protected(dp->table, lockdep_genl_is_held());
}

/* Must be called with rcu_read_lock or RTNL lock. */
static struct vport *get_vport_protected(struct datapath *dp, u16 port_no)
{
	return rcu_dereference_rtnl(dp->ports[port_no]);
}

/* Must be called with rcu_read_lock or RTNL lock. */
const char *dp_name(const struct datapath *dp)
{
	return vport_get_name(rcu_dereference_rtnl(dp->ports[OVSP_LOCAL]));
}

static inline size_t br_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
	       + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
	       + nla_total_size(4) /* IFLA_MASTER */
	       + nla_total_size(4) /* IFLA_MTU */
	       + nla_total_size(4) /* IFLA_LINK */
	       + nla_total_size(1); /* IFLA_OPERSTATE */
}
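
/* Every nla_total_size() term in br_nlmsg_size() must cover the matching
 * NLA_PUT*() in dp_fill_ifinfo() below.  If the message ever outgrows this
 * estimate, dp_fill_ifinfo() returns -EMSGSIZE, which dp_ifinfo_notify()
 * treats as a bug (see the WARN_ON there).
 */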

/* Caller must hold RTNL lock. */
static int dp_fill_ifinfo(struct sk_buff *skb,
			  const struct vport *port,
			  int event, unsigned int flags)
{
	struct datapath *dp = port->dp;
	int ifindex = vport_get_ifindex(port);
	struct ifinfomsg *hdr;
	struct nlmsghdr *nlh;

	if (ifindex < 0)
		return ifindex;

	nlh = nlmsg_put(skb, 0, 0, event, sizeof(*hdr), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	hdr = nlmsg_data(nlh);
	hdr->ifi_family = AF_BRIDGE;
	hdr->__ifi_pad = 0;
	hdr->ifi_type = ARPHRD_ETHER;
	hdr->ifi_index = ifindex;
	hdr->ifi_flags = vport_get_flags(port);
	hdr->ifi_change = 0;

	NLA_PUT_STRING(skb, IFLA_IFNAME, vport_get_name(port));
	NLA_PUT_U32(skb, IFLA_MASTER,
		vport_get_ifindex(get_vport_protected(dp, OVSP_LOCAL)));
	NLA_PUT_U32(skb, IFLA_MTU, vport_get_mtu(port));
#ifdef IFLA_OPERSTATE
	NLA_PUT_U8(skb, IFLA_OPERSTATE,
		   vport_is_running(port)
			? vport_get_operstate(port)
			: IF_OPER_DOWN);
#endif

	NLA_PUT(skb, IFLA_ADDRESS, ETH_ALEN, vport_get_addr(port));

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/* Caller must hold RTNL lock. */
static void dp_ifinfo_notify(int event, struct vport *port)
{
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(br_nlmsg_size(), GFP_KERNEL);
	if (skb == NULL)
		goto errout;

	err = dp_fill_ifinfo(skb, port, event, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in br_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err);
}

static void release_dp(struct kobject *kobj)
{
	struct datapath *dp = container_of(kobj, struct datapath, ifobj);
	kfree(dp);
}

static struct kobj_type dp_ktype = {
	.release = release_dp
};

static void destroy_dp_rcu(struct rcu_head *rcu)
{
	struct datapath *dp = container_of(rcu, struct datapath, rcu);

	tbl_destroy((struct tbl __force *)dp->table, flow_free_tbl);
	free_percpu(dp->stats_percpu);
	kobject_put(&dp->ifobj);
}

/* Called with RTNL lock and genl_lock. */
static struct vport *new_vport(const struct vport_parms *parms)
{
	struct vport *vport;

	vport = vport_add(parms);
	if (!IS_ERR(vport)) {
		struct datapath *dp = parms->dp;

		rcu_assign_pointer(dp->ports[parms->port_no], vport);
		list_add(&vport->node, &dp->port_list);

		dp_ifinfo_notify(RTM_NEWLINK, vport);
	}

	return vport;
}

/* Called with RTNL lock. */
int dp_detach_port(struct vport *p)
{
	ASSERT_RTNL();

	if (p->port_no != OVSP_LOCAL)
		dp_sysfs_del_if(p);
	dp_ifinfo_notify(RTM_DELLINK, p);

	/* First drop references to device. */
	list_del(&p->node);
	rcu_assign_pointer(p->dp->ports[p->port_no], NULL);

	/* Then destroy it. */
	return vport_del(p);
}

/* Must be called with rcu_read_lock. */
void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
{
	struct datapath *dp = p->dp;
	struct dp_stats_percpu *stats;
	int stats_counter_off;
	int error;

	OVS_CB(skb)->vport = p;

	if (!OVS_CB(skb)->flow) {
		struct sw_flow_key key;
		struct tbl_node *flow_node;
		int key_len;
		bool is_frag;

		/* Extract flow from 'skb' into 'key'. */
		error = flow_extract(skb, p->port_no, &key, &key_len, &is_frag);
		if (unlikely(error)) {
			kfree_skb(skb);
			return;
		}

		if (is_frag && dp->drop_frags) {
			consume_skb(skb);
			stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
			goto out;
		}

		/* Look up flow. */
		flow_node = tbl_lookup(rcu_dereference(dp->table), &key, key_len,
				       flow_hash(&key, key_len), flow_cmp);
		if (unlikely(!flow_node)) {
			struct dp_upcall_info upcall;

			upcall.cmd = OVS_PACKET_CMD_MISS;
			upcall.key = &key;
			upcall.userdata = 0;
			upcall.sample_pool = 0;
			upcall.actions = NULL;
			upcall.actions_len = 0;
			dp_upcall(dp, skb, &upcall);
			stats_counter_off = offsetof(struct dp_stats_percpu, n_missed);
			goto out;
		}

		OVS_CB(skb)->flow = flow_cast(flow_node);
	}

	stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
	flow_used(OVS_CB(skb)->flow, skb);
	execute_actions(dp, skb);

out:
	/* Update datapath statistics. */
	local_bh_disable();
	stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());

	write_seqcount_begin(&stats->seqlock);
	(*(u64 *)((u8 *)stats + stats_counter_off))++;
	write_seqcount_end(&stats->seqlock);

	local_bh_enable();
}
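
/* The counter bump above is the write side of the stats seqcount: each CPU
 * owns one struct dp_stats_percpu, bottom halves are disabled so the update
 * cannot race with packet processing on the same CPU, and
 * write_seqcount_begin/end let the lockless reader in get_dp_stats() detect
 * and retry torn reads.
 */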

static void copy_and_csum_skb(struct sk_buff *skb, void *to)
{
	u16 csum_start, csum_offset;
	__wsum csum;

	get_skb_csum_pointers(skb, &csum_start, &csum_offset);
	csum_start -= skb_headroom(skb);

	skb_copy_bits(skb, 0, to, csum_start);

	csum = skb_copy_and_csum_bits(skb, csum_start, to + csum_start,
				      skb->len - csum_start, 0);
	*(__sum16 *)(to + csum_start + csum_offset) = csum_fold(csum);
}
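
/* copy_and_csum_skb() completes a CHECKSUM_PARTIAL packet while copying it
 * out: bytes before csum_start are copied verbatim, the remainder is copied
 * and summed in one pass, and the folded 16-bit result lands at
 * csum_start + csum_offset (e.g. the TCP or UDP checksum field).  This is
 * needed because the copy handed to userspace never passes through a NIC
 * that would otherwise fill the checksum in.
 */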

static struct genl_family dp_packet_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_PACKET_FAMILY,
	.version = 1,
	.maxattr = OVS_PACKET_ATTR_MAX
};

/* Generic Netlink multicast groups for upcalls.
 *
 * We really want three unique multicast groups per datapath, but we can't even
 * get one, because genl_register_mc_group() takes genl_lock, which is also
 * held during Generic Netlink message processing, so trying to acquire
 * multicast groups during OVS_DP_NEW processing deadlocks.  Instead, we
 * preallocate a few groups and use them round-robin for datapaths.  Collision
 * isn't fatal--multicast listeners should check that the family is the one
 * that they want and discard others--but it wastes time and memory to receive
 * unwanted messages.
 */
#define PACKET_N_MC_GROUPS 16
static struct genl_multicast_group packet_mc_groups[PACKET_N_MC_GROUPS];

static u32 packet_mc_group(struct datapath *dp, u8 cmd)
{
	u32 idx;
	BUILD_BUG_ON_NOT_POWER_OF_2(PACKET_N_MC_GROUPS);

	idx = jhash_2words(dp->dp_ifindex, cmd, 0) & (PACKET_N_MC_GROUPS - 1);
	return packet_mc_groups[idx].id;
}
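
/* Worked example with made-up numbers: a datapath with dp_ifindex 7 and cmd
 * OVS_PACKET_CMD_MISS hashes to some jhash_2words() value, say 0x9e3779b1;
 * masking with PACKET_N_MC_GROUPS - 1 (0xf) keeps the low 4 bits, picking
 * group 1 of the 16 preallocated groups.  Two datapaths may well pick the
 * same group, which is the benign collision described above.
 */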

static int packet_register_mc_groups(void)
{
	int i;

	for (i = 0; i < PACKET_N_MC_GROUPS; i++) {
		struct genl_multicast_group *group = &packet_mc_groups[i];
		int error;

		sprintf(group->name, "packet%d", i);
		error = genl_register_mc_group(&dp_packet_genl_family, group);
		if (error)
			return error;
	}
	return 0;
}

int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info)
{
	struct dp_stats_percpu *stats;
	int err;

	WARN_ON_ONCE(skb_shared(skb));

	forward_ip_summed(skb, true);

	/* Break apart GSO packets into their component pieces.  Otherwise
	 * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
	if (skb_is_gso(skb)) {
		struct sk_buff *nskb = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);

		if (IS_ERR(nskb)) {
			kfree_skb(skb);
			err = PTR_ERR(nskb);
			goto err;
		}
		consume_skb(skb);
		skb = nskb;
	}

	err = queue_userspace_packets(dp, skb, upcall_info);
	if (err)
		goto err;

	return 0;

err:
	local_bh_disable();
	stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());

	write_seqcount_begin(&stats->seqlock);
	stats->n_lost++;
	write_seqcount_end(&stats->seqlock);

	local_bh_enable();

	return err;
}

/* Send each packet in the 'skb' list to userspace for 'dp' as directed by
 * 'upcall_info'.  There will be only one packet unless we broke up a GSO
 * packet.
 */
static int queue_userspace_packets(struct datapath *dp, struct sk_buff *skb,
				   const struct dp_upcall_info *upcall_info)
{
	u32 group = packet_mc_group(dp, upcall_info->cmd);
	struct sk_buff *nskb;
	int err;

	do {
		struct ovs_header *upcall;
		struct sk_buff *user_skb; /* to be queued to userspace */
		struct nlattr *nla;
		unsigned int len;

		nskb = skb->next;
		skb->next = NULL;

		err = vlan_deaccel_tag(skb);
		if (unlikely(err))
			goto err_kfree_skbs;

		if (nla_attr_size(skb->len) > USHRT_MAX) {
			err = -EFBIG;
			goto err_kfree_skbs;
		}

		len = sizeof(struct ovs_header);
		len += nla_total_size(skb->len);
		len += nla_total_size(FLOW_BUFSIZE);
		if (upcall_info->userdata)
			len += nla_total_size(8);
		if (upcall_info->sample_pool)
			len += nla_total_size(4);
		if (upcall_info->actions_len)
			len += nla_total_size(upcall_info->actions_len);

		user_skb = genlmsg_new(len, GFP_ATOMIC);
		if (!user_skb) {
			netlink_set_err(INIT_NET_GENL_SOCK, 0, group, -ENOBUFS);
			err = -ENOBUFS;
			goto err_kfree_skbs;
		}

		upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 0, upcall_info->cmd);
		upcall->dp_ifindex = dp->dp_ifindex;

		nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
		flow_to_nlattrs(upcall_info->key, user_skb);
		nla_nest_end(user_skb, nla);

		if (upcall_info->userdata)
			nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA, upcall_info->userdata);
		if (upcall_info->sample_pool)
			nla_put_u32(user_skb, OVS_PACKET_ATTR_SAMPLE_POOL, upcall_info->sample_pool);
		if (upcall_info->actions_len) {
			const struct nlattr *actions = upcall_info->actions;
			u32 actions_len = upcall_info->actions_len;

			nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);
			memcpy(__skb_put(user_skb, actions_len), actions, actions_len);
			nla_nest_end(user_skb, nla);
		}

		nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			copy_and_csum_skb(skb, nla_data(nla));
		else
			skb_copy_bits(skb, 0, nla_data(nla), skb->len);

		err = genlmsg_multicast(user_skb, 0, group, GFP_ATOMIC);
		if (err)
			goto err_kfree_skbs;

		consume_skb(skb);
		skb = nskb;
	} while (skb);
	return 0;

err_kfree_skbs:
	kfree_skb(skb);
	while ((skb = nskb) != NULL) {
		nskb = skb->next;
		kfree_skb(skb);
	}
	return err;
}
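
/* Roughly, each upcall message built above looks like this on the wire
 * (layout is illustrative, not normative):
 *
 *	struct ovs_header            { dp_ifindex }
 *	OVS_PACKET_ATTR_KEY          (nested flow key attributes)
 *	OVS_PACKET_ATTR_USERDATA     (u64, optional)
 *	OVS_PACKET_ATTR_SAMPLE_POOL  (u32, optional)
 *	OVS_PACKET_ATTR_ACTIONS      (nested, optional)
 *	OVS_PACKET_ATTR_PACKET       (the packet bytes themselves)
 *
 * The 'len' computed before genlmsg_new() must account for every attribute
 * emitted here, mirroring the br_nlmsg_size()/dp_fill_ifinfo() pairing.
 */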

/* Called with genl_mutex. */
static int flush_flows(int dp_ifindex)
{
	struct tbl *old_table;
	struct tbl *new_table;
	struct datapath *dp;

	dp = get_dp(dp_ifindex);
	if (!dp)
		return -ENODEV;

	old_table = get_table_protected(dp);
	new_table = tbl_create(TBL_MIN_BUCKETS);
	if (!new_table)
		return -ENOMEM;

	rcu_assign_pointer(dp->table, new_table);

	tbl_deferred_destroy(old_table, flow_free_tbl);

	return 0;
}

static int validate_actions(const struct nlattr *attr)
{
	const struct nlattr *a;
	int rem;

	nla_for_each_nested(a, attr, rem) {
		static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
			[OVS_ACTION_ATTR_OUTPUT] = 4,
			[OVS_ACTION_ATTR_USERSPACE] = 8,
			[OVS_ACTION_ATTR_SET_DL_TCI] = 2,
			[OVS_ACTION_ATTR_STRIP_VLAN] = 0,
			[OVS_ACTION_ATTR_SET_DL_SRC] = ETH_ALEN,
			[OVS_ACTION_ATTR_SET_DL_DST] = ETH_ALEN,
			[OVS_ACTION_ATTR_SET_NW_SRC] = 4,
			[OVS_ACTION_ATTR_SET_NW_DST] = 4,
			[OVS_ACTION_ATTR_SET_NW_TOS] = 1,
			[OVS_ACTION_ATTR_SET_TP_SRC] = 2,
			[OVS_ACTION_ATTR_SET_TP_DST] = 2,
			[OVS_ACTION_ATTR_SET_TUNNEL] = 8,
			[OVS_ACTION_ATTR_SET_PRIORITY] = 4,
			[OVS_ACTION_ATTR_POP_PRIORITY] = 0,
		};
		int type = nla_type(a);

		if (type > OVS_ACTION_ATTR_MAX || nla_len(a) != action_lens[type])
			return -EINVAL;

		switch (type) {
		case OVS_ACTION_ATTR_UNSPEC:
			return -EINVAL;

		case OVS_ACTION_ATTR_USERSPACE:
		case OVS_ACTION_ATTR_STRIP_VLAN:
		case OVS_ACTION_ATTR_SET_DL_SRC:
		case OVS_ACTION_ATTR_SET_DL_DST:
		case OVS_ACTION_ATTR_SET_NW_SRC:
		case OVS_ACTION_ATTR_SET_NW_DST:
		case OVS_ACTION_ATTR_SET_TP_SRC:
		case OVS_ACTION_ATTR_SET_TP_DST:
		case OVS_ACTION_ATTR_SET_TUNNEL:
		case OVS_ACTION_ATTR_SET_PRIORITY:
		case OVS_ACTION_ATTR_POP_PRIORITY:
			/* No validation needed. */
			break;

		case OVS_ACTION_ATTR_OUTPUT:
			if (nla_get_u32(a) >= DP_MAX_PORTS)
				return -EINVAL;
			break;

		case OVS_ACTION_ATTR_SET_DL_TCI:
			if (nla_get_be16(a) & htons(VLAN_CFI_MASK))
				return -EINVAL;
			break;

		case OVS_ACTION_ATTR_SET_NW_TOS:
			if (nla_get_u8(a) & INET_ECN_MASK)
				return -EINVAL;
			break;

		default:
			return -EOPNOTSUPP;
		}
	}

	if (rem > 0)
		return -EINVAL;

	return 0;
}
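
/* For instance, a flow that rewrites the IPv4 source and then outputs to
 * port 3 would arrive here as a nested attribute list along these lines
 * (illustrative only):
 *
 *	OVS_ACTION_ATTR_SET_NW_SRC: 4-byte address
 *	OVS_ACTION_ATTR_OUTPUT:     u32 port 3
 *
 * Each attribute's payload length must match action_lens[] exactly, and
 * OUTPUT is additionally range-checked against DP_MAX_PORTS above.
 */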

static void clear_stats(struct sw_flow *flow)
{
	flow->used = 0;
	flow->tcp_flags = 0;
	flow->packet_count = 0;
	flow->byte_count = 0;
}

/* Called with genl_mutex. */
static int expand_table(struct datapath *dp)
{
	struct tbl *old_table = get_table_protected(dp);
	struct tbl *new_table;

	new_table = tbl_expand(old_table);
	if (IS_ERR(new_table)) {
		if (PTR_ERR(new_table) != -ENOSPC)
			return PTR_ERR(new_table);
	} else {
		rcu_assign_pointer(dp->table, new_table);
		tbl_deferred_destroy(old_table, NULL);
	}

	return 0;
}
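
/* expand_table() and flush_flows() follow the classic RCU replace pattern:
 * build the new table, publish it with rcu_assign_pointer(), and free the
 * old one only after a grace period (tbl_deferred_destroy()), so readers
 * that picked up the old pointer under rcu_read_lock() stay safe.  -ENOSPC
 * from tbl_expand() just means the table is already at its maximum size and
 * is deliberately not treated as an error.
 */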

static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
	struct ovs_header *ovs_header = info->userhdr;
	struct nlattr **a = info->attrs;
	struct sw_flow_actions *acts;
	struct sk_buff *packet;
	struct sw_flow *flow;
	struct datapath *dp;
	struct ethhdr *eth;
	bool is_frag;
	int len;
	int err;
	int key_len;

	err = -EINVAL;
	if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
	    !a[OVS_PACKET_ATTR_ACTIONS] ||
	    nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN)
		goto err;

	err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS]);
	if (err)
		goto err;

	len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
	packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
	err = -ENOMEM;
	if (!packet)
		goto err;
	skb_reserve(packet, NET_IP_ALIGN);

	memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len);

	skb_reset_mac_header(packet);
	eth = eth_hdr(packet);

	/* Normally, setting the skb 'protocol' field would be handled by a
	 * call to eth_type_trans(), but it assumes there's a sending
	 * device, which we may not have. */
	if (ntohs(eth->h_proto) >= 1536)
		packet->protocol = eth->h_proto;
	else
		packet->protocol = htons(ETH_P_802_2);
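
	/* 1536 (0x600) is the traditional EtherType cutoff: values below it
	 * are 802.3 frame lengths, values at or above it are protocol
	 * numbers.  Later kernels spell this constant ETH_P_802_3_MIN. */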

	/* Build an sw_flow for sending this packet. */
	flow = flow_alloc();
	err = PTR_ERR(flow);
	if (IS_ERR(flow))
		goto err_kfree_skb;

	err = flow_extract(packet, -1, &flow->key, &key_len, &is_frag);
	if (err)
		goto err_flow_put;
	flow->tbl_node.hash = flow_hash(&flow->key, key_len);

	err = flow_metadata_from_nlattrs(&flow->key.eth.in_port,
					 &flow->key.eth.tun_id,
					 a[OVS_PACKET_ATTR_KEY]);
	if (err)
		goto err_flow_put;

	acts = flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]);
	err = PTR_ERR(acts);
	if (IS_ERR(acts))
		goto err_flow_put;
	rcu_assign_pointer(flow->sf_acts, acts);

	OVS_CB(packet)->flow = flow;

	rcu_read_lock();
	dp = get_dp(ovs_header->dp_ifindex);
	err = -ENODEV;
	if (!dp)
		goto err_unlock;
	err = execute_actions(dp, packet);
	rcu_read_unlock();

	flow_put(flow);
	return err;

err_unlock:
	rcu_read_unlock();
err_flow_put:
	flow_put(flow);
err_kfree_skb:
	kfree_skb(packet);
err:
	return err;
}

static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
	[OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
	[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
	[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
};

static struct genl_ops dp_packet_genl_ops[] = {
	{ .cmd = OVS_PACKET_CMD_EXECUTE,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = packet_policy,
	  .doit = ovs_packet_cmd_execute
	}
};

static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
{
	int i;
	struct tbl *table = get_table_protected(dp);

	stats->n_flows = tbl_count(table);

	stats->n_frags = stats->n_hit = stats->n_missed = stats->n_lost = 0;
	for_each_possible_cpu(i) {
		const struct dp_stats_percpu *percpu_stats;
		struct dp_stats_percpu local_stats;
		unsigned seqcount;

		percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

		do {
			seqcount = read_seqcount_begin(&percpu_stats->seqlock);
			local_stats = *percpu_stats;
		} while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));

		stats->n_frags += local_stats.n_frags;
		stats->n_hit += local_stats.n_hit;
		stats->n_missed += local_stats.n_missed;
		stats->n_lost += local_stats.n_lost;
	}
}
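
/* This is the read side of the seqcount used in dp_process_received_packet()
 * and dp_upcall(): the snapshot of each per-CPU block is retried until the
 * sequence number is stable, so the reader never takes a lock and never sees
 * a half-updated counter.
 */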

/* MTU of the dp pseudo-device: ETH_DATA_LEN or the minimum of the ports.
 * Called with RTNL lock.
 */
int dp_min_mtu(const struct datapath *dp)
{
	struct vport *p;
	int mtu = 0;

	ASSERT_RTNL();

	list_for_each_entry (p, &dp->port_list, node) {
		int dev_mtu;

		/* Skip any internal ports, since that's what we're trying to
		 * set. */
		if (is_internal_vport(p))
			continue;

		dev_mtu = vport_get_mtu(p);
		if (!dev_mtu)
			continue;
		if (!mtu || dev_mtu < mtu)
			mtu = dev_mtu;
	}

	return mtu ? mtu : ETH_DATA_LEN;
}
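
/* Example: a datapath whose non-internal ports have MTUs 9000 and 1500
 * yields 1500 here; a datapath with no non-internal ports (or only ports
 * that report an MTU of 0) falls back to ETH_DATA_LEN (1500).
 */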

/* Sets the MTU of all datapath devices to the minimum of the ports
 * Called with RTNL lock.
 */
void set_internal_devs_mtu(const struct datapath *dp)
{
	struct vport *p;
	int mtu;

	ASSERT_RTNL();

	mtu = dp_min_mtu(dp);

	list_for_each_entry (p, &dp->port_list, node) {
		if (is_internal_vport(p))
			vport_set_mtu(p, mtu);
	}
}

static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
	[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
};

static struct genl_family dp_flow_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_FLOW_FAMILY,
	.version = 1,
	.maxattr = OVS_FLOW_ATTR_MAX
};

static struct genl_multicast_group dp_flow_multicast_group = {
	.name = OVS_FLOW_MCGROUP
};

/* Called with genl_lock. */
static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
				  struct sk_buff *skb, u32 pid, u32 seq, u32 flags, u8 cmd)
{
	const int skb_orig_len = skb->len;
	const struct sw_flow_actions *sf_acts;
	struct ovs_flow_stats stats;
	struct ovs_header *ovs_header;
	struct nlattr *nla;
	unsigned long used;
	u8 tcp_flags;
	int err;

	sf_acts = rcu_dereference_protected(flow->sf_acts,
					    lockdep_genl_is_held());

	ovs_header = genlmsg_put(skb, pid, seq, &dp_flow_genl_family, flags, cmd);
	if (!ovs_header)
		return -EMSGSIZE;

	ovs_header->dp_ifindex = dp->dp_ifindex;

	nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
	if (!nla)
		goto nla_put_failure;
	err = flow_to_nlattrs(&flow->key, skb);
	if (err)
		goto error;
	nla_nest_end(skb, nla);

	spin_lock_bh(&flow->lock);
	used = flow->used;
	stats.n_packets = flow->packet_count;
	stats.n_bytes = flow->byte_count;
	tcp_flags = flow->tcp_flags;
	spin_unlock_bh(&flow->lock);

	if (used)
		NLA_PUT_U64(skb, OVS_FLOW_ATTR_USED, flow_used_time(used));

	if (stats.n_packets)
		NLA_PUT(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats);

	if (tcp_flags)
		NLA_PUT_U8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags);

	/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
	 * this is the first flow to be dumped into 'skb'.  This is unusual for
	 * Netlink but individual action lists can be longer than
	 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
	 * The userspace caller can always fetch the actions separately if it
	 * really wants them.  (Most userspace callers in fact don't care.)
	 *
	 * This can only fail for dump operations because the skb is always
	 * properly sized for single flows.
	 */
	err = nla_put(skb, OVS_FLOW_ATTR_ACTIONS, sf_acts->actions_len,
		      sf_acts->actions);
	if (err < 0 && skb_orig_len)
		goto error;

	return genlmsg_end(skb, ovs_header);

nla_put_failure:
	err = -EMSGSIZE;
error:
	genlmsg_cancel(skb, ovs_header);
	return err;
}

static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
{
	const struct sw_flow_actions *sf_acts;
	int len;

	sf_acts = rcu_dereference_protected(flow->sf_acts,
					    lockdep_genl_is_held());

	len = nla_total_size(FLOW_BUFSIZE); /* OVS_FLOW_ATTR_KEY */
	len += nla_total_size(sf_acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
	len += nla_total_size(sizeof(struct ovs_flow_stats)); /* OVS_FLOW_ATTR_STATS */
	len += nla_total_size(1); /* OVS_FLOW_ATTR_TCP_FLAGS */
	len += nla_total_size(8); /* OVS_FLOW_ATTR_USED */
	return genlmsg_new(NLMSG_ALIGN(sizeof(struct ovs_header)) + len, GFP_KERNEL);
}

static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp,
					       u32 pid, u32 seq, u8 cmd)
{
	struct sk_buff *skb;
	int retval;

	skb = ovs_flow_cmd_alloc_info(flow);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	retval = ovs_flow_cmd_fill_info(flow, dp, skb, pid, seq, 0, cmd);
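	/* ovs_flow_cmd_alloc_info() sized 'skb' for exactly this flow, so
	 * filling it cannot legitimately fail; a failure here means the size
	 * estimate and the fill function have drifted apart. */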
	BUG_ON(retval < 0);
	return skb;
}

static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct tbl_node *flow_node;
	struct sw_flow_key key;
	struct sw_flow *flow;
	struct sk_buff *reply;
	struct datapath *dp;
	struct tbl *table;
	u32 hash;
	int error;
	int key_len;

	/* Extract key. */
	error = -EINVAL;
	if (!a[OVS_FLOW_ATTR_KEY])
		goto error;
	error = flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
	if (error)
		goto error;

	/* Validate actions. */
	if (a[OVS_FLOW_ATTR_ACTIONS]) {
		error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS]);
		if (error)
			goto error;
	} else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
		error = -EINVAL;
		goto error;
	}

	dp = get_dp(ovs_header->dp_ifindex);
	error = -ENODEV;
	if (!dp)
		goto error;

	hash = flow_hash(&key, key_len);
	table = get_table_protected(dp);
	flow_node = tbl_lookup(table, &key, key_len, hash, flow_cmp);
	if (!flow_node) {
		struct sw_flow_actions *acts;

		/* Bail out if we're not allowed to create a new flow. */
		error = -ENOENT;
		if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
			goto error;

		/* Expand table, if necessary, to make room. */
		if (tbl_count(table) >= tbl_n_buckets(table)) {
			error = expand_table(dp);
			if (error)
				goto error;
			table = get_table_protected(dp);
		}

		/* Allocate flow. */
		flow = flow_alloc();
		if (IS_ERR(flow)) {
			error = PTR_ERR(flow);
			goto error;
		}
		flow->key = key;
		clear_stats(flow);

		/* Obtain actions. */
		acts = flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]);
		error = PTR_ERR(acts);
		if (IS_ERR(acts))
			goto error_free_flow;
		rcu_assign_pointer(flow->sf_acts, acts);

		/* Put flow in bucket. */
		error = tbl_insert(table, &flow->tbl_node, hash);
		if (error)
			goto error_free_flow;

		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
						info->snd_seq, OVS_FLOW_CMD_NEW);
	} else {
		/* We found a matching flow. */
		struct sw_flow_actions *old_acts;

		/* Bail out if we're not allowed to modify an existing flow.
		 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
		 * because Generic Netlink treats the latter as a dump
		 * request.  We also accept NLM_F_EXCL in case that bug ever
		 * gets fixed.
		 */
		error = -EEXIST;
		if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
		    info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
			goto error;

		/* Update actions. */
		flow = flow_cast(flow_node);
		old_acts = rcu_dereference_protected(flow->sf_acts,
						     lockdep_genl_is_held());
		if (a[OVS_FLOW_ATTR_ACTIONS] &&
		    (old_acts->actions_len != nla_len(a[OVS_FLOW_ATTR_ACTIONS]) ||
		     memcmp(old_acts->actions, nla_data(a[OVS_FLOW_ATTR_ACTIONS]),
			    old_acts->actions_len))) {
			struct sw_flow_actions *new_acts;

			new_acts = flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]);
			error = PTR_ERR(new_acts);
			if (IS_ERR(new_acts))
				goto error;

			rcu_assign_pointer(flow->sf_acts, new_acts);
			flow_deferred_free_acts(old_acts);
		}

		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
						info->snd_seq, OVS_FLOW_CMD_NEW);

		/* Clear stats. */
		if (a[OVS_FLOW_ATTR_CLEAR]) {
			spin_lock_bh(&flow->lock);
			clear_stats(flow);
			spin_unlock_bh(&flow->lock);
		}
	}

	if (!IS_ERR(reply))
		genl_notify(reply, genl_info_net(info), info->snd_pid,
			    dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
	else
		netlink_set_err(INIT_NET_GENL_SOCK, 0,
				dp_flow_multicast_group.id, PTR_ERR(reply));
	return 0;

error_free_flow:
	flow_put(flow);
error:
	return error;
}

static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sw_flow_key key;
	struct tbl_node *flow_node;
	struct sk_buff *reply;
	struct sw_flow *flow;
	struct datapath *dp;
	struct tbl *table;
	int err;
	int key_len;

	if (!a[OVS_FLOW_ATTR_KEY])
		return -EINVAL;
	err = flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
	if (err)
		return err;

	dp = get_dp(ovs_header->dp_ifindex);
	if (!dp)
		return -ENODEV;

	table = get_table_protected(dp);
	flow_node = tbl_lookup(table, &key, key_len, flow_hash(&key, key_len),
			       flow_cmp);
	if (!flow_node)
		return -ENOENT;

	flow = flow_cast(flow_node);
	reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid, info->snd_seq, OVS_FLOW_CMD_NEW);
	if (IS_ERR(reply))
		return PTR_ERR(reply);

	return genlmsg_reply(reply, info);
}

static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sw_flow_key key;
	struct tbl_node *flow_node;
	struct sk_buff *reply;
	struct sw_flow *flow;
	struct datapath *dp;
	struct tbl *table;
	int err;
	int key_len;

	if (!a[OVS_FLOW_ATTR_KEY])
		return flush_flows(ovs_header->dp_ifindex);
	err = flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
	if (err)
		return err;

	dp = get_dp(ovs_header->dp_ifindex);
	if (!dp)
		return -ENODEV;

	table = get_table_protected(dp);
	flow_node = tbl_lookup(table, &key, key_len, flow_hash(&key, key_len),
			       flow_cmp);
	if (!flow_node)
		return -ENOENT;
	flow = flow_cast(flow_node);

	reply = ovs_flow_cmd_alloc_info(flow);
	if (!reply)
		return -ENOMEM;

	err = tbl_remove(table, flow_node);
	if (err) {
		kfree_skb(reply);
		return err;
	}

	err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_pid,
				     info->snd_seq, 0, OVS_FLOW_CMD_DEL);
	BUG_ON(err < 0);

	flow_deferred_free(flow);

	genl_notify(reply, genl_info_net(info), info->snd_pid,
		    dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
	return 0;
}

static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
	struct datapath *dp;

	dp = get_dp(ovs_header->dp_ifindex);
	if (!dp)
		return -ENODEV;

	for (;;) {
		struct tbl_node *flow_node;
		struct sw_flow *flow;
		u32 bucket, obj;

		bucket = cb->args[0];
		obj = cb->args[1];
		flow_node = tbl_next(get_table_protected(dp), &bucket, &obj);
		if (!flow_node)
			break;

		flow = flow_cast(flow_node);
		if (ovs_flow_cmd_fill_info(flow, dp, skb, NETLINK_CB(cb->skb).pid,
					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
					   OVS_FLOW_CMD_NEW) < 0)
			break;

		cb->args[0] = bucket;
		cb->args[1] = obj;
	}
	return skb->len;
}
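
/* Flow dumps resume where they left off: (bucket, obj) form a cursor into
 * the flow table that Netlink preserves in cb->args[] between callbacks.
 * Each callback refills one skb; the dump ends once tbl_next() finds
 * nothing more and the callback returns an empty skb (length 0).
 */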

static struct genl_ops dp_flow_genl_ops[] = {
	{ .cmd = OVS_FLOW_CMD_NEW,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_new_or_set
	},
	{ .cmd = OVS_FLOW_CMD_DEL,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_del
	},
	{ .cmd = OVS_FLOW_CMD_GET,
	  .flags = 0,               /* OK for unprivileged users. */
	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_get,
	  .dumpit = ovs_flow_cmd_dump
	},
	{ .cmd = OVS_FLOW_CMD_SET,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_new_or_set,
	},
};

static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
#ifdef HAVE_NLA_NUL_STRING
	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
#endif
	[OVS_DP_ATTR_IPV4_FRAGS] = { .type = NLA_U32 },
	[OVS_DP_ATTR_SAMPLING] = { .type = NLA_U32 },
};

static struct genl_family dp_datapath_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_DATAPATH_FAMILY,
	.version = 1,
	.maxattr = OVS_DP_ATTR_MAX
};

static struct genl_multicast_group dp_datapath_multicast_group = {
	.name = OVS_DATAPATH_MCGROUP
};

static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
				u32 pid, u32 seq, u32 flags, u8 cmd)
{
	struct ovs_header *ovs_header;
	struct nlattr *nla;
	int err;

	ovs_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family,
				 flags, cmd);
	if (!ovs_header)
		goto error;

	ovs_header->dp_ifindex = dp->dp_ifindex;

	rcu_read_lock();
	err = nla_put_string(skb, OVS_DP_ATTR_NAME, dp_name(dp));
	rcu_read_unlock();
	if (err)
		goto nla_put_failure;

	nla = nla_reserve(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats));
	if (!nla)
		goto nla_put_failure;
	get_dp_stats(dp, nla_data(nla));

	NLA_PUT_U32(skb, OVS_DP_ATTR_IPV4_FRAGS,
		    dp->drop_frags ? OVS_DP_FRAG_DROP : OVS_DP_FRAG_ZERO);

	if (dp->sflow_probability)
		NLA_PUT_U32(skb, OVS_DP_ATTR_SAMPLING, dp->sflow_probability);

	nla = nla_nest_start(skb, OVS_DP_ATTR_MCGROUPS);
	if (!nla)
		goto nla_put_failure;
	NLA_PUT_U32(skb, OVS_PACKET_CMD_MISS, packet_mc_group(dp, OVS_PACKET_CMD_MISS));
	NLA_PUT_U32(skb, OVS_PACKET_CMD_ACTION, packet_mc_group(dp, OVS_PACKET_CMD_ACTION));
	NLA_PUT_U32(skb, OVS_PACKET_CMD_SAMPLE, packet_mc_group(dp, OVS_PACKET_CMD_SAMPLE));
	nla_nest_end(skb, nla);

	return genlmsg_end(skb, ovs_header);

nla_put_failure:
	genlmsg_cancel(skb, ovs_header);
error:
	return -EMSGSIZE;
}

static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid,
					     u32 seq, u8 cmd)
{
	struct sk_buff *skb;
	int retval;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	retval = ovs_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd);
	if (retval < 0) {
		kfree_skb(skb);
		return ERR_PTR(retval);
	}
	return skb;
}

static int ovs_dp_cmd_validate(struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
	if (a[OVS_DP_ATTR_IPV4_FRAGS]) {
		u32 frags = nla_get_u32(a[OVS_DP_ATTR_IPV4_FRAGS]);

		if (frags != OVS_DP_FRAG_ZERO && frags != OVS_DP_FRAG_DROP)
			return -EINVAL;
	}

	return CHECK_NUL_STRING(a[OVS_DP_ATTR_NAME], IFNAMSIZ - 1);
}

/* Called with genl_mutex and optionally with RTNL lock also. */
static struct datapath *lookup_datapath(struct ovs_header *ovs_header, struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
	struct datapath *dp;

	if (!a[OVS_DP_ATTR_NAME])
		dp = get_dp(ovs_header->dp_ifindex);
	else {
		struct vport *vport;

		rcu_read_lock();
		vport = vport_locate(nla_data(a[OVS_DP_ATTR_NAME]));
		dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
		rcu_read_unlock();
	}
	return dp ? dp : ERR_PTR(-ENODEV);
}

/* Called with genl_mutex. */
static void change_datapath(struct datapath *dp, struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
	if (a[OVS_DP_ATTR_IPV4_FRAGS])
		dp->drop_frags = nla_get_u32(a[OVS_DP_ATTR_IPV4_FRAGS]) == OVS_DP_FRAG_DROP;
	if (a[OVS_DP_ATTR_SAMPLING])
		dp->sflow_probability = nla_get_u32(a[OVS_DP_ATTR_SAMPLING]);
}

static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct vport_parms parms;
	struct sk_buff *reply;
	struct datapath *dp;
	struct vport *vport;
	int err;

	err = -EINVAL;
	if (!a[OVS_DP_ATTR_NAME])
		goto err;

	err = ovs_dp_cmd_validate(a);
	if (err)
		goto err;

	rtnl_lock();
	err = -ENODEV;
	if (!try_module_get(THIS_MODULE))
		goto err_unlock_rtnl;

	err = -ENOMEM;
	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
	if (dp == NULL)
		goto err_put_module;
	INIT_LIST_HEAD(&dp->port_list);

	/* Initialize kobject for bridge.  This will be added as
	 * /sys/class/net/<devname>/brif later, if sysfs is enabled. */
	dp->ifobj.kset = NULL;
	kobject_init(&dp->ifobj, &dp_ktype);

	/* Allocate table. */
	err = -ENOMEM;
	rcu_assign_pointer(dp->table, tbl_create(TBL_MIN_BUCKETS));
	if (!dp->table)
		goto err_free_dp;

	/* Set up our datapath device. */
	parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
	parms.type = OVS_VPORT_TYPE_INTERNAL;
	parms.options = NULL;
	parms.dp = dp;
	parms.port_no = OVSP_LOCAL;
	vport = new_vport(&parms);
	if (IS_ERR(vport)) {
		err = PTR_ERR(vport);
		if (err == -EBUSY)
			err = -EEXIST;

		goto err_destroy_table;
	}
	dp->dp_ifindex = vport_get_ifindex(vport);

	dp->drop_frags = 0;
	dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
	if (!dp->stats_percpu) {
		err = -ENOMEM;
		goto err_destroy_local_port;
	}

	change_datapath(dp, a);

	reply = ovs_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, OVS_DP_CMD_NEW);
	err = PTR_ERR(reply);
	if (IS_ERR(reply))
		goto err_destroy_local_port;

	list_add_tail(&dp->list_node, &dps);
	dp_sysfs_add_dp(dp);

	rtnl_unlock();

	genl_notify(reply, genl_info_net(info), info->snd_pid,
		    dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
	return 0;

err_destroy_local_port:
	dp_detach_port(get_vport_protected(dp, OVSP_LOCAL));
err_destroy_table:
	tbl_destroy(get_table_protected(dp), NULL);
err_free_dp:
	kfree(dp);
err_put_module:
	module_put(THIS_MODULE);
err_unlock_rtnl:
	rtnl_unlock();
err:
	return err;
}
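
/* The unwind labels above run in strict reverse order of construction:
 * local port, then flow table, then the datapath structure itself, then the
 * module reference, and finally RTNL.  Keeping the goto targets in that
 * order is what makes partial-failure cleanup correct.
 */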

static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct vport *vport, *next_vport;
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	err = ovs_dp_cmd_validate(info->attrs);
	if (err)
		goto exit;

	rtnl_lock();
	dp = lookup_datapath(info->userhdr, info->attrs);
	err = PTR_ERR(dp);
	if (IS_ERR(dp))
		goto exit_unlock;

	reply = ovs_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, OVS_DP_CMD_DEL);
	err = PTR_ERR(reply);
	if (IS_ERR(reply))
		goto exit_unlock;

	list_for_each_entry_safe (vport, next_vport, &dp->port_list, node)
		if (vport->port_no != OVSP_LOCAL)
			dp_detach_port(vport);

	dp_sysfs_del_dp(dp);
	list_del(&dp->list_node);
	dp_detach_port(get_vport_protected(dp, OVSP_LOCAL));

	/* rtnl_unlock() will wait until all the references to devices that
	 * are pending unregistration have been dropped.  We do it here to
	 * ensure that any internal devices (which contain DP pointers) are
	 * fully destroyed before freeing the datapath.
	 */
	rtnl_unlock();

	call_rcu(&dp->rcu, destroy_dp_rcu);
	module_put(THIS_MODULE);

	genl_notify(reply, genl_info_net(info), info->snd_pid,
		    dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);

	return 0;

exit_unlock:
	rtnl_unlock();
exit:
	return err;
}

static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	err = ovs_dp_cmd_validate(info->attrs);
	if (err)
		return err;

	dp = lookup_datapath(info->userhdr, info->attrs);
	if (IS_ERR(dp))
		return PTR_ERR(dp);

	change_datapath(dp, info->attrs);

	reply = ovs_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, OVS_DP_CMD_NEW);
	if (IS_ERR(reply)) {
		err = PTR_ERR(reply);
		netlink_set_err(INIT_NET_GENL_SOCK, 0,
				dp_datapath_multicast_group.id, err);
		return 0;
	}

	genl_notify(reply, genl_info_net(info), info->snd_pid,
		    dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
	return 0;
}

static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	err = ovs_dp_cmd_validate(info->attrs);
	if (err)
		return err;

	dp = lookup_datapath(info->userhdr, info->attrs);
	if (IS_ERR(dp))
		return PTR_ERR(dp);

	reply = ovs_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, OVS_DP_CMD_NEW);
	if (IS_ERR(reply))
		return PTR_ERR(reply);

	return genlmsg_reply(reply, info);
}

static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct datapath *dp;
	int skip = cb->args[0];
	int i = 0;

	list_for_each_entry (dp, &dps, list_node) {
		if (i >= skip &&
		    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid,
					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
					 OVS_DP_CMD_NEW) < 0)
			break;
		i++;
	}

	cb->args[0] = i;

	return skb->len;
}

static struct genl_ops dp_datapath_genl_ops[] = {
	{ .cmd = OVS_DP_CMD_NEW,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_new
	},
	{ .cmd = OVS_DP_CMD_DEL,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_del
	},
	{ .cmd = OVS_DP_CMD_GET,
	  .flags = 0,               /* OK for unprivileged users. */
	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_get,
	  .dumpit = ovs_dp_cmd_dump
	},
	{ .cmd = OVS_DP_CMD_SET,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_set,
	},
};
1575
1576 static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
1577 #ifdef HAVE_NLA_NUL_STRING
1578         [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1579         [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct rtnl_link_stats64) },
1580         [OVS_VPORT_ATTR_ADDRESS] = { .len = ETH_ALEN },
1581 #else
1582         [OVS_VPORT_ATTR_STATS] = { .minlen = sizeof(struct rtnl_link_stats64) },
1583         [OVS_VPORT_ATTR_ADDRESS] = { .minlen = ETH_ALEN },
1584 #endif
1585         [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
1586         [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
1587         [OVS_VPORT_ATTR_MTU] = { .type = NLA_U32 },
1588         [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
1589 };
1590
1591 static struct genl_family dp_vport_genl_family = {
1592         .id = GENL_ID_GENERATE,
1593         .hdrsize = sizeof(struct ovs_header),
1594         .name = OVS_VPORT_FAMILY,
1595         .version = 1,
1596         .maxattr = OVS_VPORT_ATTR_MAX
1597 };
1598
1599 struct genl_multicast_group dp_vport_multicast_group = {
1600         .name = OVS_VPORT_MCGROUP
1601 };
1602
1603 /* Called with RTNL lock or RCU read lock. */
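/* Serializes one vport into an skb.  The stats attribute is reserved up
 * front and trimmed back off if the vport cannot supply stats;
 * OVS_VPORT_ATTR_MTU and OVS_VPORT_ATTR_IFINDEX are emitted only when the
 * vport reports a meaningful (nonzero, positive) value for them. */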
1604 static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1605                                    u32 pid, u32 seq, u32 flags, u8 cmd)
1606 {
1607         struct ovs_header *ovs_header;
1608         struct nlattr *nla;
1609         int ifindex;
1610         int mtu;
1611         int err;
1612
1613         ovs_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family,
1614                                  flags, cmd);
1615         if (!ovs_header)
1616                 return -EMSGSIZE;
1617
1618         ovs_header->dp_ifindex = vport->dp->dp_ifindex;
1619
1620         NLA_PUT_U32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no);
1621         NLA_PUT_U32(skb, OVS_VPORT_ATTR_TYPE, vport_get_type(vport));
1622         NLA_PUT_STRING(skb, OVS_VPORT_ATTR_NAME, vport_get_name(vport));
1623
1624         nla = nla_reserve(skb, OVS_VPORT_ATTR_STATS, sizeof(struct rtnl_link_stats64));
1625         if (!nla)
1626                 goto nla_put_failure;
1627         if (vport_get_stats(vport, nla_data(nla)))
1628                 __skb_trim(skb, skb->len - nla->nla_len);
1629
1630         NLA_PUT(skb, OVS_VPORT_ATTR_ADDRESS, ETH_ALEN, vport_get_addr(vport));
1631
1632         mtu = vport_get_mtu(vport);
1633         if (mtu)
1634                 NLA_PUT_U32(skb, OVS_VPORT_ATTR_MTU, mtu);
1635
1636         err = vport_get_options(vport, skb);
1637         if (err == -EMSGSIZE)
1638                 goto error;
1639
1640         ifindex = vport_get_ifindex(vport);
1641         if (ifindex > 0)
1642                 NLA_PUT_U32(skb, OVS_VPORT_ATTR_IFINDEX, ifindex);
1643
1644         return genlmsg_end(skb, ovs_header);
1645
1646 nla_put_failure:
1647         err = -EMSGSIZE;
1648 error:
1649         genlmsg_cancel(skb, ovs_header);
1650         return err;
1651 }
1652
1653 /* Called with RTNL lock or RCU read lock. */
1654 struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid,
1655                                          u32 seq, u8 cmd)
1656 {
1657         struct sk_buff *skb;
1658         int retval;
1659
1660         skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
1661         if (!skb)
1662                 return ERR_PTR(-ENOMEM);
1663
1664         retval = ovs_vport_cmd_fill_info(vport, skb, pid, seq, 0, cmd);
1665         if (retval < 0) {
1666                 kfree_skb(skb);
1667                 return ERR_PTR(retval);
1668         }
1669         return skb;
1670 }
1671
1672 static int ovs_vport_cmd_validate(struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1673 {
1674         return CHECK_NUL_STRING(a[OVS_VPORT_ATTR_NAME], IFNAMSIZ - 1);
1675 }
1676
1677 /* Called with RTNL lock or RCU read lock. */
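/* Resolves the vport a request refers to: OVS_VPORT_ATTR_NAME looks the
 * port up by name without needing a datapath, while OVS_VPORT_ATTR_PORT_NO
 * is interpreted relative to ovs_header->dp_ifindex.  Name takes precedence
 * if both are supplied; with neither, the request is -EINVAL. */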
1678 static struct vport *lookup_vport(struct ovs_header *ovs_header,
1679                                   struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1680 {
1681         struct datapath *dp;
1682         struct vport *vport;
1683
1684         if (a[OVS_VPORT_ATTR_NAME]) {
1685                 vport = vport_locate(nla_data(a[OVS_VPORT_ATTR_NAME]));
1686                 if (!vport)
1687                         return ERR_PTR(-ENODEV);
1688                 return vport;
1689         } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1690                 u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1691
1692                 if (port_no >= DP_MAX_PORTS)
1693                         return ERR_PTR(-EFBIG);
1694
1695                 dp = get_dp(ovs_header->dp_ifindex);
1696                 if (!dp)
1697                         return ERR_PTR(-ENODEV);
1698
1699                 vport = get_vport_protected(dp, port_no);
1700                 if (!vport)
1701                         return ERR_PTR(-ENOENT);
1702                 return vport;
1703         } else
1704                 return ERR_PTR(-EINVAL);
1705 }
1706
1707 /* Called with RTNL lock. */
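/* Applies the optional stats, Ethernet address, and MTU attributes, in that
 * order, stopping at the first failure. */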
1708 static int change_vport(struct vport *vport, struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1709 {
1710         int err = 0;
1711         if (a[OVS_VPORT_ATTR_STATS])
1712                 err = vport_set_stats(vport, nla_data(a[OVS_VPORT_ATTR_STATS]));
1713         if (!err && a[OVS_VPORT_ATTR_ADDRESS])
1714                 err = vport_set_addr(vport, nla_data(a[OVS_VPORT_ATTR_ADDRESS]));
1715         if (!err && a[OVS_VPORT_ATTR_MTU])
1716                 err = vport_set_mtu(vport, nla_get_u32(a[OVS_VPORT_ATTR_MTU]));
1717         return err;
1718 }
1719
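/* Handler for OVS_VPORT_CMD_NEW.  If OVS_VPORT_ATTR_PORT_NO is supplied the
 * port gets that number; otherwise the lowest free number starting from 1
 * is chosen (0 is OVSP_LOCAL).  If configuring the new port fails, it is
 * detached again, so creation is all-or-nothing. */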
1720 static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1721 {
1722         struct nlattr **a = info->attrs;
1723         struct ovs_header *ovs_header = info->userhdr;
1724         struct vport_parms parms;
1725         struct sk_buff *reply;
1726         struct vport *vport;
1727         struct datapath *dp;
1728         u32 port_no;
1729         int err;
1730
1731         err = -EINVAL;
1732         if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE])
1733                 goto exit;
1734
1735         err = ovs_vport_cmd_validate(a);
1736         if (err)
1737                 goto exit;
1738
1739         rtnl_lock();
1740         dp = get_dp(ovs_header->dp_ifindex);
1741         err = -ENODEV;
1742         if (!dp)
1743                 goto exit_unlock;
1744
1745         if (a[OVS_VPORT_ATTR_PORT_NO]) {
1746                 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1747
1748                 err = -EFBIG;
1749                 if (port_no >= DP_MAX_PORTS)
1750                         goto exit_unlock;
1751
1752                 vport = get_vport_protected(dp, port_no);
1753                 err = -EBUSY;
1754                 if (vport)
1755                         goto exit_unlock;
1756         } else {
1757                 for (port_no = 1; ; port_no++) {
1758                         if (port_no >= DP_MAX_PORTS) {
1759                                 err = -EFBIG;
1760                                 goto exit_unlock;
1761                         }
1762                         vport = get_vport_protected(dp, port_no);
1763                         if (!vport)
1764                                 break;
1765                 }
1766         }
1767
1768         parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
1769         parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
1770         parms.options = a[OVS_VPORT_ATTR_OPTIONS];
1771         parms.dp = dp;
1772         parms.port_no = port_no;
1773
1774         vport = new_vport(&parms);
1775         err = PTR_ERR(vport);
1776         if (IS_ERR(vport))
1777                 goto exit_unlock;
1778
1779         set_internal_devs_mtu(dp);
1780         dp_sysfs_add_if(vport);
1781
1782         err = change_vport(vport, a);
1783         if (!err) {
1784                 reply = ovs_vport_cmd_build_info(vport, info->snd_pid,
1785                                                  info->snd_seq, OVS_VPORT_CMD_NEW);
1786                 if (IS_ERR(reply))
1787                         err = PTR_ERR(reply);
1788         }
1789         if (err) {
1790                 dp_detach_port(vport);
1791                 goto exit_unlock;
1792         }
1793         genl_notify(reply, genl_info_net(info), info->snd_pid,
1794                     dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1795
1797 exit_unlock:
1798         rtnl_unlock();
1799 exit:
1800         return err;
1801 }
1802
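/* Handler for OVS_VPORT_CMD_SET: updates a vport's options, stats, address,
 * or MTU under RTNL and broadcasts the resulting state. */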
1803 static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1804 {
1805         struct nlattr **a = info->attrs;
1806         struct sk_buff *reply;
1807         struct vport *vport;
1808         int err;
1809
1810         err = ovs_vport_cmd_validate(a);
1811         if (err)
1812                 goto exit;
1813
1814         rtnl_lock();
1815         vport = lookup_vport(info->userhdr, a);
1816         err = PTR_ERR(vport);
1817         if (IS_ERR(vport))
1818                 goto exit_unlock;
1819
1820         err = 0;
1821         if (a[OVS_VPORT_ATTR_OPTIONS])
1822                 err = vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
1823         if (!err)
1824                 err = change_vport(vport, a);
1825
1826         reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1827                                          OVS_VPORT_CMD_NEW);
1828         if (IS_ERR(reply)) {
1829                 netlink_set_err(INIT_NET_GENL_SOCK, 0,
1830                                 dp_vport_multicast_group.id, PTR_ERR(reply));
1831                 err = 0;
1832                 goto exit_unlock;
1833         }
1834
1835         genl_notify(reply, genl_info_net(info), info->snd_pid,
1836                     dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1837
1838 exit_unlock:
1839         rtnl_unlock();
1840 exit:
1841         return err;
1842 }
1843
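/* Handler for OVS_VPORT_CMD_DEL.  The local port (OVSP_LOCAL) cannot be
 * deleted; it goes away only with the datapath itself.  The notification is
 * built before detaching, while the vport can still be serialized. */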
1844 static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
1845 {
1846         struct nlattr **a = info->attrs;
1847         struct sk_buff *reply;
1848         struct vport *vport;
1849         int err;
1850
1851         err = ovs_vport_cmd_validate(a);
1852         if (err)
1853                 goto exit;
1854
1855         rtnl_lock();
1856         vport = lookup_vport(info->userhdr, a);
1857         err = PTR_ERR(vport);
1858         if (IS_ERR(vport))
1859                 goto exit_unlock;
1860
1861         if (vport->port_no == OVSP_LOCAL) {
1862                 err = -EINVAL;
1863                 goto exit_unlock;
1864         }
1865
1866         reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1867                                          OVS_VPORT_CMD_DEL);
1868         err = PTR_ERR(reply);
1869         if (IS_ERR(reply))
1870                 goto exit_unlock;
1871
1872         err = dp_detach_port(vport);
1873
1874         genl_notify(reply, genl_info_net(info), info->snd_pid,
1875                     dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1876
1877 exit_unlock:
1878         rtnl_unlock();
1879 exit:
1880         return err;
1881 }
1882
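/* Handler for OVS_VPORT_CMD_GET: read-only, so it runs under the RCU read
 * lock rather than RTNL. */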
1883 static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
1884 {
1885         struct nlattr **a = info->attrs;
1886         struct ovs_header *ovs_header = info->userhdr;
1887         struct sk_buff *reply;
1888         struct vport *vport;
1889         int err;
1890
1891         err = ovs_vport_cmd_validate(a);
1892         if (err)
1893                 goto exit;
1894
1895         rcu_read_lock();
1896         vport = lookup_vport(ovs_header, a);
1897         err = PTR_ERR(vport);
1898         if (IS_ERR(vport))
1899                 goto exit_unlock;
1900
1901         reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1902                                          OVS_VPORT_CMD_NEW);
1903         err = PTR_ERR(reply);
1904         if (IS_ERR(reply))
1905                 goto exit_unlock;
1906
1907         rcu_read_unlock();
1908
1909         return genlmsg_reply(reply, info);
1910
1911 exit_unlock:
1912         rcu_read_unlock();
1913 exit:
1914         return err;
1915 }
1916
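/* Dump callback for OVS_VPORT_CMD_GET: walks one datapath's ports in
 * port-number order.  cb->args[0] holds the port number to start from, so a
 * dump that fills its skb resumes at the port that did not fit. */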
1917 static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1918 {
1919         struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1920         struct datapath *dp;
1921         u32 port_no;
1922         int retval;
1923
1924         dp = get_dp(ovs_header->dp_ifindex);
1925         if (!dp)
1926                 return -ENODEV;
1927
1928         rcu_read_lock();
1929         for (port_no = cb->args[0]; port_no < DP_MAX_PORTS; port_no++) {
1930                 struct vport *vport;
1931
1932                 vport = get_vport_protected(dp, port_no);
1933                 if (!vport)
1934                         continue;
1935
1936                 if (ovs_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).pid,
1937                                             cb->nlh->nlmsg_seq, NLM_F_MULTI,
1938                                             OVS_VPORT_CMD_NEW) < 0)
1939                         break;
1940         }
1941         rcu_read_unlock();
1942
1943         cb->args[0] = port_no;
1944         retval = skb->len;
1945
1946         return retval;
1947 }
1948
1949 static struct genl_ops dp_vport_genl_ops[] = {
1950         { .cmd = OVS_VPORT_CMD_NEW,
1951           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1952           .policy = vport_policy,
1953           .doit = ovs_vport_cmd_new
1954         },
1955         { .cmd = OVS_VPORT_CMD_DEL,
1956           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1957           .policy = vport_policy,
1958           .doit = ovs_vport_cmd_del
1959         },
1960         { .cmd = OVS_VPORT_CMD_GET,
1961           .flags = 0,               /* OK for unprivileged users. */
1962           .policy = vport_policy,
1963           .doit = ovs_vport_cmd_get,
1964           .dumpit = ovs_vport_cmd_dump
1965         },
1966         { .cmd = OVS_VPORT_CMD_SET,
1967           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1968           .policy = vport_policy,
1969           .doit = ovs_vport_cmd_set,
1970         },
1971 };
1972
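/* Ties a Generic Netlink family to its ops table and optional multicast
 * group so that registration and teardown below can be table-driven. */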
1973 struct genl_family_and_ops {
1974         struct genl_family *family;
1975         struct genl_ops *ops;
1976         int n_ops;
1977         struct genl_multicast_group *group;
1978 };
1979
1980 static const struct genl_family_and_ops dp_genl_families[] = {
1981         { &dp_datapath_genl_family,
1982           dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
1983           &dp_datapath_multicast_group },
1984         { &dp_vport_genl_family,
1985           dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
1986           &dp_vport_multicast_group },
1987         { &dp_flow_genl_family,
1988           dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
1989           &dp_flow_multicast_group },
1990         { &dp_packet_genl_family,
1991           dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
1992           NULL },
1993 };
1994
1995 static void dp_unregister_genl(int n_families)
1996 {
1997         int i;
1998
1999         for (i = 0; i < n_families; i++)
2000                 genl_unregister_family(dp_genl_families[i].family);
2001 }
2002
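/* Registers every family in dp_genl_families together with its multicast
 * group.  On error, only the n_registered families that were actually
 * registered are torn down again. */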
2003 static int dp_register_genl(void)
2004 {
2005         int n_registered;
2006         int err;
2007         int i;
2008
2009         n_registered = 0;
2010         for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2011                 const struct genl_family_and_ops *f = &dp_genl_families[i];
2012
2013                 err = genl_register_family_with_ops(f->family, f->ops,
2014                                                     f->n_ops);
2015                 if (err)
2016                         goto error;
2017                 n_registered++;
2018
2019                 if (f->group) {
2020                         err = genl_register_mc_group(f->family, f->group);
2021                         if (err)
2022                                 goto error;
2023                 }
2024         }
2025
2026         err = packet_register_mc_groups();
2027         if (err)
2028                 goto error;
2029         return 0;
2030
2031 error:
2032         dp_unregister_genl(n_registered);
2033         return err;
2034 }
2035
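/* Module init.  The Generic Netlink families are registered last, so
 * userspace cannot reach a partially initialized datapath; the error path
 * unwinds in exact reverse order. */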
2036 static int __init dp_init(void)
2037 {
2038         struct sk_buff *dummy_skb;
2039         int err;
2040
2041         BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb));
2042
2043         pr_info("Open vSwitch %s, built "__DATE__" "__TIME__"\n", VERSION BUILDNR);
2044
2045         err = flow_init();
2046         if (err)
2047                 goto error;
2048
2049         err = vport_init();
2050         if (err)
2051                 goto error_flow_exit;
2052
2053         err = register_netdevice_notifier(&dp_device_notifier);
2054         if (err)
2055                 goto error_vport_exit;
2056
2057         err = dp_register_genl();
2058         if (err < 0)
2059                 goto error_unreg_notifier;
2060
2061         return 0;
2062
2063 error_unreg_notifier:
2064         unregister_netdevice_notifier(&dp_device_notifier);
2065 error_vport_exit:
2066         vport_exit();
2067 error_flow_exit:
2068         flow_exit();
2069 error:
2070         return err;
2071 }
2072
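/* Module exit.  rcu_barrier() waits for outstanding RCU callbacks (such as
 * destroy_dp_rcu()) to finish before the layers they depend on are torn
 * down. */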
2073 static void dp_cleanup(void)
2074 {
2075         rcu_barrier();
2076         dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2077         unregister_netdevice_notifier(&dp_device_notifier);
2078         vport_exit();
2079         flow_exit();
2080 }
2081
2082 module_init(dp_init);
2083 module_exit(dp_cleanup);
2084
2085 MODULE_DESCRIPTION("Open vSwitch switching datapath");
2086 MODULE_LICENSE("GPL");