datapath: Set vport in skb when executed from userspace.
[sliver-openvswitch.git] datapath/datapath.c
1 /*
2  * Copyright (c) 2007, 2008, 2009, 2010, 2011 Nicira Networks.
3  * Distributed under the terms of the GNU GPL version 2.
4  *
5  * Significant portions of this file may be copied from parts of the Linux
6  * kernel, by Linus Torvalds and others.
7  */
8
9 /* Functions for managing the dp interface/device. */
10
11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12
13 #include <linux/init.h>
14 #include <linux/module.h>
15 #include <linux/if_arp.h>
16 #include <linux/if_vlan.h>
17 #include <linux/in.h>
18 #include <linux/ip.h>
19 #include <linux/jhash.h>
20 #include <linux/delay.h>
21 #include <linux/time.h>
22 #include <linux/etherdevice.h>
23 #include <linux/genetlink.h>
24 #include <linux/kernel.h>
25 #include <linux/kthread.h>
26 #include <linux/mutex.h>
27 #include <linux/percpu.h>
28 #include <linux/rcupdate.h>
29 #include <linux/tcp.h>
30 #include <linux/udp.h>
31 #include <linux/version.h>
32 #include <linux/ethtool.h>
33 #include <linux/wait.h>
34 #include <asm/system.h>
35 #include <asm/div64.h>
36 #include <asm/bug.h>
37 #include <linux/highmem.h>
38 #include <linux/netfilter_bridge.h>
39 #include <linux/netfilter_ipv4.h>
40 #include <linux/inetdevice.h>
41 #include <linux/list.h>
42 #include <linux/rculist.h>
43 #include <linux/dmi.h>
44 #include <net/inet_ecn.h>
45 #include <net/genetlink.h>
46
47 #include "openvswitch/datapath-protocol.h"
48 #include "checksum.h"
49 #include "datapath.h"
50 #include "actions.h"
51 #include "flow.h"
52 #include "vlan.h"
53 #include "tunnel.h"
54 #include "vport-internal_dev.h"
55
56 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) || \
57     LINUX_VERSION_CODE >= KERNEL_VERSION(3,1,0)
58 #error Kernels before 2.6.18 or after 3.0 are not supported by this version of Open vSwitch.
59 #endif
60
61 int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
62 EXPORT_SYMBOL(dp_ioctl_hook);
63
64 /**
65  * DOC: Locking:
66  *
67  * Writes to device state (add/remove datapath, port, set operations on vports,
68  * etc.) are protected by RTNL.
69  *
70  * Writes to other state (flow table modifications, set miscellaneous datapath
71  * parameters such as drop frags, etc.) are protected by genl_mutex.  The RTNL
72  * lock nests inside genl_mutex.
73  *
74  * Reads are protected by RCU.
75  *
76  * There are a few special cases (mostly stats) that have their own
77  * synchronization but they nest under all of the above and don't interact with
78  * each other.
79  */
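/*
 * A minimal sketch of how a reader is expected to follow these rules (an
 * illustration only, not code used below): take rcu_read_lock(), dereference
 * with rcu_dereference(), and do not sleep before rcu_read_unlock():
 *
 *	rcu_read_lock();
 *	vport = rcu_dereference(dp->ports[port_no]);
 *	if (vport)
 *		... use vport, without sleeping ...
 *	rcu_read_unlock();
 *
 * Writers instead hold RTNL (or genl_mutex) and publish updates with
 * rcu_assign_pointer(), as new_vport() and dp_detach_port() do below.
 */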
80
81 /* Global list of datapaths to enable dumping them all out.
82  * Protected by genl_mutex.
83  */
84 static LIST_HEAD(dps);
85
86 static struct vport *new_vport(const struct vport_parms *);
87 static int queue_userspace_packets(struct datapath *, struct sk_buff *,
88                                  const struct dp_upcall_info *);
89
90 /* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
91 struct datapath *get_dp(int dp_ifindex)
92 {
93         struct datapath *dp = NULL;
94         struct net_device *dev;
95
96         rcu_read_lock();
97         dev = dev_get_by_index_rcu(&init_net, dp_ifindex);
98         if (dev) {
99                 struct vport *vport = internal_dev_get_vport(dev);
100                 if (vport)
101                         dp = vport->dp;
102         }
103         rcu_read_unlock();
104
105         return dp;
106 }
107 EXPORT_SYMBOL_GPL(get_dp);
108
109 /* Must be called with genl_mutex. */
110 static struct flow_table *get_table_protected(struct datapath *dp)
111 {
112         return rcu_dereference_protected(dp->table, lockdep_genl_is_held());
113 }
114
115 /* Must be called with rcu_read_lock or RTNL lock. */
116 static struct vport *get_vport_protected(struct datapath *dp, u16 port_no)
117 {
118         return rcu_dereference_rtnl(dp->ports[port_no]);
119 }
120
121 /* Must be called with rcu_read_lock or RTNL lock. */
122 const char *dp_name(const struct datapath *dp)
123 {
124         return vport_get_name(rcu_dereference_rtnl(dp->ports[OVSP_LOCAL]));
125 }
126
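/* Worst-case size of the RTM_NEWLINK/RTM_DELLINK messages built by
 * dp_fill_ifinfo().  If the two ever get out of sync, the WARN_ON() in
 * dp_ifinfo_notify() catches the resulting -EMSGSIZE. */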
127 static inline size_t br_nlmsg_size(void)
128 {
129         return NLMSG_ALIGN(sizeof(struct ifinfomsg))
130                + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
131                + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
132                + nla_total_size(4) /* IFLA_MASTER */
133                + nla_total_size(4) /* IFLA_MTU */
134                + nla_total_size(1); /* IFLA_OPERSTATE */
135 }
136
137 /* Caller must hold RTNL lock. */
138 static int dp_fill_ifinfo(struct sk_buff *skb,
139                           const struct vport *port,
140                           int event, unsigned int flags)
141 {
142         struct datapath *dp = port->dp;
143         int ifindex = vport_get_ifindex(port);
144         struct ifinfomsg *hdr;
145         struct nlmsghdr *nlh;
146
147         if (ifindex < 0)
148                 return ifindex;
149
150         nlh = nlmsg_put(skb, 0, 0, event, sizeof(*hdr), flags);
151         if (nlh == NULL)
152                 return -EMSGSIZE;
153
154         hdr = nlmsg_data(nlh);
155         hdr->ifi_family = AF_BRIDGE;
156         hdr->__ifi_pad = 0;
157         hdr->ifi_type = ARPHRD_ETHER;
158         hdr->ifi_index = ifindex;
159         hdr->ifi_flags = vport_get_flags(port);
160         hdr->ifi_change = 0;
161
162         NLA_PUT_STRING(skb, IFLA_IFNAME, vport_get_name(port));
163         NLA_PUT_U32(skb, IFLA_MASTER,
164                 vport_get_ifindex(get_vport_protected(dp, OVSP_LOCAL)));
165         NLA_PUT_U32(skb, IFLA_MTU, vport_get_mtu(port));
166 #ifdef IFLA_OPERSTATE
167         NLA_PUT_U8(skb, IFLA_OPERSTATE,
168                    vport_is_running(port)
169                         ? vport_get_operstate(port)
170                         : IF_OPER_DOWN);
171 #endif
172
173         NLA_PUT(skb, IFLA_ADDRESS, ETH_ALEN, vport_get_addr(port));
174
175         return nlmsg_end(skb, nlh);
176
177 nla_put_failure:
178         nlmsg_cancel(skb, nlh);
179         return -EMSGSIZE;
180 }
181
182 /* Caller must hold RTNL lock. */
183 static void dp_ifinfo_notify(int event, struct vport *port)
184 {
185         struct sk_buff *skb;
186         int err = -ENOBUFS;
187
188         skb = nlmsg_new(br_nlmsg_size(), GFP_KERNEL);
189         if (skb == NULL)
190                 goto errout;
191
192         err = dp_fill_ifinfo(skb, port, event, 0);
193         if (err < 0) {
194                 /* -EMSGSIZE implies BUG in br_nlmsg_size() */
195                 WARN_ON(err == -EMSGSIZE);
196                 kfree_skb(skb);
197                 goto errout;
198         }
199         rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
200         return;
201 errout:
202         if (err < 0)
203                 rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err);
204 }
205
206 static void release_dp(struct kobject *kobj)
207 {
208         struct datapath *dp = container_of(kobj, struct datapath, ifobj);
209         kfree(dp);
210 }
211
212 static struct kobj_type dp_ktype = {
213         .release = release_dp
214 };
215
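/* RCU callback scheduled from ovs_dp_cmd_del(): by the time it runs, no
 * RCU reader can still see 'dp', so the flow table, the per-CPU stats, and
 * (via the kobject release) 'dp' itself can safely be freed. */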
216 static void destroy_dp_rcu(struct rcu_head *rcu)
217 {
218         struct datapath *dp = container_of(rcu, struct datapath, rcu);
219
220         flow_tbl_destroy(dp->table);
221         free_percpu(dp->stats_percpu);
222         kobject_put(&dp->ifobj);
223 }
224
225 /* Called with RTNL lock and genl_lock. */
226 static struct vport *new_vport(const struct vport_parms *parms)
227 {
228         struct vport *vport;
229
230         vport = vport_add(parms);
231         if (!IS_ERR(vport)) {
232                 struct datapath *dp = parms->dp;
233
234                 rcu_assign_pointer(dp->ports[parms->port_no], vport);
235                 list_add(&vport->node, &dp->port_list);
236
237                 dp_ifinfo_notify(RTM_NEWLINK, vport);
238         }
239
240         return vport;
241 }
242
243 /* Called with RTNL lock. */
244 void dp_detach_port(struct vport *p)
245 {
246         ASSERT_RTNL();
247
248         if (p->port_no != OVSP_LOCAL)
249                 dp_sysfs_del_if(p);
250         dp_ifinfo_notify(RTM_DELLINK, p);
251
252         /* First drop references to device. */
253         list_del(&p->node);
254         rcu_assign_pointer(p->dp->ports[p->port_no], NULL);
255
256         /* Then destroy it. */
257         vport_del(p);
258 }
259
260 /* Must be called with rcu_read_lock. */
261 void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
262 {
263         struct datapath *dp = p->dp;
264         struct sw_flow *flow;
265         struct dp_stats_percpu *stats;
266         int stats_counter_off;
267         int error;
268
269         OVS_CB(skb)->vport = p;
270
271         if (!OVS_CB(skb)->flow) {
272                 struct sw_flow_key key;
273                 int key_len;
274                 bool is_frag;
275
276                 /* Extract flow from 'skb' into 'key'. */
277                 error = flow_extract(skb, p->port_no, &key, &key_len, &is_frag);
278                 if (unlikely(error)) {
279                         kfree_skb(skb);
280                         return;
281                 }
282
283                 if (is_frag && dp->drop_frags) {
284                         consume_skb(skb);
285                         stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
286                         goto out;
287                 }
288
289                 /* Look up flow. */
290                 flow = flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len);
291                 if (unlikely(!flow)) {
292                         struct dp_upcall_info upcall;
293
294                         upcall.cmd = OVS_PACKET_CMD_MISS;
295                         upcall.key = &key;
296                         upcall.userdata = 0;
297                         upcall.sample_pool = 0;
298                         upcall.actions = NULL;
299                         upcall.actions_len = 0;
300                         dp_upcall(dp, skb, &upcall);
301                         stats_counter_off = offsetof(struct dp_stats_percpu, n_missed);
302                         goto out;
303                 }
304
305                 OVS_CB(skb)->flow = flow;
306         }
307
308         stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
309         flow_used(OVS_CB(skb)->flow, skb);
310         execute_actions(dp, skb);
311
312 out:
313         /* Update datapath statistics. */
314         local_bh_disable();
315         stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
316
317         write_seqcount_begin(&stats->seqlock);
318         (*(u64 *)((u8 *)stats + stats_counter_off))++;
319         write_seqcount_end(&stats->seqlock);
320
321         local_bh_enable();
322 }
323
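/* Copies 'skb' into the flat buffer 'to', completing the checksum of a
 * CHECKSUM_PARTIAL packet on the way: bytes before the checksum start are
 * copied verbatim, the remainder is copied while being summed, and the
 * folded result is written at the checksum offset. */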
324 static void copy_and_csum_skb(struct sk_buff *skb, void *to)
325 {
326         u16 csum_start, csum_offset;
327         __wsum csum;
328
329         get_skb_csum_pointers(skb, &csum_start, &csum_offset);
330         csum_start -= skb_headroom(skb);
331
332         skb_copy_bits(skb, 0, to, csum_start);
333
334         csum = skb_copy_and_csum_bits(skb, csum_start, to + csum_start,
335                                       skb->len - csum_start, 0);
336         *(__sum16 *)(to + csum_start + csum_offset) = csum_fold(csum);
337 }
338
339 static struct genl_family dp_packet_genl_family = {
340         .id = GENL_ID_GENERATE,
341         .hdrsize = sizeof(struct ovs_header),
342         .name = OVS_PACKET_FAMILY,
343         .version = 1,
344         .maxattr = OVS_PACKET_ATTR_MAX
345 };
346
347 /* Generic Netlink multicast groups for upcalls.
348  *
349  * We really want three unique multicast groups per datapath, but we can't even
350  * get one, because genl_register_mc_group() takes genl_lock, which is also
351  * held during Generic Netlink message processing, so trying to acquire
352  * multicast groups during OVS_DP_NEW processing deadlocks.  Instead, we
353  * preallocate a few groups and use them round-robin for datapaths.  Collision
354  * isn't fatal--multicast listeners should check that the family is the one
355  * that they want and discard others--but it wastes time and memory to receive
356  * unwanted messages.
357  */
358 #define PACKET_N_MC_GROUPS 16
359 static struct genl_multicast_group packet_mc_groups[PACKET_N_MC_GROUPS];
360
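/* Returns the multicast group to use for upcalls with command 'cmd' on
 * 'dp': a hash of (dp_ifindex, cmd) picks one of the preallocated
 * packet_mc_groups[] entries described above. */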
361 static u32 packet_mc_group(struct datapath *dp, u8 cmd)
362 {
363         u32 idx;
364         BUILD_BUG_ON_NOT_POWER_OF_2(PACKET_N_MC_GROUPS);
365
366         idx = jhash_2words(dp->dp_ifindex, cmd, 0) & (PACKET_N_MC_GROUPS - 1);
367         return packet_mc_groups[idx].id;
368 }
369
370 static int packet_register_mc_groups(void)
371 {
372         int i;
373
374         for (i = 0; i < PACKET_N_MC_GROUPS; i++) {
375                 struct genl_multicast_group *group = &packet_mc_groups[i];
376                 int error;
377
378                 sprintf(group->name, "packet%d", i);
379                 error = genl_register_mc_group(&dp_packet_genl_family, group);
380                 if (error)
381                         return error;
382         }
383         return 0;
384 }
385
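/* Queues 'skb' to userspace as directed by 'upcall_info', first segmenting
 * GSO packets so that userspace never receives one larger than a single
 * MTU's worth of data.  Takes ownership of 'skb'; on failure, counts the
 * packet in the per-CPU n_lost statistic and returns a negative errno. */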
386 int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info)
387 {
388         struct dp_stats_percpu *stats;
389         int err;
390
391         WARN_ON_ONCE(skb_shared(skb));
392
393         forward_ip_summed(skb, true);
394
395         /* Break apart GSO packets into their component pieces.  Otherwise
396          * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
397         if (skb_is_gso(skb)) {
398                 struct sk_buff *nskb = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
399
400                 if (IS_ERR(nskb)) {
401                         kfree_skb(skb);
402                         err = PTR_ERR(nskb);
403                         goto err;
404                 }
405                 consume_skb(skb);
406                 skb = nskb;
407         }
408
409         err = queue_userspace_packets(dp, skb, upcall_info);
410         if (err)
411                 goto err;
412
413         return 0;
414
415 err:
416         local_bh_disable();
417         stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
418
419         write_seqcount_begin(&stats->seqlock);
420         stats->n_lost++;
421         write_seqcount_end(&stats->seqlock);
422
423         local_bh_enable();
424
425         return err;
426 }
427
428 /* Send each packet in the 'skb' list to userspace for 'dp' as directed by
429  * 'upcall_info'.  There will be only one packet unless we broke up a GSO
430  * packet.
431  */
432 static int queue_userspace_packets(struct datapath *dp, struct sk_buff *skb,
433                                  const struct dp_upcall_info *upcall_info)
434 {
435         u32 group = packet_mc_group(dp, upcall_info->cmd);
436         struct sk_buff *nskb;
437         int err;
438
439         do {
440                 struct ovs_header *upcall;
441                 struct sk_buff *user_skb; /* to be queued to userspace */
442                 struct nlattr *nla;
443                 unsigned int len;
444
445                 nskb = skb->next;
446                 skb->next = NULL;
447
448                 err = vlan_deaccel_tag(skb);
449                 if (unlikely(err))
450                         goto err_kfree_skbs;
451
452                 if (nla_attr_size(skb->len) > USHRT_MAX) {
453                         /* Too big to fit in a single Netlink attribute. */
454                         err = -EFBIG;
455                         goto err_kfree_skbs;
456                 }
454
455                 len = sizeof(struct ovs_header);
456                 len += nla_total_size(skb->len);
457                 len += nla_total_size(FLOW_BUFSIZE);
458                 if (upcall_info->userdata)
459                         len += nla_total_size(8);
460                 if (upcall_info->sample_pool)
461                         len += nla_total_size(4);
462                 if (upcall_info->actions_len)
463                         len += nla_total_size(upcall_info->actions_len);
464
465                 user_skb = genlmsg_new(len, GFP_ATOMIC);
466                 if (!user_skb) {
467                         netlink_set_err(INIT_NET_GENL_SOCK, 0, group, -ENOBUFS);
468                         goto err_kfree_skbs;
469                 }
470
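                /* 'len' above accounts for everything added below, so
                 * genlmsg_put() and the nla_put*()/nesting calls that
                 * follow should not fail on this freshly sized skb. */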
471                 upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 0, upcall_info->cmd);
472                 upcall->dp_ifindex = dp->dp_ifindex;
473
474                 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
475                 flow_to_nlattrs(upcall_info->key, user_skb);
476                 nla_nest_end(user_skb, nla);
477
478                 if (upcall_info->userdata)
479                         nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA, upcall_info->userdata);
480                 if (upcall_info->sample_pool)
481                         nla_put_u32(user_skb, OVS_PACKET_ATTR_SAMPLE_POOL, upcall_info->sample_pool);
482                 if (upcall_info->actions_len) {
483                         const struct nlattr *actions = upcall_info->actions;
484                         u32 actions_len = upcall_info->actions_len;
485
486                         nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);
487                         memcpy(__skb_put(user_skb, actions_len), actions, actions_len);
488                         nla_nest_end(user_skb, nla);
489                 }
490
491                 nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);
492                 if (skb->ip_summed == CHECKSUM_PARTIAL)
493                         copy_and_csum_skb(skb, nla_data(nla));
494                 else
495                         skb_copy_bits(skb, 0, nla_data(nla), skb->len);
496
497                 err = genlmsg_multicast(user_skb, 0, group, GFP_ATOMIC);
498                 if (err)
499                         goto err_kfree_skbs;
500
501                 consume_skb(skb);
502                 skb = nskb;
503         } while (skb);
504         return 0;
505
506 err_kfree_skbs:
507         kfree_skb(skb);
508         while ((skb = nskb) != NULL) {
509                 nskb = skb->next;
510                 kfree_skb(skb);
511         }
512         return err;
513 }
514
515 /* Called with genl_mutex. */
516 static int flush_flows(int dp_ifindex)
517 {
518         struct flow_table *old_table;
519         struct flow_table *new_table;
520         struct datapath *dp;
521
522         dp = get_dp(dp_ifindex);
523         if (!dp)
524                 return -ENODEV;
525
526         old_table = get_table_protected(dp);
527         new_table = flow_tbl_alloc(TBL_MIN_BUCKETS);
528         if (!new_table)
529                 return -ENOMEM;
530
531         rcu_assign_pointer(dp->table, new_table);
532
533         flow_tbl_deferred_destroy(old_table);
534         return 0;
535 }
536
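/* Checks that 'attr' is a syntactically valid list of actions: every
 * nested attribute must be of a known type, have the fixed length given
 * in action_lens[], and carry an acceptable value.  Semantic checks stop
 * there; e.g. an OVS_ACTION_ATTR_OUTPUT port need only be in range, not
 * necessarily exist. */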
537 static int validate_actions(const struct nlattr *attr)
538 {
539         const struct nlattr *a;
540         int rem;
541
542         nla_for_each_nested(a, attr, rem) {
543                 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
544                         [OVS_ACTION_ATTR_OUTPUT] = 4,
545                         [OVS_ACTION_ATTR_USERSPACE] = 8,
546                         [OVS_ACTION_ATTR_PUSH_VLAN] = 2,
547                         [OVS_ACTION_ATTR_POP_VLAN] = 0,
548                         [OVS_ACTION_ATTR_SET_DL_SRC] = ETH_ALEN,
549                         [OVS_ACTION_ATTR_SET_DL_DST] = ETH_ALEN,
550                         [OVS_ACTION_ATTR_SET_NW_SRC] = 4,
551                         [OVS_ACTION_ATTR_SET_NW_DST] = 4,
552                         [OVS_ACTION_ATTR_SET_NW_TOS] = 1,
553                         [OVS_ACTION_ATTR_SET_TP_SRC] = 2,
554                         [OVS_ACTION_ATTR_SET_TP_DST] = 2,
555                         [OVS_ACTION_ATTR_SET_TUNNEL] = 8,
556                         [OVS_ACTION_ATTR_SET_PRIORITY] = 4,
557                         [OVS_ACTION_ATTR_POP_PRIORITY] = 0,
558                 };
559                 int type = nla_type(a);
560
561                 if (type > OVS_ACTION_ATTR_MAX || nla_len(a) != action_lens[type])
562                         return -EINVAL;
563
564                 switch (type) {
565                 case OVS_ACTION_ATTR_UNSPEC:
566                         return -EINVAL;
567
568                 case OVS_ACTION_ATTR_USERSPACE:
569                 case OVS_ACTION_ATTR_POP_VLAN:
570                 case OVS_ACTION_ATTR_SET_DL_SRC:
571                 case OVS_ACTION_ATTR_SET_DL_DST:
572                 case OVS_ACTION_ATTR_SET_NW_SRC:
573                 case OVS_ACTION_ATTR_SET_NW_DST:
574                 case OVS_ACTION_ATTR_SET_TP_SRC:
575                 case OVS_ACTION_ATTR_SET_TP_DST:
576                 case OVS_ACTION_ATTR_SET_TUNNEL:
577                 case OVS_ACTION_ATTR_SET_PRIORITY:
578                 case OVS_ACTION_ATTR_POP_PRIORITY:
579                         /* No validation needed. */
580                         break;
581
582                 case OVS_ACTION_ATTR_OUTPUT:
583                         if (nla_get_u32(a) >= DP_MAX_PORTS)
584                                 return -EINVAL;
585                         break;
586
587                 case OVS_ACTION_ATTR_PUSH_VLAN:
588                         if (nla_get_be16(a) & htons(VLAN_CFI_MASK))
589                                 return -EINVAL;
590                         break;
591
592                 case OVS_ACTION_ATTR_SET_NW_TOS:
593                         if (nla_get_u8(a) & INET_ECN_MASK)
594                                 return -EINVAL;
595                         break;
596
597                 default:
598                         return -EOPNOTSUPP;
599                 }
600         }
601
602         if (rem > 0)
603                 return -EINVAL;
604
605         return 0;
606 }

607 static void clear_stats(struct sw_flow *flow)
608 {
609         flow->used = 0;
610         flow->tcp_flags = 0;
611         flow->packet_count = 0;
612         flow->byte_count = 0;
613 }
614
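/* Handler for OVS_PACKET_CMD_EXECUTE: rebuilds the userspace-supplied
 * packet as an sk_buff, wraps it in a temporary sw_flow (including the
 * in_port and tun_id metadata from OVS_PACKET_ATTR_KEY), and executes the
 * given actions on the datapath named in the Generic Netlink header. */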
615 static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
616 {
617         struct ovs_header *ovs_header = info->userhdr;
618         struct nlattr **a = info->attrs;
619         struct sw_flow_actions *acts;
620         struct sk_buff *packet;
621         struct sw_flow *flow;
622         struct datapath *dp;
623         struct ethhdr *eth;
624         bool is_frag;
625         int len;
626         int err;
627         int key_len;
628
629         err = -EINVAL;
630         if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
631             !a[OVS_PACKET_ATTR_ACTIONS] ||
632             nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN)
633                 goto err;
634
635         err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS]);
636         if (err)
637                 goto err;
638
639         len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
640         packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
641         err = -ENOMEM;
642         if (!packet)
643                 goto err;
644         skb_reserve(packet, NET_IP_ALIGN);
645
646         memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len);
647
648         skb_reset_mac_header(packet);
649         eth = eth_hdr(packet);
650
651         /* Normally, setting the skb 'protocol' field would be handled by a
652          * call to eth_type_trans(), but it assumes there's a sending
653          * device, which we may not have.  Ethertype values >= 1536 denote
654          * Ethernet II frames; smaller values are 802.3 lengths. */
654         if (ntohs(eth->h_proto) >= 1536)
655                 packet->protocol = eth->h_proto;
656         else
657                 packet->protocol = htons(ETH_P_802_2);
658
659         /* Build an sw_flow for sending this packet. */
660         flow = flow_alloc();
661         err = PTR_ERR(flow);
662         if (IS_ERR(flow))
663                 goto err_kfree_skb;
664
665         err = flow_extract(packet, -1, &flow->key, &key_len, &is_frag);
666         if (err)
667                 goto err_flow_put;
668
669         err = flow_metadata_from_nlattrs(&flow->key.eth.in_port,
670                                          &flow->key.eth.tun_id,
671                                          a[OVS_PACKET_ATTR_KEY]);
672         if (err)
673                 goto err_flow_put;
674
675         flow->hash = flow_hash(&flow->key, key_len);
676
677         acts = flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]);
678         err = PTR_ERR(acts);
679         if (IS_ERR(acts))
680                 goto err_flow_put;
681         rcu_assign_pointer(flow->sf_acts, acts);
682
683         OVS_CB(packet)->flow = flow;
684
685         rcu_read_lock();
686         dp = get_dp(ovs_header->dp_ifindex);
687         err = -ENODEV;
688         if (!dp)
689                 goto err_unlock;
690
691         if (flow->key.eth.in_port < DP_MAX_PORTS)
692                 OVS_CB(packet)->vport = get_vport_protected(dp,
693                                                         flow->key.eth.in_port);
694
695         err = execute_actions(dp, packet);
696         rcu_read_unlock();
697
698         flow_put(flow);
699         return err;
700
701 err_unlock:
702         rcu_read_unlock();
703 err_flow_put:
704         flow_put(flow);
705 err_kfree_skb:
706         kfree_skb(packet);
707 err:
708         return err;
709 }
710
711 static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
712         [OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
713         [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
714         [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
715 };
716
717 static struct genl_ops dp_packet_genl_ops[] = {
718         { .cmd = OVS_PACKET_CMD_EXECUTE,
719           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
720           .policy = packet_policy,
721           .doit = ovs_packet_cmd_execute
722         }
723 };
724
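/* Sums the per-CPU counters for 'dp' into 'stats'.  Each CPU's counters
 * are sampled inside a seqcount read section so that a writer updating
 * them concurrently (e.g. dp_process_received_packet()) is never observed
 * mid-update. */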
725 static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
726 {
727         int i;
728         struct flow_table *table = get_table_protected(dp);
729
730         stats->n_flows = flow_tbl_count(table);
731
732         stats->n_frags = stats->n_hit = stats->n_missed = stats->n_lost = 0;
733         for_each_possible_cpu(i) {
734                 const struct dp_stats_percpu *percpu_stats;
735                 struct dp_stats_percpu local_stats;
736                 unsigned seqcount;
737
738                 percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
739
740                 do {
741                         seqcount = read_seqcount_begin(&percpu_stats->seqlock);
742                         local_stats = *percpu_stats;
743                 } while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));
744
745                 stats->n_frags += local_stats.n_frags;
746                 stats->n_hit += local_stats.n_hit;
747                 stats->n_missed += local_stats.n_missed;
748                 stats->n_lost += local_stats.n_lost;
749         }
750 }
751
752 /* MTU of the dp pseudo-device: the minimum MTU of the datapath's
753  * non-internal ports, or ETH_DATA_LEN if it has none.
754  * Called with RTNL lock.
755  */
755 int dp_min_mtu(const struct datapath *dp)
756 {
757         struct vport *p;
758         int mtu = 0;
759
760         ASSERT_RTNL();
761
762         list_for_each_entry (p, &dp->port_list, node) {
763                 int dev_mtu;
764
765                 /* Skip any internal ports, since that's what we're trying to
766                  * set. */
767                 if (is_internal_vport(p))
768                         continue;
769
770                 dev_mtu = vport_get_mtu(p);
771                 if (!dev_mtu)
772                         continue;
773                 if (!mtu || dev_mtu < mtu)
774                         mtu = dev_mtu;
775         }
776
777         return mtu ? mtu : ETH_DATA_LEN;
778 }
779
780 /* Sets the MTU of all of the datapath's internal devices to the minimum
781  * of its ports.  Called with RTNL lock.
782  */
783 void set_internal_devs_mtu(const struct datapath *dp)
784 {
785         struct vport *p;
786         int mtu;
787
788         ASSERT_RTNL();
789
790         mtu = dp_min_mtu(dp);
791
792         list_for_each_entry (p, &dp->port_list, node) {
793                 if (is_internal_vport(p))
794                         vport_set_mtu(p, mtu);
795         }
796 }
797
798 static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
799         [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
800         [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
801         [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
802 };
803
804 static struct genl_family dp_flow_genl_family = {
805         .id = GENL_ID_GENERATE,
806         .hdrsize = sizeof(struct ovs_header),
807         .name = OVS_FLOW_FAMILY,
808         .version = 1,
809         .maxattr = OVS_FLOW_ATTR_MAX
810 };
811
812 static struct genl_multicast_group dp_flow_multicast_group = {
813         .name = OVS_FLOW_MCGROUP
814 };
815
816 /* Called with genl_lock. */
817 static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
818                                   struct sk_buff *skb, u32 pid, u32 seq, u32 flags, u8 cmd)
819 {
820         const int skb_orig_len = skb->len;
821         const struct sw_flow_actions *sf_acts;
822         struct ovs_flow_stats stats;
823         struct ovs_header *ovs_header;
824         struct nlattr *nla;
825         unsigned long used;
826         u8 tcp_flags;
827         int err;
828
829         sf_acts = rcu_dereference_protected(flow->sf_acts,
830                                             lockdep_genl_is_held());
831
832         ovs_header = genlmsg_put(skb, pid, seq, &dp_flow_genl_family, flags, cmd);
833         if (!ovs_header)
834                 return -EMSGSIZE;
835
836         ovs_header->dp_ifindex = dp->dp_ifindex;
837
838         nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
839         if (!nla)
840                 goto nla_put_failure;
841         err = flow_to_nlattrs(&flow->key, skb);
842         if (err)
843                 goto error;
844         nla_nest_end(skb, nla);
845
846         spin_lock_bh(&flow->lock);
847         used = flow->used;
848         stats.n_packets = flow->packet_count;
849         stats.n_bytes = flow->byte_count;
850         tcp_flags = flow->tcp_flags;
851         spin_unlock_bh(&flow->lock);
852
853         if (used)
854                 NLA_PUT_U64(skb, OVS_FLOW_ATTR_USED, flow_used_time(used));
855
856         if (stats.n_packets)
857                 NLA_PUT(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats);
858
859         if (tcp_flags)
860                 NLA_PUT_U8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags);
861
862         /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
863          * this is the first flow to be dumped into 'skb'.  This is unusual for
864          * Netlink but individual action lists can be longer than
865          * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
866          * The userspace caller can always fetch the actions separately if it
867          * really wants them.  (Most userspace callers in fact don't care.)
868          *
869          * This can only fail for dump operations because the skb is always
870          * properly sized for single flows.
871          */
872         err = nla_put(skb, OVS_FLOW_ATTR_ACTIONS, sf_acts->actions_len,
873                       sf_acts->actions);
874         if (err < 0 && skb_orig_len)
875                 goto error;
876
877         return genlmsg_end(skb, ovs_header);
878
879 nla_put_failure:
880         err = -EMSGSIZE;
881 error:
882         genlmsg_cancel(skb, ovs_header);
883         return err;
884 }
885
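/* Allocates a reply skb large enough for the worst case that
 * ovs_flow_cmd_fill_info() can emit for 'flow': key, actions, stats, TCP
 * flags, and last-used time.  Called with genl_lock. */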
886 static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
887 {
888         const struct sw_flow_actions *sf_acts;
889         int len;
890
891         sf_acts = rcu_dereference_protected(flow->sf_acts,
892                                             lockdep_genl_is_held());
893
894         len = nla_total_size(FLOW_BUFSIZE); /* OVS_FLOW_ATTR_KEY */
895         len += nla_total_size(sf_acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
896         len += nla_total_size(sizeof(struct ovs_flow_stats)); /* OVS_FLOW_ATTR_STATS */
897         len += nla_total_size(1); /* OVS_FLOW_ATTR_TCP_FLAGS */
898         len += nla_total_size(8); /* OVS_FLOW_ATTR_USED */
899         return genlmsg_new(NLMSG_ALIGN(sizeof(struct ovs_header)) + len, GFP_KERNEL);
900 }
901
902 static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp,
903                                                u32 pid, u32 seq, u8 cmd)
904 {
905         struct sk_buff *skb;
906         int retval;
907
908         skb = ovs_flow_cmd_alloc_info(flow);
909         if (!skb)
910                 return ERR_PTR(-ENOMEM);
911
912         retval = ovs_flow_cmd_fill_info(flow, dp, skb, pid, seq, 0, cmd);
913         BUG_ON(retval < 0);
914         return skb;
915 }
916
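/* Shared handler for OVS_FLOW_CMD_NEW and OVS_FLOW_CMD_SET: looks the flow
 * up by key, creating it if absent (NEW only) or replacing its actions and
 * optionally clearing its stats if present, then notifies
 * dp_flow_multicast_group of the result. */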
917 static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
918 {
919         struct nlattr **a = info->attrs;
920         struct ovs_header *ovs_header = info->userhdr;
921         struct sw_flow_key key;
922         struct sw_flow *flow;
923         struct sk_buff *reply;
924         struct datapath *dp;
925         struct flow_table *table;
926         int error;
927         int key_len;
928
929         /* Extract key. */
930         error = -EINVAL;
931         if (!a[OVS_FLOW_ATTR_KEY])
932                 goto error;
933         error = flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
934         if (error)
935                 goto error;
936
937         /* Validate actions. */
938         if (a[OVS_FLOW_ATTR_ACTIONS]) {
939                 error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS]);
940                 if (error)
941                         goto error;
942         } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
943                 error = -EINVAL;
944                 goto error;
945         }
946
947         dp = get_dp(ovs_header->dp_ifindex);
948         error = -ENODEV;
949         if (!dp)
950                 goto error;
951
952         table = get_table_protected(dp);
953         flow = flow_tbl_lookup(table, &key, key_len);
954         if (!flow) {
955                 struct sw_flow_actions *acts;
956
957                 /* Bail out if we're not allowed to create a new flow. */
958                 error = -ENOENT;
959                 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
960                         goto error;
961
962                 /* Expand table, if necessary, to make room. */
963                 if (flow_tbl_need_to_expand(table)) {
964                         struct flow_table *new_table;
965
966                         new_table = flow_tbl_expand(table);
967                         if (!IS_ERR(new_table)) {
968                                 rcu_assign_pointer(dp->table, new_table);
969                                 flow_tbl_deferred_destroy(table);
970                                 table = get_table_protected(dp);
971                         }
972                 }
973
974                 /* Allocate flow. */
975                 flow = flow_alloc();
976                 if (IS_ERR(flow)) {
977                         error = PTR_ERR(flow);
978                         goto error;
979                 }
980                 flow->key = key;
981                 clear_stats(flow);
982
983                 /* Obtain actions. */
984                 acts = flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]);
985                 error = PTR_ERR(acts);
986                 if (IS_ERR(acts))
987                         goto error_free_flow;
988                 rcu_assign_pointer(flow->sf_acts, acts);
989
990                 /* Put flow in bucket. */
991                 flow->hash = flow_hash(&key, key_len);
992                 flow_tbl_insert(table, flow);
993
994                 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
995                                                 info->snd_seq, OVS_FLOW_CMD_NEW);
996         } else {
997                 /* We found a matching flow. */
998                 struct sw_flow_actions *old_acts;
999
1000                 /* Bail out if we're not allowed to modify an existing flow.
1001                  * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
1002                  * because Generic Netlink treats the latter as a dump
1003                  * request.  We also accept NLM_F_EXCL in case that bug ever
1004                  * gets fixed.
1005                  */
1006                 error = -EEXIST;
1007                 if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
1008                     info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
1009                         goto error;
1010
1011                 /* Update actions. */
1012                 old_acts = rcu_dereference_protected(flow->sf_acts,
1013                                                      lockdep_genl_is_held());
1014                 if (a[OVS_FLOW_ATTR_ACTIONS] &&
1015                     (old_acts->actions_len != nla_len(a[OVS_FLOW_ATTR_ACTIONS]) ||
1016                      memcmp(old_acts->actions, nla_data(a[OVS_FLOW_ATTR_ACTIONS]),
1017                             old_acts->actions_len))) {
1018                         struct sw_flow_actions *new_acts;
1019
1020                         new_acts = flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]);
1021                         error = PTR_ERR(new_acts);
1022                         if (IS_ERR(new_acts))
1023                                 goto error;
1024
1025                         rcu_assign_pointer(flow->sf_acts, new_acts);
1026                         flow_deferred_free_acts(old_acts);
1027                 }
1028
1029                 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
1030                                                 info->snd_seq, OVS_FLOW_CMD_NEW);
1031
1032                 /* Clear stats. */
1033                 if (a[OVS_FLOW_ATTR_CLEAR]) {
1034                         spin_lock_bh(&flow->lock);
1035                         clear_stats(flow);
1036                         spin_unlock_bh(&flow->lock);
1037                 }
1038         }
1039
1040         if (!IS_ERR(reply))
1041                 genl_notify(reply, genl_info_net(info), info->snd_pid,
1042                             dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
1043         else
1044                 netlink_set_err(INIT_NET_GENL_SOCK, 0,
1045                                 dp_flow_multicast_group.id, PTR_ERR(reply));
1046         return 0;
1047
1048 error_free_flow:
1049         flow_put(flow);
1050 error:
1051         return error;
1052 }
1053
1054 static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1055 {
1056         struct nlattr **a = info->attrs;
1057         struct ovs_header *ovs_header = info->userhdr;
1058         struct sw_flow_key key;
1059         struct sk_buff *reply;
1060         struct sw_flow *flow;
1061         struct datapath *dp;
1062         struct flow_table *table;
1063         int err;
1064         int key_len;
1065
1066         if (!a[OVS_FLOW_ATTR_KEY])
1067                 return -EINVAL;
1068         err = flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
1069         if (err)
1070                 return err;
1071
1072         dp = get_dp(ovs_header->dp_ifindex);
1073         if (!dp)
1074                 return -ENODEV;
1075
1076         table = get_table_protected(dp);
1077         flow = flow_tbl_lookup(table, &key, key_len);
1078         if (!flow)
1079                 return -ENOENT;
1080
1081         reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid, info->snd_seq, OVS_FLOW_CMD_NEW);
1082         if (IS_ERR(reply))
1083                 return PTR_ERR(reply);
1084
1085         return genlmsg_reply(reply, info);
1086 }
1087
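/* Handler for OVS_FLOW_CMD_DEL.  Without OVS_FLOW_ATTR_KEY this flushes
 * the entire flow table; otherwise it removes the one matching flow,
 * building the notification from the flow's final state before deferring
 * its destruction to RCU. */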
1088 static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1089 {
1090         struct nlattr **a = info->attrs;
1091         struct ovs_header *ovs_header = info->userhdr;
1092         struct sw_flow_key key;
1093         struct sk_buff *reply;
1094         struct sw_flow *flow;
1095         struct datapath *dp;
1096         struct flow_table *table;
1097         int err;
1098         int key_len;
1099
1100         if (!a[OVS_FLOW_ATTR_KEY])
1101                 return flush_flows(ovs_header->dp_ifindex);
1102         err = flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
1103         if (err)
1104                 return err;
1105
1106         dp = get_dp(ovs_header->dp_ifindex);
1107         if (!dp)
1108                 return -ENODEV;
1109
1110         table = get_table_protected(dp);
1111         flow = flow_tbl_lookup(table, &key, key_len);
1112         if (!flow)
1113                 return -ENOENT;
1114
1115         reply = ovs_flow_cmd_alloc_info(flow);
1116         if (!reply)
1117                 return -ENOMEM;
1118
1119         flow_tbl_remove(table, flow);
1120
1121         err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_pid,
1122                                      info->snd_seq, 0, OVS_FLOW_CMD_DEL);
1123         BUG_ON(err < 0);
1124
1125         flow_deferred_free(flow);
1126
1127         genl_notify(reply, genl_info_net(info), info->snd_pid,
1128                     dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
1129         return 0;
1130 }
1131
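/* Dump callback for OVS_FLOW_CMD_GET: walks the flow table with the
 * (bucket, object) cursor kept in cb->args[0] and cb->args[1], so that a
 * dump spanning several skbs resumes exactly where the previous one
 * filled up. */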
1132 static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1133 {
1134         struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1135         struct datapath *dp;
1136
1137         dp = get_dp(ovs_header->dp_ifindex);
1138         if (!dp)
1139                 return -ENODEV;
1140
1141         for (;;) {
1142                 struct sw_flow *flow;
1143                 u32 bucket, obj;
1144
1145                 bucket = cb->args[0];
1146                 obj = cb->args[1];
1147                 flow = flow_tbl_next(get_table_protected(dp), &bucket, &obj);
1148                 if (!flow)
1149                         break;
1150
1151                 if (ovs_flow_cmd_fill_info(flow, dp, skb, NETLINK_CB(cb->skb).pid,
1152                                            cb->nlh->nlmsg_seq, NLM_F_MULTI,
1153                                            OVS_FLOW_CMD_NEW) < 0)
1154                         break;
1155
1156                 cb->args[0] = bucket;
1157                 cb->args[1] = obj;
1158         }
1159         return skb->len;
1160 }
1161
1162 static struct genl_ops dp_flow_genl_ops[] = {
1163         { .cmd = OVS_FLOW_CMD_NEW,
1164           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1165           .policy = flow_policy,
1166           .doit = ovs_flow_cmd_new_or_set
1167         },
1168         { .cmd = OVS_FLOW_CMD_DEL,
1169           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1170           .policy = flow_policy,
1171           .doit = ovs_flow_cmd_del
1172         },
1173         { .cmd = OVS_FLOW_CMD_GET,
1174           .flags = 0,               /* OK for unprivileged users. */
1175           .policy = flow_policy,
1176           .doit = ovs_flow_cmd_get,
1177           .dumpit = ovs_flow_cmd_dump
1178         },
1179         { .cmd = OVS_FLOW_CMD_SET,
1180           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1181           .policy = flow_policy,
1182           .doit = ovs_flow_cmd_new_or_set,
1183         },
1184 };
1185
1186 static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
1187 #ifdef HAVE_NLA_NUL_STRING
1188         [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1189 #endif
1190         [OVS_DP_ATTR_IPV4_FRAGS] = { .type = NLA_U32 },
1191         [OVS_DP_ATTR_SAMPLING] = { .type = NLA_U32 },
1192 };
1193
1194 static struct genl_family dp_datapath_genl_family = {
1195         .id = GENL_ID_GENERATE,
1196         .hdrsize = sizeof(struct ovs_header),
1197         .name = OVS_DATAPATH_FAMILY,
1198         .version = 1,
1199         .maxattr = OVS_DP_ATTR_MAX
1200 };
1201
1202 static struct genl_multicast_group dp_datapath_multicast_group = {
1203         .name = OVS_DATAPATH_MCGROUP
1204 };
1205
1206 static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1207                                 u32 pid, u32 seq, u32 flags, u8 cmd)
1208 {
1209         struct ovs_header *ovs_header;
1210         struct nlattr *nla;
1211         int err;
1212
1213         ovs_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family,
1214                                    flags, cmd);
1215         if (!ovs_header)
1216                 goto error;
1217
1218         ovs_header->dp_ifindex = dp->dp_ifindex;
1219
1220         rcu_read_lock();
1221         err = nla_put_string(skb, OVS_DP_ATTR_NAME, dp_name(dp));
1222         rcu_read_unlock();
1223         if (err)
1224                 goto nla_put_failure;
1225
1226         nla = nla_reserve(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats));
1227         if (!nla)
1228                 goto nla_put_failure;
1229         get_dp_stats(dp, nla_data(nla));
1230
1231         NLA_PUT_U32(skb, OVS_DP_ATTR_IPV4_FRAGS,
1232                     dp->drop_frags ? OVS_DP_FRAG_DROP : OVS_DP_FRAG_ZERO);
1233
1234         if (dp->sflow_probability)
1235                 NLA_PUT_U32(skb, OVS_DP_ATTR_SAMPLING, dp->sflow_probability);
1236
1237         nla = nla_nest_start(skb, OVS_DP_ATTR_MCGROUPS);
1238         if (!nla)
1239                 goto nla_put_failure;
1240         NLA_PUT_U32(skb, OVS_PACKET_CMD_MISS, packet_mc_group(dp, OVS_PACKET_CMD_MISS));
1241         NLA_PUT_U32(skb, OVS_PACKET_CMD_ACTION, packet_mc_group(dp, OVS_PACKET_CMD_ACTION));
1242         NLA_PUT_U32(skb, OVS_PACKET_CMD_SAMPLE, packet_mc_group(dp, OVS_PACKET_CMD_SAMPLE));
1243         nla_nest_end(skb, nla);
1244
1245         return genlmsg_end(skb, ovs_header);
1246
1247 nla_put_failure:
1248         genlmsg_cancel(skb, ovs_header);
1249 error:
1250         return -EMSGSIZE;
1251 }
1252
1253 static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid,
1254                                              u32 seq, u8 cmd)
1255 {
1256         struct sk_buff *skb;
1257         int retval;
1258
1259         skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1260         if (!skb)
1261                 return ERR_PTR(-ENOMEM);
1262
1263         retval = ovs_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd);
1264         if (retval < 0) {
1265                 kfree_skb(skb);
1266                 return ERR_PTR(retval);
1267         }
1268         return skb;
1269 }
1270
1271 static int ovs_dp_cmd_validate(struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1272 {
1273         if (a[OVS_DP_ATTR_IPV4_FRAGS]) {
1274                 u32 frags = nla_get_u32(a[OVS_DP_ATTR_IPV4_FRAGS]);
1275
1276                 if (frags != OVS_DP_FRAG_ZERO && frags != OVS_DP_FRAG_DROP)
1277                         return -EINVAL;
1278         }
1279
1280         return CHECK_NUL_STRING(a[OVS_DP_ATTR_NAME], IFNAMSIZ - 1);
1281 }
1282
1283 /* Called with genl_mutex and optionally with RTNL lock also. */
1284 static struct datapath *lookup_datapath(struct ovs_header *ovs_header, struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1285 {
1286         struct datapath *dp;
1287
1288         if (!a[OVS_DP_ATTR_NAME])
1289                 dp = get_dp(ovs_header->dp_ifindex);
1290         else {
1291                 struct vport *vport;
1292
1293                 rcu_read_lock();
1294                 vport = vport_locate(nla_data(a[OVS_DP_ATTR_NAME]));
1295                 dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
1296                 rcu_read_unlock();
1297         }
1298         return dp ? dp : ERR_PTR(-ENODEV);
1299 }
1300
1301 /* Called with genl_mutex. */
1302 static void change_datapath(struct datapath *dp, struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1303 {
1304         if (a[OVS_DP_ATTR_IPV4_FRAGS])
1305                 dp->drop_frags = nla_get_u32(a[OVS_DP_ATTR_IPV4_FRAGS]) == OVS_DP_FRAG_DROP;
1306         if (a[OVS_DP_ATTR_SAMPLING])
1307                 dp->sflow_probability = nla_get_u32(a[OVS_DP_ATTR_SAMPLING]);
1308 }
1309
1310 static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1311 {
1312         struct nlattr **a = info->attrs;
1313         struct vport_parms parms;
1314         struct sk_buff *reply;
1315         struct datapath *dp;
1316         struct vport *vport;
1317         int err;
1318
1319         err = -EINVAL;
1320         if (!a[OVS_DP_ATTR_NAME])
1321                 goto err;
1322
1323         err = ovs_dp_cmd_validate(a);
1324         if (err)
1325                 goto err;
1326
1327         rtnl_lock();
1328         err = -ENODEV;
1329         if (!try_module_get(THIS_MODULE))
1330                 goto err_unlock_rtnl;
1331
1332         err = -ENOMEM;
1333         dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1334         if (dp == NULL)
1335                 goto err_put_module;
1336         INIT_LIST_HEAD(&dp->port_list);
1337
1338         /* Initialize kobject for bridge.  This will be added as
1339          * /sys/class/net/<devname>/brif later, if sysfs is enabled. */
1340         dp->ifobj.kset = NULL;
1341         kobject_init(&dp->ifobj, &dp_ktype);
1342
1343         /* Allocate table. */
1344         err = -ENOMEM;
1345         rcu_assign_pointer(dp->table, flow_tbl_alloc(TBL_MIN_BUCKETS));
1346         if (!dp->table)
1347                 goto err_free_dp;
1348
1349         /* Set up our datapath device. */
1350         parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1351         parms.type = OVS_VPORT_TYPE_INTERNAL;
1352         parms.options = NULL;
1353         parms.dp = dp;
1354         parms.port_no = OVSP_LOCAL;
1355         vport = new_vport(&parms);
1356         if (IS_ERR(vport)) {
1357                 err = PTR_ERR(vport);
1358                 if (err == -EBUSY)
1359                         err = -EEXIST;
1360
1361                 goto err_destroy_table;
1362         }
1363         dp->dp_ifindex = vport_get_ifindex(vport);
1364
1365         dp->drop_frags = 0;
1366         dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
1367         if (!dp->stats_percpu) {
1368                 err = -ENOMEM;
1369                 goto err_destroy_local_port;
1370         }
1371
1372         change_datapath(dp, a);
1373
1374         reply = ovs_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, OVS_DP_CMD_NEW);
1375         err = PTR_ERR(reply);
1376         if (IS_ERR(reply))
1377                 goto err_destroy_local_port;
1378
1379         list_add_tail(&dp->list_node, &dps);
1380         dp_sysfs_add_dp(dp);
1381
1382         rtnl_unlock();
1383
1384         genl_notify(reply, genl_info_net(info), info->snd_pid,
1385                     dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
1386         return 0;
1387
1388 err_destroy_local_port:
1389         dp_detach_port(get_vport_protected(dp, OVSP_LOCAL));
1390 err_destroy_table:
1391         flow_tbl_destroy(get_table_protected(dp));
1392 err_free_dp:
1393         kfree(dp);
1394 err_put_module:
1395         module_put(THIS_MODULE);
1396 err_unlock_rtnl:
1397         rtnl_unlock();
1398 err:
1399         return err;
1400 }
1401
1402 static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1403 {
1404         struct vport *vport, *next_vport;
1405         struct sk_buff *reply;
1406         struct datapath *dp;
1407         int err;
1408
1409         err = ovs_dp_cmd_validate(info->attrs);
1410         if (err)
1411                 goto exit;
1412
1413         rtnl_lock();
1414         dp = lookup_datapath(info->userhdr, info->attrs);
1415         err = PTR_ERR(dp);
1416         if (IS_ERR(dp))
1417                 goto exit_unlock;
1418
1419         reply = ovs_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, OVS_DP_CMD_DEL);
1420         err = PTR_ERR(reply);
1421         if (IS_ERR(reply))
1422                 goto exit_unlock;
1423
1424         list_for_each_entry_safe (vport, next_vport, &dp->port_list, node)
1425                 if (vport->port_no != OVSP_LOCAL)
1426                         dp_detach_port(vport);
1427
1428         dp_sysfs_del_dp(dp);
1429         list_del(&dp->list_node);
1430         dp_detach_port(get_vport_protected(dp, OVSP_LOCAL));
1431
1432         /* rtnl_unlock() will wait until all the references to devices that
1433          * are pending unregistration have been dropped.  We do it here to
1434          * ensure that any internal devices (which contain DP pointers) are
1435          * fully destroyed before freeing the datapath.
1436          */
1437         rtnl_unlock();
1438
1439         call_rcu(&dp->rcu, destroy_dp_rcu);
1440         module_put(THIS_MODULE);
1441
1442         genl_notify(reply, genl_info_net(info), info->snd_pid,
1443                     dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
1444
1445         return 0;
1446
1447 exit_unlock:
1448         rtnl_unlock();
1449 exit:
1450         return err;
1451 }
1452
1453 static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1454 {
1455         struct sk_buff *reply;
1456         struct datapath *dp;
1457         int err;
1458
1459         err = ovs_dp_cmd_validate(info->attrs);
1460         if (err)
1461                 return err;
1462
1463         dp = lookup_datapath(info->userhdr, info->attrs);
1464         if (IS_ERR(dp))
1465                 return PTR_ERR(dp);
1466
1467         change_datapath(dp, info->attrs);
1468
1469         reply = ovs_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, OVS_DP_CMD_NEW);
1470         if (IS_ERR(reply)) {
1471                 err = PTR_ERR(reply);
1472                 netlink_set_err(INIT_NET_GENL_SOCK, 0,
1473                                 dp_datapath_multicast_group.id, err);
1474                 return 0;
1475         }
1476
1477         genl_notify(reply, genl_info_net(info), info->snd_pid,
1478                     dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
1479         return 0;
1480 }
1481
1482 static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1483 {
1484         struct sk_buff *reply;
1485         struct datapath *dp;
1486         int err;
1487
1488         err = ovs_dp_cmd_validate(info->attrs);
1489         if (err)
1490                 return err;
1491
1492         dp = lookup_datapath(info->userhdr, info->attrs);
1493         if (IS_ERR(dp))
1494                 return PTR_ERR(dp);
1495
1496         reply = ovs_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, OVS_DP_CMD_NEW);
1497         if (IS_ERR(reply))
1498                 return PTR_ERR(reply);
1499
1500         return genlmsg_reply(reply, info);
1501 }
1502
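/* Dump callback for OVS_DP_CMD_GET: cb->args[0] holds the number of
 * datapaths already dumped, so each invocation skips that many entries
 * and continues with the next. */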
1503 static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1504 {
1505         struct datapath *dp;
1506         int skip = cb->args[0];
1507         int i = 0;
1508
 1509         list_for_each_entry (dp, &dps, list_node) {
 1510                 /* 'i' must count skipped entries too, or a resumed dump
 1511                  * would never get past 'skip'. */
 1512                 if (i >= skip &&
 1513                     ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid,
 1514                                          cb->nlh->nlmsg_seq, NLM_F_MULTI,
 1515                                          OVS_DP_CMD_NEW) < 0)
 1516                         break;
 1517                 i++;
 1518         }
1518
1519         cb->args[0] = i;
1520
1521         return skb->len;
1522 }
1523
1524 static struct genl_ops dp_datapath_genl_ops[] = {
1525         { .cmd = OVS_DP_CMD_NEW,
1526           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1527           .policy = datapath_policy,
1528           .doit = ovs_dp_cmd_new
1529         },
1530         { .cmd = OVS_DP_CMD_DEL,
1531           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1532           .policy = datapath_policy,
1533           .doit = ovs_dp_cmd_del
1534         },
1535         { .cmd = OVS_DP_CMD_GET,
1536           .flags = 0,               /* OK for unprivileged users. */
1537           .policy = datapath_policy,
1538           .doit = ovs_dp_cmd_get,
1539           .dumpit = ovs_dp_cmd_dump
1540         },
1541         { .cmd = OVS_DP_CMD_SET,
1542           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1543           .policy = datapath_policy,
1544           .doit = ovs_dp_cmd_set,
1545         },
1546 };
1547
1548 static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
1549 #ifdef HAVE_NLA_NUL_STRING
1550         [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1551         [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
1552         [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
1553         [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct rtnl_link_stats64) },
1554         [OVS_VPORT_ATTR_ADDRESS] = { .len = ETH_ALEN },
1555 #else
1556         [OVS_VPORT_ATTR_STATS] = { .minlen = sizeof(struct rtnl_link_stats64) },
1557         [OVS_VPORT_ATTR_ADDRESS] = { .minlen = ETH_ALEN },
1558 #endif
1559         [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
1560 };
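     /* A sketch of a request this policy validates: an OVS_VPORT_CMD_NEW
      * message adding an internal port might carry, inside the generic
      * netlink payload (attribute and type names per datapath-protocol.h;
      * the port name and number shown are illustrative only):
      *
      *    struct ovs_header       .dp_ifindex = <datapath's local ifindex>
      *    OVS_VPORT_ATTR_NAME     "int0"   (NUL-terminated, < IFNAMSIZ)
      *    OVS_VPORT_ATTR_TYPE     OVS_VPORT_TYPE_INTERNAL
      *    OVS_VPORT_ATTR_PORT_NO  5        (optional; lowest free if absent)
      */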
1561
1562 static struct genl_family dp_vport_genl_family = {
1563         .id = GENL_ID_GENERATE,
1564         .hdrsize = sizeof(struct ovs_header),
1565         .name = OVS_VPORT_FAMILY,
1566         .version = 1,
1567         .maxattr = OVS_VPORT_ATTR_MAX
1568 };
1569
1570 struct genl_multicast_group dp_vport_multicast_group = {
1571         .name = OVS_VPORT_MCGROUP
1572 };
1573
1574 /* Called with RTNL lock or RCU read lock. */
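     /* Emits one complete vport description into the skb: the ovs_header
      * identifying the datapath, then PORT_NO, TYPE, NAME, STATS, ADDRESS,
      * the nested OPTIONS, and IFINDEX when the vport has a network device. */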
1575 static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1576                                    u32 pid, u32 seq, u32 flags, u8 cmd)
1577 {
1578         struct ovs_header *ovs_header;
1579         struct nlattr *nla;
1580         int ifindex;
1581         int err;
1582
1583         ovs_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family,
1584                                  flags, cmd);
1585         if (!ovs_header)
1586                 return -EMSGSIZE;
1587
1588         ovs_header->dp_ifindex = vport->dp->dp_ifindex;
1589
1590         NLA_PUT_U32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no);
1591         NLA_PUT_U32(skb, OVS_VPORT_ATTR_TYPE, vport_get_type(vport));
1592         NLA_PUT_STRING(skb, OVS_VPORT_ATTR_NAME, vport_get_name(vport));
1593
1594         nla = nla_reserve(skb, OVS_VPORT_ATTR_STATS, sizeof(struct rtnl_link_stats64));
1595         if (!nla)
1596                 goto nla_put_failure;
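             /* If the stats cannot be read, trim the reserved attribute back
              * off the message rather than send uninitialized data. */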
1597         if (vport_get_stats(vport, nla_data(nla)))
1598                 __skb_trim(skb, skb->len - nla->nla_len);
1599
1600         NLA_PUT(skb, OVS_VPORT_ATTR_ADDRESS, ETH_ALEN, vport_get_addr(vport));
1601
1602         err = vport_get_options(vport, skb);
1603         if (err == -EMSGSIZE)
1604                 goto error;
1605
1606         ifindex = vport_get_ifindex(vport);
1607         if (ifindex > 0)
1608                 NLA_PUT_U32(skb, OVS_VPORT_ATTR_IFINDEX, ifindex);
1609
1610         return genlmsg_end(skb, ovs_header);
1611
1612 nla_put_failure:
1613         err = -EMSGSIZE;
1614 error:
1615         genlmsg_cancel(skb, ovs_header);
1616         return err;
1617 }
1618
1619 /* Called with RTNL lock or RCU read lock. */
1620 struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid,
1621                                          u32 seq, u8 cmd)
1622 {
1623         struct sk_buff *skb;
1624         int retval;
1625
1626         skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
1627         if (!skb)
1628                 return ERR_PTR(-ENOMEM);
1629
1630         retval = ovs_vport_cmd_fill_info(vport, skb, pid, seq, 0, cmd);
1631         if (retval < 0) {
1632                 kfree_skb(skb);
1633                 return ERR_PTR(retval);
1634         }
1635         return skb;
1636 }
1637
1638 static int ovs_vport_cmd_validate(struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1639 {
1640         return CHECK_NUL_STRING(a[OVS_VPORT_ATTR_NAME], IFNAMSIZ - 1);
1641 }
1642
1643 /* Called with RTNL lock or RCU read lock. */
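     /* Vport names are unique across all datapaths, so a lookup by
      * OVS_VPORT_ATTR_NAME ignores ovs_header->dp_ifindex; only lookups
      * by OVS_VPORT_ATTR_PORT_NO are scoped to the given datapath. */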
1644 static struct vport *lookup_vport(struct ovs_header *ovs_header,
1645                                   struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1646 {
1647         struct datapath *dp;
1648         struct vport *vport;
1649
1650         if (a[OVS_VPORT_ATTR_NAME]) {
1651                 vport = vport_locate(nla_data(a[OVS_VPORT_ATTR_NAME]));
1652                 if (!vport)
1653                         return ERR_PTR(-ENODEV);
1654                 return vport;
1655         } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1656                 u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1657
1658                 if (port_no >= DP_MAX_PORTS)
1659                         return ERR_PTR(-EFBIG);
1660
1661                 dp = get_dp(ovs_header->dp_ifindex);
1662                 if (!dp)
1663                         return ERR_PTR(-ENODEV);
1664
1665                 vport = get_vport_protected(dp, port_no);
1666                 if (!vport)
1667                         return ERR_PTR(-ENOENT);
1668                 return vport;
1669         } else
1670                 return ERR_PTR(-EINVAL);
1671 }
1672
1673 /* Called with RTNL lock. */
1674 static int change_vport(struct vport *vport, struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1675 {
1676         int err = 0;
1677         if (a[OVS_VPORT_ATTR_STATS])
1678                 err = vport_set_stats(vport, nla_data(a[OVS_VPORT_ATTR_STATS]));
1679         if (!err && a[OVS_VPORT_ATTR_ADDRESS])
1680                 err = vport_set_addr(vport, nla_data(a[OVS_VPORT_ATTR_ADDRESS]));
1681         return err;
1682 }
1683
1684 static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1685 {
1686         struct nlattr **a = info->attrs;
1687         struct ovs_header *ovs_header = info->userhdr;
1688         struct vport_parms parms;
1689         struct sk_buff *reply;
1690         struct vport *vport;
1691         struct datapath *dp;
1692         u32 port_no;
1693         int err;
1694
1695         err = -EINVAL;
1696         if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE])
1697                 goto exit;
1698
1699         err = ovs_vport_cmd_validate(a);
1700         if (err)
1701                 goto exit;
1702
1703         rtnl_lock();
1704         dp = get_dp(ovs_header->dp_ifindex);
1705         err = -ENODEV;
1706         if (!dp)
1707                 goto exit_unlock;
1708
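             /* An explicit OVS_VPORT_ATTR_PORT_NO is honored when that slot
              * is free; otherwise the lowest free port number is assigned.
              * The search starts at 1 because the local port (OVSP_LOCAL)
              * is created together with the datapath itself. */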
1709         if (a[OVS_VPORT_ATTR_PORT_NO]) {
1710                 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1711
1712                 err = -EFBIG;
1713                 if (port_no >= DP_MAX_PORTS)
1714                         goto exit_unlock;
1715
1716                 vport = get_vport_protected(dp, port_no);
1717                 err = -EBUSY;
1718                 if (vport)
1719                         goto exit_unlock;
1720         } else {
1721                 for (port_no = 1; ; port_no++) {
1722                         if (port_no >= DP_MAX_PORTS) {
1723                                 err = -EFBIG;
1724                                 goto exit_unlock;
1725                         }
1726                         vport = get_vport_protected(dp, port_no);
1727                         if (!vport)
1728                                 break;
1729                 }
1730         }
1731
1732         parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
1733         parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
1734         parms.options = a[OVS_VPORT_ATTR_OPTIONS];
1735         parms.dp = dp;
1736         parms.port_no = port_no;
1737
1738         vport = new_vport(&parms);
1739         err = PTR_ERR(vport);
1740         if (IS_ERR(vport))
1741                 goto exit_unlock;
1742
1743         set_internal_devs_mtu(dp);
1744         dp_sysfs_add_if(vport);
1745
1746         err = change_vport(vport, a);
1747         if (!err) {
1748                 reply = ovs_vport_cmd_build_info(vport, info->snd_pid,
1749                                                  info->snd_seq, OVS_VPORT_CMD_NEW);
1750                 if (IS_ERR(reply))
1751                         err = PTR_ERR(reply);
1752         }
1753         if (err) {
1754                 dp_detach_port(vport);
1755                 goto exit_unlock;
1756         }
1757         genl_notify(reply, genl_info_net(info), info->snd_pid,
1758                     dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1759
1761 exit_unlock:
1762         rtnl_unlock();
1763 exit:
1764         return err;
1765 }
1766
1767 static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1768 {
1769         struct nlattr **a = info->attrs;
1770         struct sk_buff *reply;
1771         struct vport *vport;
1772         int err;
1773
1774         err = ovs_vport_cmd_validate(a);
1775         if (err)
1776                 goto exit;
1777
1778         rtnl_lock();
1779         vport = lookup_vport(info->userhdr, a);
1780         err = PTR_ERR(vport);
1781         if (IS_ERR(vport))
1782                 goto exit_unlock;
1783
1784         err = 0;
1785         if (a[OVS_VPORT_ATTR_OPTIONS])
1786                 err = vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
1787         if (!err)
1788                 err = change_vport(vport, a);
1789
1790         reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1791                                          OVS_VPORT_CMD_NEW);
1792         if (IS_ERR(reply)) {
1793                 netlink_set_err(INIT_NET_GENL_SOCK, 0,
1794                                 dp_vport_multicast_group.id, PTR_ERR(reply));
1795                 goto exit_unlock;
1796         }
1798
1799         genl_notify(reply, genl_info_net(info), info->snd_pid,
1800                     dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1801
1802 exit_unlock:
1803         rtnl_unlock();
1804 exit:
1805         return err;
1806 }
1807
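     /* Deleting the local port on its own is rejected below: it represents
      * the datapath itself and goes away only when the datapath does. */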
1808 static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
1809 {
1810         struct nlattr **a = info->attrs;
1811         struct sk_buff *reply;
1812         struct vport *vport;
1813         int err;
1814
1815         err = ovs_vport_cmd_validate(a);
1816         if (err)
1817                 goto exit;
1818
1819         rtnl_lock();
1820         vport = lookup_vport(info->userhdr, a);
1821         err = PTR_ERR(vport);
1822         if (IS_ERR(vport))
1823                 goto exit_unlock;
1824
1825         if (vport->port_no == OVSP_LOCAL) {
1826                 err = -EINVAL;
1827                 goto exit_unlock;
1828         }
1829
1830         reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1831                                          OVS_VPORT_CMD_DEL);
1832         err = PTR_ERR(reply);
1833         if (IS_ERR(reply))
1834                 goto exit_unlock;
1835
1836         dp_detach_port(vport);
1837
1838         genl_notify(reply, genl_info_net(info), info->snd_pid,
1839                     dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1840
1841 exit_unlock:
1842         rtnl_unlock();
1843 exit:
1844         return err;
1845 }
1846
1847 static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
1848 {
1849         struct nlattr **a = info->attrs;
1850         struct ovs_header *ovs_header = info->userhdr;
1851         struct sk_buff *reply;
1852         struct vport *vport;
1853         int err;
1854
1855         err = ovs_vport_cmd_validate(a);
1856         if (err)
1857                 goto exit;
1858
1859         rcu_read_lock();
1860         vport = lookup_vport(ovs_header, a);
1861         err = PTR_ERR(vport);
1862         if (IS_ERR(vport))
1863                 goto exit_unlock;
1864
1865         reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1866                                          OVS_VPORT_CMD_NEW);
1867         err = PTR_ERR(reply);
1868         if (IS_ERR(reply))
1869                 goto exit_unlock;
1870
1871         rcu_read_unlock();
1872
1873         return genlmsg_reply(reply, info);
1874
1875 exit_unlock:
1876         rcu_read_unlock();
1877 exit:
1878         return err;
1879 }
1880
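     /* Vport dumps resume by port number: cb->args[0] holds the next
      * port_no to examine, so holes left by deleted ports are skipped and
      * the walk continues correctly across invocations. */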
1881 static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1882 {
1883         struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1884         struct datapath *dp;
1885         u32 port_no;
1886         int retval;
1887
1888         dp = get_dp(ovs_header->dp_ifindex);
1889         if (!dp)
1890                 return -ENODEV;
1891
1892         rcu_read_lock();
1893         for (port_no = cb->args[0]; port_no < DP_MAX_PORTS; port_no++) {
1894                 struct vport *vport;
1895
1896                 vport = get_vport_protected(dp, port_no);
1897                 if (!vport)
1898                         continue;
1899
1900                 if (ovs_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).pid,
1901                                             cb->nlh->nlmsg_seq, NLM_F_MULTI,
1902                                             OVS_VPORT_CMD_NEW) < 0)
1903                         break;
1904         }
1905         rcu_read_unlock();
1906
1907         cb->args[0] = port_no;
1908         retval = skb->len;
1909
1910         return retval;
1911 }
1912
1913 static struct genl_ops dp_vport_genl_ops[] = {
1914         { .cmd = OVS_VPORT_CMD_NEW,
1915           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1916           .policy = vport_policy,
1917           .doit = ovs_vport_cmd_new
1918         },
1919         { .cmd = OVS_VPORT_CMD_DEL,
1920           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1921           .policy = vport_policy,
1922           .doit = ovs_vport_cmd_del
1923         },
1924         { .cmd = OVS_VPORT_CMD_GET,
1925           .flags = 0,               /* OK for unprivileged users. */
1926           .policy = vport_policy,
1927           .doit = ovs_vport_cmd_get,
1928           .dumpit = ovs_vport_cmd_dump
1929         },
1930         { .cmd = OVS_VPORT_CMD_SET,
1931           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1932           .policy = vport_policy,
1933           .doit = ovs_vport_cmd_set,
1934         },
1935 };
1936
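     /* Table of every generic netlink family this module registers, so that
      * dp_register_genl() can roll back on partial failure: it unregisters
      * exactly the families that had already been registered. */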
1937 struct genl_family_and_ops {
1938         struct genl_family *family;
1939         struct genl_ops *ops;
1940         int n_ops;
1941         struct genl_multicast_group *group;
1942 };
1943
1944 static const struct genl_family_and_ops dp_genl_families[] = {
1945         { &dp_datapath_genl_family,
1946           dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
1947           &dp_datapath_multicast_group },
1948         { &dp_vport_genl_family,
1949           dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
1950           &dp_vport_multicast_group },
1951         { &dp_flow_genl_family,
1952           dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
1953           &dp_flow_multicast_group },
1954         { &dp_packet_genl_family,
1955           dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
1956           NULL },
1957 };
1958
1959 static void dp_unregister_genl(int n_families)
1960 {
1961         int i;
1962
1963         for (i = 0; i < n_families; i++)
1964                 genl_unregister_family(dp_genl_families[i].family);
1965 }
1966
1967 static int dp_register_genl(void)
1968 {
1969         int n_registered;
1970         int err;
1971         int i;
1972
1973         n_registered = 0;
1974         for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
1975                 const struct genl_family_and_ops *f = &dp_genl_families[i];
1976
1977                 err = genl_register_family_with_ops(f->family, f->ops,
1978                                                     f->n_ops);
1979                 if (err)
1980                         goto error;
1981                 n_registered++;
1982
1983                 if (f->group) {
1984                         err = genl_register_mc_group(f->family, f->group);
1985                         if (err)
1986                                 goto error;
1987                 }
1988         }
1989
1990         err = packet_register_mc_groups();
1991         if (err)
1992                 goto error;
1993         return 0;
1994
1995 error:
1996         dp_unregister_genl(n_registered);
1997         return err;
1998 }
1999
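     /* Module init: bring the subsystems up in dependency order (tunnels,
      * flow table, vports, netdevice notifier, generic netlink families);
      * dp_cleanup() tears them down in the reverse order. */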
2000 static int __init dp_init(void)
2001 {
2002         struct sk_buff *dummy_skb;
2003         int err;
2004
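             /* ovs_skb_cb lives in skb->cb[], which is only 48 bytes of
              * scratch space; fail the build if it ever outgrows that. */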
2005         BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb));
2006
2007         pr_info("Open vSwitch %s, built "__DATE__" "__TIME__"\n", VERSION BUILDNR);
2008
2009         err = tnl_init();
2010         if (err)
2011                 goto error;
2012
2013         err = flow_init();
2014         if (err)
2015                 goto error_tnl_exit;
2016
2017         err = vport_init();
2018         if (err)
2019                 goto error_flow_exit;
2020
2021         err = register_netdevice_notifier(&dp_device_notifier);
2022         if (err)
2023                 goto error_vport_exit;
2024
2025         err = dp_register_genl();
2026         if (err < 0)
2027                 goto error_unreg_notifier;
2028
2029         return 0;
2030
2031 error_unreg_notifier:
2032         unregister_netdevice_notifier(&dp_device_notifier);
2033 error_vport_exit:
2034         vport_exit();
2035 error_flow_exit:
2036         flow_exit();
2037 error_tnl_exit:
2038         tnl_exit();
2039 error:
2040         return err;
2041 }
2042
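     /* rcu_barrier() waits for outstanding RCU callbacks (e.g. deferred
      * frees scheduled by the flow table and vport layers) to finish before
      * the module text that implements them goes away. */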
2043 static void dp_cleanup(void)
2044 {
2045         rcu_barrier();
2046         dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2047         unregister_netdevice_notifier(&dp_device_notifier);
2048         vport_exit();
2049         flow_exit();
2050         tnl_exit();
2051 }
2052
2053 module_init(dp_init);
2054 module_exit(dp_cleanup);
2055
2056 MODULE_DESCRIPTION("Open vSwitch switching datapath");
2057 MODULE_LICENSE("GPL");