datapath: Use multicast groups allocated for upcalls.
sliver-openvswitch.git: datapath/datapath.c
/*
 * Copyright (c) 2007, 2008, 2009, 2010, 2011 Nicira Networks.
 * Distributed under the terms of the GNU GPL version 2.
 *
 * Significant portions of this file may be copied from parts of the Linux
 * kernel, by Linus Torvalds and others.
 */

/* Functions for managing the dp interface/device. */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/version.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/system.h>
#include <asm/div64.h>
#include <asm/bug.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/inet_ecn.h>
#include <net/genetlink.h>

#include "openvswitch/datapath-protocol.h"
#include "checksum.h"
#include "datapath.h"
#include "actions.h"
#include "flow.h"
#include "loop_counter.h"
#include "table.h"
#include "vport-internal_dev.h"

int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
EXPORT_SYMBOL(dp_ioctl_hook);

/**
 * DOC: Locking:
 *
 * Writes to device state (add/remove datapath, port, set operations on vports,
 * etc.) are protected by RTNL.
 *
 * Writes to other state (flow table modifications, set miscellaneous datapath
 * parameters such as drop frags, etc.) are protected by genl_mutex.  The RTNL
 * lock nests inside genl_mutex.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact with
 * each other.
 */
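
/*
 * Illustrative sketch (not part of the original file): the read side of the
 * scheme above.  example_print_dp_name() is a hypothetical helper; it only
 * demonstrates that readers bracket their accesses with rcu_read_lock() and
 * must not use RCU-protected pointers after rcu_read_unlock().
 */
#if 0
static void example_print_dp_name(int dp_ifindex)
{
        struct datapath *dp;

        rcu_read_lock();
        dp = get_dp(dp_ifindex);
        if (dp)
                pr_info("datapath %d is %s\n", dp_ifindex, dp_name(dp));
        rcu_read_unlock();
}
#endif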

/* Global list of datapaths to enable dumping them all out.
 * Protected by genl_mutex.
 */
static LIST_HEAD(dps);

static struct vport *new_vport(const struct vport_parms *);
static int queue_control_packets(struct datapath *, struct sk_buff *,
                                 const struct dp_upcall_info *);

/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
struct datapath *get_dp(int dp_ifindex)
{
        struct datapath *dp = NULL;
        struct net_device *dev;

        rcu_read_lock();
        dev = dev_get_by_index_rcu(&init_net, dp_ifindex);
        if (dev) {
                struct vport *vport = internal_dev_get_vport(dev);
                if (vport)
                        dp = vport->dp;
        }
        rcu_read_unlock();

        return dp;
}
EXPORT_SYMBOL_GPL(get_dp);

/* Must be called with genl_mutex. */
static struct tbl *get_table_protected(struct datapath *dp)
{
        return rcu_dereference_protected(dp->table, lockdep_genl_is_held());
}

/* Must be called with rcu_read_lock or RTNL lock. */
static struct vport *get_vport_protected(struct datapath *dp, u16 port_no)
{
        return rcu_dereference_rtnl(dp->ports[port_no]);
}

/* Must be called with rcu_read_lock or RTNL lock. */
const char *dp_name(const struct datapath *dp)
{
        return vport_get_name(rcu_dereference_rtnl(dp->ports[ODPP_LOCAL]));
}

static inline size_t br_nlmsg_size(void)
{
        return NLMSG_ALIGN(sizeof(struct ifinfomsg))
               + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
               + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
               + nla_total_size(4) /* IFLA_MASTER */
               + nla_total_size(4) /* IFLA_MTU */
               + nla_total_size(4) /* IFLA_LINK */
               + nla_total_size(1); /* IFLA_OPERSTATE */
}

/* Caller must hold RTNL lock. */
static int dp_fill_ifinfo(struct sk_buff *skb,
                          const struct vport *port,
                          int event, unsigned int flags)
{
        struct datapath *dp = port->dp;
        int ifindex = vport_get_ifindex(port);
        int iflink = vport_get_iflink(port);
        struct ifinfomsg *hdr;
        struct nlmsghdr *nlh;

        if (ifindex < 0)
                return ifindex;

        if (iflink < 0)
                return iflink;

        nlh = nlmsg_put(skb, 0, 0, event, sizeof(*hdr), flags);
        if (nlh == NULL)
                return -EMSGSIZE;

        hdr = nlmsg_data(nlh);
        hdr->ifi_family = AF_BRIDGE;
        hdr->__ifi_pad = 0;
        hdr->ifi_type = ARPHRD_ETHER;
        hdr->ifi_index = ifindex;
        hdr->ifi_flags = vport_get_flags(port);
        hdr->ifi_change = 0;

        NLA_PUT_STRING(skb, IFLA_IFNAME, vport_get_name(port));
        NLA_PUT_U32(skb, IFLA_MASTER,
                vport_get_ifindex(get_vport_protected(dp, ODPP_LOCAL)));
        NLA_PUT_U32(skb, IFLA_MTU, vport_get_mtu(port));
#ifdef IFLA_OPERSTATE
        NLA_PUT_U8(skb, IFLA_OPERSTATE,
                   vport_is_running(port)
                        ? vport_get_operstate(port)
                        : IF_OPER_DOWN);
#endif

        NLA_PUT(skb, IFLA_ADDRESS, ETH_ALEN, vport_get_addr(port));

        if (ifindex != iflink)
                NLA_PUT_U32(skb, IFLA_LINK, iflink);

        return nlmsg_end(skb, nlh);

nla_put_failure:
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}

/* Caller must hold RTNL lock. */
static void dp_ifinfo_notify(int event, struct vport *port)
{
        struct sk_buff *skb;
        int err = -ENOBUFS;

        skb = nlmsg_new(br_nlmsg_size(), GFP_KERNEL);
        if (skb == NULL)
                goto errout;

        err = dp_fill_ifinfo(skb, port, event, 0);
        if (err < 0) {
                /* -EMSGSIZE implies BUG in br_nlmsg_size() */
                WARN_ON(err == -EMSGSIZE);
                kfree_skb(skb);
                goto errout;
        }
        rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
        return;
errout:
        if (err < 0)
                rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err);
}

static void release_dp(struct kobject *kobj)
{
        struct datapath *dp = container_of(kobj, struct datapath, ifobj);
        kfree(dp);
}

static struct kobj_type dp_ktype = {
        .release = release_dp
};

static void destroy_dp_rcu(struct rcu_head *rcu)
{
        struct datapath *dp = container_of(rcu, struct datapath, rcu);

        tbl_destroy((struct tbl __force *)dp->table, flow_free_tbl);
        free_percpu(dp->stats_percpu);
        kobject_put(&dp->ifobj);
}

/* Called with RTNL lock and genl_lock. */
static struct vport *new_vport(const struct vport_parms *parms)
{
        struct vport *vport;

        vport = vport_add(parms);
        if (!IS_ERR(vport)) {
                struct datapath *dp = parms->dp;

                rcu_assign_pointer(dp->ports[parms->port_no], vport);
                list_add(&vport->node, &dp->port_list);

                dp_ifinfo_notify(RTM_NEWLINK, vport);
        }

        return vport;
}

/* Called with RTNL lock. */
int dp_detach_port(struct vport *p)
{
        ASSERT_RTNL();

        if (p->port_no != ODPP_LOCAL)
                dp_sysfs_del_if(p);
        dp_ifinfo_notify(RTM_DELLINK, p);

        /* First drop references to device. */
        list_del(&p->node);
        rcu_assign_pointer(p->dp->ports[p->port_no], NULL);

        /* Then destroy it. */
        return vport_del(p);
}

/* Must be called with rcu_read_lock. */
void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
{
        struct datapath *dp = p->dp;
        struct dp_stats_percpu *stats;
        int stats_counter_off;
        struct sw_flow_actions *acts;
        struct loop_counter *loop;
        int error;

        OVS_CB(skb)->vport = p;

        if (!OVS_CB(skb)->flow) {
                struct sw_flow_key key;
                struct tbl_node *flow_node;
                bool is_frag;

                /* Extract flow from 'skb' into 'key'. */
                error = flow_extract(skb, p->port_no, &key, &is_frag);
                if (unlikely(error)) {
                        kfree_skb(skb);
                        return;
                }

                if (is_frag && dp->drop_frags) {
                        kfree_skb(skb);
                        stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
                        goto out;
                }

                /* Look up flow. */
                flow_node = tbl_lookup(rcu_dereference(dp->table), &key,
                                        flow_hash(&key), flow_cmp);
                if (unlikely(!flow_node)) {
                        struct dp_upcall_info upcall;

                        upcall.cmd = ODP_PACKET_CMD_MISS;
                        upcall.key = &key;
                        upcall.userdata = 0;
                        upcall.sample_pool = 0;
                        upcall.actions = NULL;
                        upcall.actions_len = 0;
                        dp_upcall(dp, skb, &upcall);
                        stats_counter_off = offsetof(struct dp_stats_percpu, n_missed);
                        goto out;
                }

                OVS_CB(skb)->flow = flow_cast(flow_node);
        }

        stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
        flow_used(OVS_CB(skb)->flow, skb);

        acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);

        /* Check whether we've looped too much. */
        loop = loop_get_counter();
        if (unlikely(++loop->count > MAX_LOOPS))
                loop->looping = true;
        if (unlikely(loop->looping)) {
                loop_suppress(dp, acts);
                kfree_skb(skb);
                goto out_loop;
        }

        /* Execute actions. */
        execute_actions(dp, skb, &OVS_CB(skb)->flow->key, acts->actions,
                        acts->actions_len);

        /* Check whether sub-actions looped too much. */
        if (unlikely(loop->looping))
                loop_suppress(dp, acts);

out_loop:
        /* Decrement loop counter. */
        if (!--loop->count)
                loop->looping = false;
        loop_put_counter();

out:
        /* Update datapath statistics. */
        local_bh_disable();
        stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());

        write_seqcount_begin(&stats->seqlock);
        (*(u64 *)((u8 *)stats + stats_counter_off))++;
        write_seqcount_end(&stats->seqlock);

        local_bh_enable();
}

static void copy_and_csum_skb(struct sk_buff *skb, void *to)
{
        u16 csum_start, csum_offset;
        __wsum csum;

        get_skb_csum_pointers(skb, &csum_start, &csum_offset);
        csum_start -= skb_headroom(skb);
        BUG_ON(csum_start >= skb_headlen(skb));

        skb_copy_bits(skb, 0, to, csum_start);

        csum = skb_copy_and_csum_bits(skb, csum_start, to + csum_start,
                                      skb->len - csum_start, 0);
        *(__sum16 *)(to + csum_start + csum_offset) = csum_fold(csum);
}

static struct genl_family dp_packet_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct odp_header),
        .name = ODP_PACKET_FAMILY,
        .version = 1,
        .maxattr = ODP_PACKET_ATTR_MAX
};

/* Generic Netlink multicast groups for upcalls.
 *
 * We really want three unique multicast groups per datapath, but we can't even
 * get one, because genl_register_mc_group() takes genl_lock, which is also
 * held during Generic Netlink message processing, so trying to acquire
 * multicast groups during ODP_DP_NEW processing deadlocks.  Instead, we
 * preallocate a few groups and use them round-robin for datapaths.  Collision
 * isn't fatal--multicast listeners should check that the family is the one
 * that they want and discard others--but it wastes time and memory to receive
 * unwanted messages.
 */
#define PACKET_N_MC_GROUPS 16
static struct genl_multicast_group packet_mc_groups[PACKET_N_MC_GROUPS];

static u32 packet_mc_group(struct datapath *dp, u8 cmd)
{
        u32 idx;
        BUILD_BUG_ON_NOT_POWER_OF_2(PACKET_N_MC_GROUPS);

        idx = jhash_2words(dp->dp_ifindex, cmd, 0) & (PACKET_N_MC_GROUPS - 1);
        return packet_mc_groups[idx].id;
}
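
/*
 * Illustrative sketch (not part of the original file): because the mapping
 * above is only a hash of (dp_ifindex, cmd), two datapaths may share a
 * group.  example_show_upcall_groups() is hypothetical; it just reports
 * which preallocated groups a datapath's upcalls will use, and listeners
 * must still filter on dp_ifindex.
 */
#if 0
static void example_show_upcall_groups(struct datapath *dp)
{
        pr_info("dp %d: miss=%u action=%u sample=%u\n", dp->dp_ifindex,
                packet_mc_group(dp, ODP_PACKET_CMD_MISS),
                packet_mc_group(dp, ODP_PACKET_CMD_ACTION),
                packet_mc_group(dp, ODP_PACKET_CMD_SAMPLE));
}
#endif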

static int packet_register_mc_groups(void)
{
        int i;

        for (i = 0; i < PACKET_N_MC_GROUPS; i++) {
                struct genl_multicast_group *group = &packet_mc_groups[i];
                int error;

                sprintf(group->name, "packet%d", i);
                error = genl_register_mc_group(&dp_packet_genl_family, group);
                if (error)
                        return error;
        }
        return 0;
}

int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info)
{
        struct dp_stats_percpu *stats;
        int err;

        WARN_ON_ONCE(skb_shared(skb));

        forward_ip_summed(skb);

        err = vswitch_skb_checksum_setup(skb);
        if (err)
                goto err_kfree_skb;

        /* Break apart GSO packets into their component pieces.  Otherwise
         * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
        if (skb_is_gso(skb)) {
                struct sk_buff *nskb = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);

                kfree_skb(skb);
                skb = nskb;
                if (IS_ERR(skb)) {
                        err = PTR_ERR(skb);
                        goto err;
                }
        }

        return queue_control_packets(dp, skb, upcall_info);

err_kfree_skb:
        kfree_skb(skb);
err:
        local_bh_disable();
        stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());

        write_seqcount_begin(&stats->seqlock);
        stats->n_lost++;
        write_seqcount_end(&stats->seqlock);

        local_bh_enable();

        return err;
}
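
/*
 * Illustrative sketch (not part of the original file): how an action
 * implementation might send an ODP_PACKET_CMD_ACTION upcall through
 * dp_upcall(), mirroring the ODP_PACKET_CMD_MISS upcall built in
 * dp_process_received_packet().  example_controller_upcall() is
 * hypothetical; 'userdata' would come from an ODP_ACTION_ATTR_CONTROLLER
 * action.
 */
#if 0
static int example_controller_upcall(struct datapath *dp, struct sk_buff *skb,
                                     const struct sw_flow_key *key,
                                     u64 userdata)
{
        struct dp_upcall_info upcall;

        upcall.cmd = ODP_PACKET_CMD_ACTION;
        upcall.key = key;
        upcall.userdata = userdata;
        upcall.sample_pool = 0;
        upcall.actions = NULL;
        upcall.actions_len = 0;
        return dp_upcall(dp, skb, &upcall);
}
#endif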

/* Send each packet in the 'skb' list to userspace for 'dp' as directed by
 * 'upcall_info'.  There will be only one packet unless we broke up a GSO
 * packet.
 */
static int queue_control_packets(struct datapath *dp, struct sk_buff *skb,
                                 const struct dp_upcall_info *upcall_info)
{
        u32 group = packet_mc_group(dp, upcall_info->cmd);
        struct sk_buff *nskb;
        int port_no;
        int err;

        if (OVS_CB(skb)->vport)
                port_no = OVS_CB(skb)->vport->port_no;
        else
                port_no = ODPP_LOCAL;

        do {
                struct odp_header *upcall;
                struct sk_buff *user_skb; /* to be queued to userspace */
                struct nlattr *nla;
                unsigned int len;

                nskb = skb->next;
                skb->next = NULL;

                len = sizeof(struct odp_header);
                len += nla_total_size(4); /* ODP_PACKET_ATTR_TYPE. */
                len += nla_total_size(skb->len);
                len += nla_total_size(FLOW_BUFSIZE);
                if (upcall_info->userdata)
                        len += nla_total_size(8);
                if (upcall_info->sample_pool)
                        len += nla_total_size(4);
                if (upcall_info->actions_len)
                        len += nla_total_size(upcall_info->actions_len);

                user_skb = genlmsg_new(len, GFP_ATOMIC);
                if (!user_skb) {
                        err = -ENOBUFS;
                        netlink_set_err(INIT_NET_GENL_SOCK, 0, group, err);
                        goto err_kfree_skbs;
                }

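                /* 'user_skb' was sized by genlmsg_new() above to hold every
                 * attribute we are about to add, so this genlmsg_put() is
                 * not expected to fail. */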
                upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 0, upcall_info->cmd);
                upcall->dp_ifindex = dp->dp_ifindex;

                nla = nla_nest_start(user_skb, ODP_PACKET_ATTR_KEY);
                flow_to_nlattrs(upcall_info->key, user_skb);
                nla_nest_end(user_skb, nla);

                if (upcall_info->userdata)
                        nla_put_u64(user_skb, ODP_PACKET_ATTR_USERDATA, upcall_info->userdata);
                if (upcall_info->sample_pool)
                        nla_put_u32(user_skb, ODP_PACKET_ATTR_SAMPLE_POOL, upcall_info->sample_pool);
                if (upcall_info->actions_len) {
                        const struct nlattr *actions = upcall_info->actions;
                        u32 actions_len = upcall_info->actions_len;

                        nla = nla_nest_start(user_skb, ODP_PACKET_ATTR_ACTIONS);
                        memcpy(__skb_put(user_skb, actions_len), actions, actions_len);
                        nla_nest_end(user_skb, nla);
                }

                nla = __nla_reserve(user_skb, ODP_PACKET_ATTR_PACKET, skb->len);
                if (skb->ip_summed == CHECKSUM_PARTIAL)
                        copy_and_csum_skb(skb, nla_data(nla));
                else
                        skb_copy_bits(skb, 0, nla_data(nla), skb->len);

                err = genlmsg_multicast(user_skb, 0, group, GFP_ATOMIC);
                if (err)
                        goto err_kfree_skbs;

                kfree_skb(skb);
                skb = nskb;
        } while (skb);
        return 0;

err_kfree_skbs:
        kfree_skb(skb);
        while ((skb = nskb) != NULL) {
                nskb = skb->next;
                kfree_skb(skb);
        }
        return err;
}

/* Called with genl_mutex. */
static int flush_flows(int dp_ifindex)
{
        struct tbl *old_table;
        struct tbl *new_table;
        struct datapath *dp;

        dp = get_dp(dp_ifindex);
        if (!dp)
                return -ENODEV;

        old_table = get_table_protected(dp);
        new_table = tbl_create(TBL_MIN_BUCKETS);
        if (!new_table)
                return -ENOMEM;

        rcu_assign_pointer(dp->table, new_table);

        tbl_deferred_destroy(old_table, flow_free_tbl);

        return 0;
}

static int validate_actions(const struct nlattr *attr)
{
        const struct nlattr *a;
        int rem;

        nla_for_each_nested(a, attr, rem) {
                static const u32 action_lens[ODP_ACTION_ATTR_MAX + 1] = {
                        [ODP_ACTION_ATTR_OUTPUT] = 4,
                        [ODP_ACTION_ATTR_CONTROLLER] = 8,
                        [ODP_ACTION_ATTR_SET_DL_TCI] = 2,
                        [ODP_ACTION_ATTR_STRIP_VLAN] = 0,
                        [ODP_ACTION_ATTR_SET_DL_SRC] = ETH_ALEN,
                        [ODP_ACTION_ATTR_SET_DL_DST] = ETH_ALEN,
                        [ODP_ACTION_ATTR_SET_NW_SRC] = 4,
                        [ODP_ACTION_ATTR_SET_NW_DST] = 4,
                        [ODP_ACTION_ATTR_SET_NW_TOS] = 1,
                        [ODP_ACTION_ATTR_SET_TP_SRC] = 2,
                        [ODP_ACTION_ATTR_SET_TP_DST] = 2,
                        [ODP_ACTION_ATTR_SET_TUNNEL] = 8,
                        [ODP_ACTION_ATTR_SET_PRIORITY] = 4,
                        [ODP_ACTION_ATTR_POP_PRIORITY] = 0,
                        [ODP_ACTION_ATTR_DROP_SPOOFED_ARP] = 0,
                };
                int type = nla_type(a);

                if (type > ODP_ACTION_ATTR_MAX || nla_len(a) != action_lens[type])
                        return -EINVAL;

                switch (type) {
                case ODP_ACTION_ATTR_UNSPEC:
                        return -EINVAL;

                case ODP_ACTION_ATTR_CONTROLLER:
                case ODP_ACTION_ATTR_STRIP_VLAN:
                case ODP_ACTION_ATTR_SET_DL_SRC:
                case ODP_ACTION_ATTR_SET_DL_DST:
                case ODP_ACTION_ATTR_SET_NW_SRC:
                case ODP_ACTION_ATTR_SET_NW_DST:
                case ODP_ACTION_ATTR_SET_TP_SRC:
                case ODP_ACTION_ATTR_SET_TP_DST:
                case ODP_ACTION_ATTR_SET_TUNNEL:
                case ODP_ACTION_ATTR_SET_PRIORITY:
                case ODP_ACTION_ATTR_POP_PRIORITY:
                case ODP_ACTION_ATTR_DROP_SPOOFED_ARP:
                        /* No validation needed. */
                        break;

                case ODP_ACTION_ATTR_OUTPUT:
                        if (nla_get_u32(a) >= DP_MAX_PORTS)
                                return -EINVAL;
                        break;

                case ODP_ACTION_ATTR_SET_DL_TCI:
                        if (nla_get_be16(a) & htons(VLAN_CFI_MASK))
                                return -EINVAL;
                        break;

                case ODP_ACTION_ATTR_SET_NW_TOS:
                        if (nla_get_u8(a) & INET_ECN_MASK)
                                return -EINVAL;
                        break;

                default:
                        return -EOPNOTSUPP;
                }
        }

        if (rem > 0)
                return -EINVAL;

        return 0;
}
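
/*
 * Illustrative sketch (not part of the original file): composing an actions
 * list that validate_actions() accepts -- a single ODP_ACTION_ATTR_OUTPUT to
 * port 1 nested in ODP_PACKET_ATTR_ACTIONS.  example_put_output_action() is
 * hypothetical, and 'skb' is assumed to be a message under construction
 * (e.g. for ODP_PACKET_CMD_EXECUTE).
 */
#if 0
static int example_put_output_action(struct sk_buff *skb)
{
        struct nlattr *nla;

        nla = nla_nest_start(skb, ODP_PACKET_ATTR_ACTIONS);
        if (!nla)
                return -EMSGSIZE;
        NLA_PUT_U32(skb, ODP_ACTION_ATTR_OUTPUT, 1);
        nla_nest_end(skb, nla);
        return 0;

nla_put_failure:
        nla_nest_cancel(skb, nla);
        return -EMSGSIZE;
}
#endif
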
static void clear_stats(struct sw_flow *flow)
{
        flow->used = 0;
        flow->tcp_flags = 0;
        flow->packet_count = 0;
        flow->byte_count = 0;
}

/* Called with genl_mutex. */
static int expand_table(struct datapath *dp)
{
        struct tbl *old_table = get_table_protected(dp);
        struct tbl *new_table;

        new_table = tbl_expand(old_table);
        if (IS_ERR(new_table))
                return PTR_ERR(new_table);

        rcu_assign_pointer(dp->table, new_table);
        tbl_deferred_destroy(old_table, NULL);

        return 0;
}

static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
        struct odp_header *odp_header = info->userhdr;
        struct nlattr **a = info->attrs;
        struct sk_buff *packet;
        struct sw_flow_key key;
        struct datapath *dp;
        struct ethhdr *eth;
        bool is_frag;
        int err;

        err = -EINVAL;
        if (!a[ODP_PACKET_ATTR_PACKET] || !a[ODP_PACKET_ATTR_ACTIONS] ||
            nla_len(a[ODP_PACKET_ATTR_PACKET]) < ETH_HLEN)
                goto exit;

        err = validate_actions(a[ODP_PACKET_ATTR_ACTIONS]);
        if (err)
                goto exit;

        packet = skb_clone(skb, GFP_KERNEL);
        err = -ENOMEM;
        if (!packet)
                goto exit;
        packet->data = nla_data(a[ODP_PACKET_ATTR_PACKET]);
        packet->len = nla_len(a[ODP_PACKET_ATTR_PACKET]);

        skb_reset_mac_header(packet);
        eth = eth_hdr(packet);

        /* Normally, setting the skb 'protocol' field would be handled by a
         * call to eth_type_trans(), but it assumes there's a sending
         * device, which we may not have. */
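        /* 1536 (0x600) is the smallest value of h_proto that denotes an
         * Ethertype; smaller values are 802.3 frame lengths. */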
        if (ntohs(eth->h_proto) >= 1536)
                packet->protocol = eth->h_proto;
        else
                packet->protocol = htons(ETH_P_802_2);

        err = flow_extract(packet, -1, &key, &is_frag);
        if (err)
                goto exit;

        rcu_read_lock();
        dp = get_dp(odp_header->dp_ifindex);
        err = -ENODEV;
        if (dp)
                err = execute_actions(dp, packet, &key,
                                      nla_data(a[ODP_PACKET_ATTR_ACTIONS]),
                                      nla_len(a[ODP_PACKET_ATTR_ACTIONS]));
        rcu_read_unlock();

exit:
        return err;
}

static const struct nla_policy packet_policy[ODP_PACKET_ATTR_MAX + 1] = {
        [ODP_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
        [ODP_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
};

static struct genl_ops dp_packet_genl_ops[] = {
        { .cmd = ODP_PACKET_CMD_EXECUTE,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = packet_policy,
          .doit = odp_packet_cmd_execute
        }
};

static void get_dp_stats(struct datapath *dp, struct odp_stats *stats)
{
        int i;

        stats->n_frags = stats->n_hit = stats->n_missed = stats->n_lost = 0;
        for_each_possible_cpu(i) {
                const struct dp_stats_percpu *percpu_stats;
                struct dp_stats_percpu local_stats;
                unsigned seqcount;

                percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

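                /* Re-read if a writer (e.g. dp_process_received_packet())
                 * was mid-update, so the 64-bit counters are never read
                 * torn. */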
                do {
                        seqcount = read_seqcount_begin(&percpu_stats->seqlock);
                        local_stats = *percpu_stats;
                } while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));

                stats->n_frags += local_stats.n_frags;
                stats->n_hit += local_stats.n_hit;
                stats->n_missed += local_stats.n_missed;
                stats->n_lost += local_stats.n_lost;
        }
}

/* MTU of the dp pseudo-device: ETH_DATA_LEN or the minimum of the ports.
 * Called with RTNL lock.
 */
int dp_min_mtu(const struct datapath *dp)
{
        struct vport *p;
        int mtu = 0;

        ASSERT_RTNL();

        list_for_each_entry (p, &dp->port_list, node) {
                int dev_mtu;

                /* Skip any internal ports, since that's what we're trying to
                 * set. */
                if (is_internal_vport(p))
                        continue;

                dev_mtu = vport_get_mtu(p);
                if (!mtu || dev_mtu < mtu)
                        mtu = dev_mtu;
        }

        return mtu ? mtu : ETH_DATA_LEN;
}

/* Sets the MTU of the datapath's internal devices to the minimum MTU of its
 * ports.  Called with RTNL lock.
 */
void set_internal_devs_mtu(const struct datapath *dp)
{
        struct vport *p;
        int mtu;

        ASSERT_RTNL();

        mtu = dp_min_mtu(dp);

        list_for_each_entry (p, &dp->port_list, node) {
                if (is_internal_vport(p))
                        vport_set_mtu(p, mtu);
        }
}

static const struct nla_policy flow_policy[ODP_FLOW_ATTR_MAX + 1] = {
        [ODP_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
        [ODP_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [ODP_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
};

static struct genl_family dp_flow_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct odp_header),
        .name = ODP_FLOW_FAMILY,
        .version = 1,
        .maxattr = ODP_FLOW_ATTR_MAX
};

static struct genl_multicast_group dp_flow_multicast_group = {
        .name = ODP_FLOW_MCGROUP
};

/* Called with genl_lock. */
static int odp_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
                                  struct sk_buff *skb, u32 pid, u32 seq, u32 flags, u8 cmd)
{
        const int skb_orig_len = skb->len;
        const struct sw_flow_actions *sf_acts;
        struct odp_flow_stats stats;
        struct odp_header *odp_header;
        struct nlattr *nla;
        unsigned long used;
        u8 tcp_flags;
        int nla_len;
        int err;

        sf_acts = rcu_dereference_protected(flow->sf_acts,
                                            lockdep_genl_is_held());

        odp_header = genlmsg_put(skb, pid, seq, &dp_flow_genl_family, flags, cmd);
        if (!odp_header)
                return -EMSGSIZE;

        odp_header->dp_ifindex = dp->dp_ifindex;

        nla = nla_nest_start(skb, ODP_FLOW_ATTR_KEY);
        if (!nla)
                goto nla_put_failure;
        err = flow_to_nlattrs(&flow->key, skb);
        if (err)
                goto error;
        nla_nest_end(skb, nla);

        spin_lock_bh(&flow->lock);
        used = flow->used;
        stats.n_packets = flow->packet_count;
        stats.n_bytes = flow->byte_count;
        tcp_flags = flow->tcp_flags;
        spin_unlock_bh(&flow->lock);

        if (used)
                NLA_PUT_MSECS(skb, ODP_FLOW_ATTR_USED, used);

        if (stats.n_packets)
                NLA_PUT(skb, ODP_FLOW_ATTR_STATS, sizeof(struct odp_flow_stats), &stats);

        if (tcp_flags)
                NLA_PUT_U8(skb, ODP_FLOW_ATTR_TCP_FLAGS, tcp_flags);

        /* If ODP_FLOW_ATTR_ACTIONS doesn't fit, and this is the first flow to
         * be dumped into 'skb', then expand the skb.  This is unusual for
         * Netlink but individual action lists can be longer than a page and
         * thus entirely undumpable if we didn't do this. */
        nla_len = nla_total_size(sf_acts->actions_len);
        if (nla_len > skb_tailroom(skb) && !skb_orig_len) {
                int hdr_off = (unsigned char *)odp_header - skb->data;

                err = pskb_expand_head(skb, 0, nla_len - skb_tailroom(skb), GFP_KERNEL);
                if (err)
                        goto error;

                odp_header = (struct odp_header *)(skb->data + hdr_off);
        }
        nla = nla_nest_start(skb, ODP_FLOW_ATTR_ACTIONS);
        memcpy(__skb_put(skb, sf_acts->actions_len), sf_acts->actions, sf_acts->actions_len);
        nla_nest_end(skb, nla);

        return genlmsg_end(skb, odp_header);

nla_put_failure:
        err = -EMSGSIZE;
error:
        genlmsg_cancel(skb, odp_header);
        return err;
}

static struct sk_buff *odp_flow_cmd_alloc_info(struct sw_flow *flow)
{
        const struct sw_flow_actions *sf_acts;
        int len;

        sf_acts = rcu_dereference_protected(flow->sf_acts,
                                            lockdep_genl_is_held());

        len = nla_total_size(FLOW_BUFSIZE); /* ODP_FLOW_ATTR_KEY */
        len += nla_total_size(sf_acts->actions_len); /* ODP_FLOW_ATTR_ACTIONS */
        len += nla_total_size(sizeof(struct odp_flow_stats)); /* ODP_FLOW_ATTR_STATS */
        len += nla_total_size(1); /* ODP_FLOW_ATTR_TCP_FLAGS */
        len += nla_total_size(8); /* ODP_FLOW_ATTR_USED */
        return genlmsg_new(NLMSG_ALIGN(sizeof(struct odp_header)) + len, GFP_KERNEL);
}

static struct sk_buff *odp_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp,
                                               u32 pid, u32 seq, u8 cmd)
{
        struct sk_buff *skb;
        int retval;

        skb = odp_flow_cmd_alloc_info(flow);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        retval = odp_flow_cmd_fill_info(flow, dp, skb, pid, seq, 0, cmd);
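        /* odp_flow_cmd_alloc_info() sized 'skb' for the worst case, so
         * filling in the message cannot legitimately fail. */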
        BUG_ON(retval < 0);
        return skb;
}

static int odp_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct odp_header *odp_header = info->userhdr;
        struct tbl_node *flow_node;
        struct sw_flow_key key;
        struct sw_flow *flow;
        struct sk_buff *reply;
        struct datapath *dp;
        struct tbl *table;
        u32 hash;
        int error;

        /* Extract key. */
        error = -EINVAL;
        if (!a[ODP_FLOW_ATTR_KEY])
                goto error;
        error = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
        if (error)
                goto error;

        /* Validate actions. */
        if (a[ODP_FLOW_ATTR_ACTIONS]) {
                error = validate_actions(a[ODP_FLOW_ATTR_ACTIONS]);
                if (error)
                        goto error;
        } else if (info->genlhdr->cmd == ODP_FLOW_CMD_NEW) {
                error = -EINVAL;
                goto error;
        }

        dp = get_dp(odp_header->dp_ifindex);
        error = -ENODEV;
        if (!dp)
                goto error;

        hash = flow_hash(&key);
        table = get_table_protected(dp);
        flow_node = tbl_lookup(table, &key, hash, flow_cmp);
        if (!flow_node) {
                struct sw_flow_actions *acts;

                /* Bail out if we're not allowed to create a new flow. */
                error = -ENOENT;
                if (info->genlhdr->cmd == ODP_FLOW_CMD_SET)
                        goto error;

                /* Expand table, if necessary, to make room. */
                if (tbl_count(table) >= tbl_n_buckets(table)) {
                        error = expand_table(dp);
                        if (error)
                                goto error;
                        table = get_table_protected(dp);
                }

                /* Allocate flow. */
                flow = flow_alloc();
                if (IS_ERR(flow)) {
                        error = PTR_ERR(flow);
                        goto error;
                }
                flow->key = key;
                clear_stats(flow);

                /* Obtain actions. */
                acts = flow_actions_alloc(a[ODP_FLOW_ATTR_ACTIONS]);
                error = PTR_ERR(acts);
                if (IS_ERR(acts))
                        goto error_free_flow;
                rcu_assign_pointer(flow->sf_acts, acts);

                /* Put flow in bucket. */
                error = tbl_insert(table, &flow->tbl_node, hash);
                if (error)
                        goto error_free_flow;

                reply = odp_flow_cmd_build_info(flow, dp, info->snd_pid,
                                                info->snd_seq, ODP_FLOW_CMD_NEW);
        } else {
                /* We found a matching flow. */
                struct sw_flow_actions *old_acts;

                /* Bail out if we're not allowed to modify an existing flow.
                 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
                 * because Generic Netlink treats the latter as a dump
                 * request.  We also accept NLM_F_EXCL in case that bug ever
                 * gets fixed.
                 */
                error = -EEXIST;
                if (info->genlhdr->cmd == ODP_FLOW_CMD_NEW &&
                    info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
                        goto error;

                /* Update actions. */
                flow = flow_cast(flow_node);
                old_acts = rcu_dereference_protected(flow->sf_acts,
                                                     lockdep_genl_is_held());
                if (a[ODP_FLOW_ATTR_ACTIONS] &&
                    (old_acts->actions_len != nla_len(a[ODP_FLOW_ATTR_ACTIONS]) ||
                     memcmp(old_acts->actions, nla_data(a[ODP_FLOW_ATTR_ACTIONS]),
                            old_acts->actions_len))) {
                        struct sw_flow_actions *new_acts;

                        new_acts = flow_actions_alloc(a[ODP_FLOW_ATTR_ACTIONS]);
                        error = PTR_ERR(new_acts);
                        if (IS_ERR(new_acts))
                                goto error;

                        rcu_assign_pointer(flow->sf_acts, new_acts);
                        flow_deferred_free_acts(old_acts);
                }

                reply = odp_flow_cmd_build_info(flow, dp, info->snd_pid,
                                                info->snd_seq, ODP_FLOW_CMD_NEW);

                /* Clear stats. */
                if (a[ODP_FLOW_ATTR_CLEAR]) {
                        spin_lock_bh(&flow->lock);
                        clear_stats(flow);
                        spin_unlock_bh(&flow->lock);
                }
        }

        if (!IS_ERR(reply))
                genl_notify(reply, genl_info_net(info), info->snd_pid,
                            dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
        else
                netlink_set_err(INIT_NET_GENL_SOCK, 0,
                                dp_flow_multicast_group.id, PTR_ERR(reply));
        return 0;

error_free_flow:
        flow_put(flow);
error:
        return error;
}

static int odp_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct odp_header *odp_header = info->userhdr;
        struct sw_flow_key key;
        struct tbl_node *flow_node;
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
        struct tbl *table;
        int err;

        if (!a[ODP_FLOW_ATTR_KEY])
                return -EINVAL;
        err = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
        if (err)
                return err;

        dp = get_dp(odp_header->dp_ifindex);
        if (!dp)
                return -ENODEV;

        table = get_table_protected(dp);
        flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
        if (!flow_node)
                return -ENOENT;

        flow = flow_cast(flow_node);
        reply = odp_flow_cmd_build_info(flow, dp, info->snd_pid, info->snd_seq, ODP_FLOW_CMD_NEW);
        if (IS_ERR(reply))
                return PTR_ERR(reply);

        return genlmsg_reply(reply, info);
}

static int odp_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct odp_header *odp_header = info->userhdr;
        struct sw_flow_key key;
        struct tbl_node *flow_node;
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
        struct tbl *table;
        int err;

        if (!a[ODP_FLOW_ATTR_KEY])
                return flush_flows(odp_header->dp_ifindex);
        err = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
        if (err)
                return err;

        dp = get_dp(odp_header->dp_ifindex);
        if (!dp)
                return -ENODEV;

        table = get_table_protected(dp);
        flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
        if (!flow_node)
                return -ENOENT;
        flow = flow_cast(flow_node);

        reply = odp_flow_cmd_alloc_info(flow);
        if (!reply)
                return -ENOMEM;

        err = tbl_remove(table, flow_node);
        if (err) {
                kfree_skb(reply);
                return err;
        }

        err = odp_flow_cmd_fill_info(flow, dp, reply, info->snd_pid,
                                     info->snd_seq, 0, ODP_FLOW_CMD_DEL);
        BUG_ON(err < 0);

        flow_deferred_free(flow);

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
        return 0;
}

static int odp_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct odp_header *odp_header = genlmsg_data(nlmsg_data(cb->nlh));
        struct datapath *dp;

        dp = get_dp(odp_header->dp_ifindex);
        if (!dp)
                return -ENODEV;

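        /* cb->args[0] and cb->args[1] hold the (bucket, object) position
         * where the previous call stopped, so each call resumes the dump
         * there. */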
        for (;;) {
                struct tbl_node *flow_node;
                struct sw_flow *flow;
                u32 bucket, obj;

                bucket = cb->args[0];
                obj = cb->args[1];
                flow_node = tbl_next(get_table_protected(dp), &bucket, &obj);
                if (!flow_node)
                        break;

                flow = flow_cast(flow_node);
                if (odp_flow_cmd_fill_info(flow, dp, skb, NETLINK_CB(cb->skb).pid,
                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                           ODP_FLOW_CMD_NEW) < 0)
                        break;

                cb->args[0] = bucket;
                cb->args[1] = obj;
        }
        return skb->len;
}

static struct genl_ops dp_flow_genl_ops[] = {
        { .cmd = ODP_FLOW_CMD_NEW,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = odp_flow_cmd_new_or_set
        },
        { .cmd = ODP_FLOW_CMD_DEL,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = odp_flow_cmd_del
        },
        { .cmd = ODP_FLOW_CMD_GET,
          .flags = 0,               /* OK for unprivileged users. */
          .policy = flow_policy,
          .doit = odp_flow_cmd_get,
          .dumpit = odp_flow_cmd_dump
        },
        { .cmd = ODP_FLOW_CMD_SET,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = odp_flow_cmd_new_or_set,
        },
};

static const struct nla_policy datapath_policy[ODP_DP_ATTR_MAX + 1] = {
#ifdef HAVE_NLA_NUL_STRING
        [ODP_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
#endif
        [ODP_DP_ATTR_IPV4_FRAGS] = { .type = NLA_U32 },
        [ODP_DP_ATTR_SAMPLING] = { .type = NLA_U32 },
};

static struct genl_family dp_datapath_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct odp_header),
        .name = ODP_DATAPATH_FAMILY,
        .version = 1,
        .maxattr = ODP_DP_ATTR_MAX
};

static struct genl_multicast_group dp_datapath_multicast_group = {
        .name = ODP_DATAPATH_MCGROUP
};

static int odp_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
                                u32 pid, u32 seq, u32 flags, u8 cmd)
{
        struct odp_header *odp_header;
        struct nlattr *nla;
        int err;

        odp_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family,
                                 flags, cmd);
        if (!odp_header)
                goto error;

        odp_header->dp_ifindex = dp->dp_ifindex;

        rcu_read_lock();
        err = nla_put_string(skb, ODP_DP_ATTR_NAME, dp_name(dp));
        rcu_read_unlock();
        if (err)
                goto nla_put_failure;

        nla = nla_reserve(skb, ODP_DP_ATTR_STATS, sizeof(struct odp_stats));
        if (!nla)
                goto nla_put_failure;
        get_dp_stats(dp, nla_data(nla));

        NLA_PUT_U32(skb, ODP_DP_ATTR_IPV4_FRAGS,
                    dp->drop_frags ? ODP_DP_FRAG_DROP : ODP_DP_FRAG_ZERO);

        if (dp->sflow_probability)
                NLA_PUT_U32(skb, ODP_DP_ATTR_SAMPLING, dp->sflow_probability);

        nla = nla_nest_start(skb, ODP_DP_ATTR_MCGROUPS);
        if (!nla)
                goto nla_put_failure;
        NLA_PUT_U32(skb, ODP_PACKET_CMD_MISS, packet_mc_group(dp, ODP_PACKET_CMD_MISS));
        NLA_PUT_U32(skb, ODP_PACKET_CMD_ACTION, packet_mc_group(dp, ODP_PACKET_CMD_ACTION));
        NLA_PUT_U32(skb, ODP_PACKET_CMD_SAMPLE, packet_mc_group(dp, ODP_PACKET_CMD_SAMPLE));
        nla_nest_end(skb, nla);

        return genlmsg_end(skb, odp_header);

nla_put_failure:
        genlmsg_cancel(skb, odp_header);
error:
        return -EMSGSIZE;
}

static struct sk_buff *odp_dp_cmd_build_info(struct datapath *dp, u32 pid,
                                             u32 seq, u8 cmd)
{
        struct sk_buff *skb;
        int retval;

        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        retval = odp_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd);
        if (retval < 0) {
                kfree_skb(skb);
                return ERR_PTR(retval);
        }
        return skb;
}

static int odp_dp_cmd_validate(struct nlattr *a[ODP_DP_ATTR_MAX + 1])
{
        if (a[ODP_DP_ATTR_IPV4_FRAGS]) {
                u32 frags = nla_get_u32(a[ODP_DP_ATTR_IPV4_FRAGS]);

                if (frags != ODP_DP_FRAG_ZERO && frags != ODP_DP_FRAG_DROP)
                        return -EINVAL;
        }

        return VERIFY_NUL_STRING(a[ODP_DP_ATTR_NAME], IFNAMSIZ - 1);
}

/* Called with genl_mutex and optionally with RTNL lock also. */
static struct datapath *lookup_datapath(struct odp_header *odp_header, struct nlattr *a[ODP_DP_ATTR_MAX + 1])
{
        struct datapath *dp;

        if (!a[ODP_DP_ATTR_NAME])
                dp = get_dp(odp_header->dp_ifindex);
        else {
                struct vport *vport;

                rcu_read_lock();
                vport = vport_locate(nla_data(a[ODP_DP_ATTR_NAME]));
                dp = vport && vport->port_no == ODPP_LOCAL ? vport->dp : NULL;
                rcu_read_unlock();
        }
        return dp ? dp : ERR_PTR(-ENODEV);
}

/* Called with genl_mutex. */
static void change_datapath(struct datapath *dp, struct nlattr *a[ODP_DP_ATTR_MAX + 1])
{
        if (a[ODP_DP_ATTR_IPV4_FRAGS])
                dp->drop_frags = nla_get_u32(a[ODP_DP_ATTR_IPV4_FRAGS]) == ODP_DP_FRAG_DROP;
        if (a[ODP_DP_ATTR_SAMPLING])
                dp->sflow_probability = nla_get_u32(a[ODP_DP_ATTR_SAMPLING]);
}

static int odp_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct vport_parms parms;
        struct sk_buff *reply;
        struct datapath *dp;
        struct vport *vport;
        int err;

        err = -EINVAL;
        if (!a[ODP_DP_ATTR_NAME])
                goto err;

        err = odp_dp_cmd_validate(a);
        if (err)
                goto err;

        rtnl_lock();
        err = -ENODEV;
        if (!try_module_get(THIS_MODULE))
                goto err_unlock_rtnl;

        err = -ENOMEM;
        dp = kzalloc(sizeof(*dp), GFP_KERNEL);
        if (dp == NULL)
                goto err_put_module;
        INIT_LIST_HEAD(&dp->port_list);

        /* Initialize kobject for bridge.  This will be added as
         * /sys/class/net/<devname>/brif later, if sysfs is enabled. */
        dp->ifobj.kset = NULL;
        kobject_init(&dp->ifobj, &dp_ktype);

        /* Allocate table. */
        err = -ENOMEM;
        rcu_assign_pointer(dp->table, tbl_create(TBL_MIN_BUCKETS));
        if (!dp->table)
                goto err_free_dp;

        /* Set up our datapath device. */
        parms.name = nla_data(a[ODP_DP_ATTR_NAME]);
        parms.type = ODP_VPORT_TYPE_INTERNAL;
        parms.options = NULL;
        parms.dp = dp;
        parms.port_no = ODPP_LOCAL;
        vport = new_vport(&parms);
        if (IS_ERR(vport)) {
                err = PTR_ERR(vport);
                if (err == -EBUSY)
                        err = -EEXIST;

                goto err_destroy_table;
        }
        dp->dp_ifindex = vport_get_ifindex(vport);

        dp->drop_frags = 0;
        dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
        if (!dp->stats_percpu) {
                err = -ENOMEM;
                goto err_destroy_local_port;
        }

        change_datapath(dp, a);

        reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
        err = PTR_ERR(reply);
        if (IS_ERR(reply))
                goto err_destroy_local_port;

        list_add_tail(&dp->list_node, &dps);
        dp_sysfs_add_dp(dp);

        rtnl_unlock();

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
        return 0;

err_destroy_local_port:
        dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));
err_destroy_table:
        tbl_destroy(get_table_protected(dp), NULL);
err_free_dp:
        kfree(dp);
err_put_module:
        module_put(THIS_MODULE);
err_unlock_rtnl:
        rtnl_unlock();
err:
        return err;
}

static int odp_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct vport *vport, *next_vport;
        struct sk_buff *reply;
        struct datapath *dp;
        int err;

        err = odp_dp_cmd_validate(info->attrs);
        if (err)
                goto exit;

        rtnl_lock();
        dp = lookup_datapath(info->userhdr, info->attrs);
        err = PTR_ERR(dp);
        if (IS_ERR(dp))
                goto exit_unlock;

        reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_DEL);
        err = PTR_ERR(reply);
        if (IS_ERR(reply))
                goto exit_unlock;

        list_for_each_entry_safe (vport, next_vport, &dp->port_list, node)
                if (vport->port_no != ODPP_LOCAL)
                        dp_detach_port(vport);

        dp_sysfs_del_dp(dp);
        list_del(&dp->list_node);
        dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));

        call_rcu(&dp->rcu, destroy_dp_rcu);
        module_put(THIS_MODULE);

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
        err = 0;

exit_unlock:
        rtnl_unlock();
exit:
        return err;
}

static int odp_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
        struct sk_buff *reply;
        struct datapath *dp;
        int err;

        err = odp_dp_cmd_validate(info->attrs);
        if (err)
                return err;

        dp = lookup_datapath(info->userhdr, info->attrs);
        if (IS_ERR(dp))
                return PTR_ERR(dp);

        change_datapath(dp, info->attrs);

        reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
        if (IS_ERR(reply)) {
                err = PTR_ERR(reply);
                netlink_set_err(INIT_NET_GENL_SOCK, 0,
                                dp_datapath_multicast_group.id, err);
                return 0;
        }

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
        return 0;
}

static int odp_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct sk_buff *reply;
        struct datapath *dp;
        int err;

        err = odp_dp_cmd_validate(info->attrs);
        if (err)
                return err;

        dp = lookup_datapath(info->userhdr, info->attrs);
        if (IS_ERR(dp))
                return PTR_ERR(dp);

        reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
        if (IS_ERR(reply))
                return PTR_ERR(reply);

        return genlmsg_reply(reply, info);
}

static int odp_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct datapath *dp;
        int skip = cb->args[0];
        int i = 0;

        list_for_each_entry (dp, &dps, list_node) {
                /* Count every datapath, including skipped ones, so that
                 * cb->args[0] makes the next call resume at the right
                 * entry. */
                if (i >= skip &&
                    odp_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid,
                                         cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                         ODP_DP_CMD_NEW) < 0)
                        break;
                i++;
        }
1526
1527         cb->args[0] = i;
1528
1529         return skb->len;
1530 }
1531
1532 static struct genl_ops dp_datapath_genl_ops[] = {
1533         { .cmd = ODP_DP_CMD_NEW,
1534           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1535           .policy = datapath_policy,
1536           .doit = odp_dp_cmd_new
1537         },
1538         { .cmd = ODP_DP_CMD_DEL,
1539           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1540           .policy = datapath_policy,
1541           .doit = odp_dp_cmd_del
1542         },
1543         { .cmd = ODP_DP_CMD_GET,
1544           .flags = 0,               /* OK for unprivileged users. */
1545           .policy = datapath_policy,
1546           .doit = odp_dp_cmd_get,
1547           .dumpit = odp_dp_cmd_dump
1548         },
1549         { .cmd = ODP_DP_CMD_SET,
1550           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1551           .policy = datapath_policy,
1552           .doit = odp_dp_cmd_set,
1553         },
1554 };
1555
static const struct nla_policy vport_policy[ODP_VPORT_ATTR_MAX + 1] = {
#ifdef HAVE_NLA_NUL_STRING
        [ODP_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
        [ODP_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
        [ODP_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
        [ODP_VPORT_ATTR_STATS] = { .len = sizeof(struct rtnl_link_stats64) },
        [ODP_VPORT_ATTR_ADDRESS] = { .len = ETH_ALEN },
#else
        [ODP_VPORT_ATTR_STATS] = { .minlen = sizeof(struct rtnl_link_stats64) },
        [ODP_VPORT_ATTR_ADDRESS] = { .minlen = ETH_ALEN },
#endif
        [ODP_VPORT_ATTR_MTU] = { .type = NLA_U32 },
        [ODP_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
};

static struct genl_family dp_vport_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct odp_header),
        .name = ODP_VPORT_FAMILY,
        .version = 1,
        .maxattr = ODP_VPORT_ATTR_MAX
};

static struct genl_multicast_group dp_vport_multicast_group = {
        .name = ODP_VPORT_MCGROUP
};

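/*
 * Serializes @vport into an ODP_VPORT_FAMILY message appended to @skb.
 * Returns the message length on success or -EMSGSIZE if @skb is too small
 * (the NLA_PUT_*() macros jump to nla_put_failure when the skb runs out of
 * room), which lets the dump callback detect a full buffer and stop.  A
 * failure to read the port's stats is not fatal: the reserved
 * ODP_VPORT_ATTR_STATS attribute is simply trimmed back off the message.
 *
 * Called with RTNL lock or RCU read lock.
 */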
static int odp_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
                                   u32 pid, u32 seq, u32 flags, u8 cmd)
{
        struct odp_header *odp_header;
        struct nlattr *nla;
        int ifindex, iflink;
        int err;

        odp_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family,
                                 flags, cmd);
        if (!odp_header)
                return -EMSGSIZE;

        odp_header->dp_ifindex = vport->dp->dp_ifindex;

        NLA_PUT_U32(skb, ODP_VPORT_ATTR_PORT_NO, vport->port_no);
        NLA_PUT_U32(skb, ODP_VPORT_ATTR_TYPE, vport_get_type(vport));
        NLA_PUT_STRING(skb, ODP_VPORT_ATTR_NAME, vport_get_name(vport));

        nla = nla_reserve(skb, ODP_VPORT_ATTR_STATS, sizeof(struct rtnl_link_stats64));
        if (!nla)
                goto nla_put_failure;
        if (vport_get_stats(vport, nla_data(nla)))
                __skb_trim(skb, skb->len - nla->nla_len);

        NLA_PUT(skb, ODP_VPORT_ATTR_ADDRESS, ETH_ALEN, vport_get_addr(vport));

        NLA_PUT_U32(skb, ODP_VPORT_ATTR_MTU, vport_get_mtu(vport));

        err = vport_get_options(vport, skb);
        if (err == -EMSGSIZE)
                goto error;

        ifindex = vport_get_ifindex(vport);
        if (ifindex > 0)
                NLA_PUT_U32(skb, ODP_VPORT_ATTR_IFINDEX, ifindex);

        iflink = vport_get_iflink(vport);
        if (iflink > 0)
                NLA_PUT_U32(skb, ODP_VPORT_ATTR_IFLINK, iflink);

        return genlmsg_end(skb, odp_header);

nla_put_failure:
        err = -EMSGSIZE;
error:
        genlmsg_cancel(skb, odp_header);
        return err;
}

/* Called with RTNL lock or RCU read lock. */
static struct sk_buff *odp_vport_cmd_build_info(struct vport *vport, u32 pid,
                                                u32 seq, u8 cmd)
{
        struct sk_buff *skb;
        int retval;

        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        retval = odp_vport_cmd_fill_info(vport, skb, pid, seq, 0, cmd);
        if (retval < 0) {
                kfree_skb(skb);
                return ERR_PTR(retval);
        }
        return skb;
}

static int odp_vport_cmd_validate(struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
{
        return VERIFY_NUL_STRING(a[ODP_VPORT_ATTR_NAME], IFNAMSIZ - 1);
}

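/*
 * Resolves the vport a command refers to: a name attribute takes
 * precedence and is looked up globally, otherwise a port number is
 * resolved within the datapath named by odp_header->dp_ifindex.  Returns
 * an ERR_PTR on failure: -ENODEV for a missing datapath or name, -EFBIG
 * for an out-of-range port number, -ENOENT for an unused port number and
 * -EINVAL if neither attribute is present.
 *
 * Called with RTNL lock or RCU read lock.
 */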
static struct vport *lookup_vport(struct odp_header *odp_header,
                                  struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
{
        struct datapath *dp;
        struct vport *vport;

        if (a[ODP_VPORT_ATTR_NAME]) {
                vport = vport_locate(nla_data(a[ODP_VPORT_ATTR_NAME]));
                if (!vport)
                        return ERR_PTR(-ENODEV);
                return vport;
        } else if (a[ODP_VPORT_ATTR_PORT_NO]) {
                u32 port_no = nla_get_u32(a[ODP_VPORT_ATTR_PORT_NO]);

                if (port_no >= DP_MAX_PORTS)
                        return ERR_PTR(-EFBIG);

                dp = get_dp(odp_header->dp_ifindex);
                if (!dp)
                        return ERR_PTR(-ENODEV);

                vport = get_vport_protected(dp, port_no);
                if (!vport)
                        return ERR_PTR(-ENOENT);
                return vport;
        } else
                return ERR_PTR(-EINVAL);
}

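/*
 * Applies the optional stats, Ethernet address and MTU attributes to
 * vport, stopping at the first failure so the caller can report a partial
 * update.
 *
 * Called with RTNL lock.
 */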
static int change_vport(struct vport *vport, struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
{
        int err = 0;

        if (a[ODP_VPORT_ATTR_STATS])
                err = vport_set_stats(vport, nla_data(a[ODP_VPORT_ATTR_STATS]));
        if (!err && a[ODP_VPORT_ATTR_ADDRESS])
                err = vport_set_addr(vport, nla_data(a[ODP_VPORT_ATTR_ADDRESS]));
        if (!err && a[ODP_VPORT_ATTR_MTU])
                err = vport_set_mtu(vport, nla_get_u32(a[ODP_VPORT_ATTR_MTU]));
        return err;
}

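/*
 * ODP_VPORT_CMD_NEW handler.  The caller may request a specific port
 * number; otherwise the lowest free number starting from 1 is chosen
 * (port 0 is ODPP_LOCAL, the datapath's own internal port).  If applying
 * the optional attributes or building the reply fails, the port is
 * detached again so creation is all-or-nothing.
 */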
static int odp_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct odp_header *odp_header = info->userhdr;
        struct vport_parms parms;
        struct sk_buff *reply;
        struct vport *vport;
        struct datapath *dp;
        u32 port_no;
        int err;

        err = -EINVAL;
        if (!a[ODP_VPORT_ATTR_NAME] || !a[ODP_VPORT_ATTR_TYPE])
                goto exit;

        err = odp_vport_cmd_validate(a);
        if (err)
                goto exit;

        rtnl_lock();
        dp = get_dp(odp_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto exit_unlock;

        if (a[ODP_VPORT_ATTR_PORT_NO]) {
                port_no = nla_get_u32(a[ODP_VPORT_ATTR_PORT_NO]);

                err = -EFBIG;
                if (port_no >= DP_MAX_PORTS)
                        goto exit_unlock;

                vport = get_vport_protected(dp, port_no);
                err = -EBUSY;
                if (vport)
                        goto exit_unlock;
        } else {
                for (port_no = 1; ; port_no++) {
                        if (port_no >= DP_MAX_PORTS) {
                                err = -EFBIG;
                                goto exit_unlock;
                        }
                        vport = get_vport_protected(dp, port_no);
                        if (!vport)
                                break;
                }
        }

        parms.name = nla_data(a[ODP_VPORT_ATTR_NAME]);
        parms.type = nla_get_u32(a[ODP_VPORT_ATTR_TYPE]);
        parms.options = a[ODP_VPORT_ATTR_OPTIONS];
        parms.dp = dp;
        parms.port_no = port_no;

        vport = new_vport(&parms);
        err = PTR_ERR(vport);
        if (IS_ERR(vport))
                goto exit_unlock;

        set_internal_devs_mtu(dp);
        dp_sysfs_add_if(vport);

        err = change_vport(vport, a);
        if (!err) {
                reply = odp_vport_cmd_build_info(vport, info->snd_pid,
                                                 info->snd_seq, ODP_VPORT_CMD_NEW);
                if (IS_ERR(reply))
                        err = PTR_ERR(reply);
        }
        if (err) {
                dp_detach_port(vport);
                goto exit_unlock;
        }
        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);

exit_unlock:
        rtnl_unlock();
exit:
        return err;
}

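/*
 * ODP_VPORT_CMD_SET handler.  Options are applied before the generic
 * attributes, so a rejected option leaves stats, address and MTU
 * untouched.  The result is broadcast on the vport multicast group; if
 * the notification cannot even be built, listeners get the error through
 * netlink_set_err() so they know their view may be stale.
 */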
static int odp_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct sk_buff *reply;
        struct vport *vport;
        int err;

        err = odp_vport_cmd_validate(a);
        if (err)
                goto exit;

        rtnl_lock();
        vport = lookup_vport(info->userhdr, a);
        err = PTR_ERR(vport);
        if (IS_ERR(vport))
                goto exit_unlock;

        err = 0;
        if (a[ODP_VPORT_ATTR_OPTIONS])
                err = vport_set_options(vport, a[ODP_VPORT_ATTR_OPTIONS]);
        if (!err)
                err = change_vport(vport, a);

        reply = odp_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
                                         ODP_VPORT_CMD_NEW);
        if (IS_ERR(reply)) {
                /* Must not return with RTNL still held. */
                netlink_set_err(INIT_NET_GENL_SOCK, 0,
                                dp_vport_multicast_group.id, PTR_ERR(reply));
                goto exit_unlock;
        }

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);

exit_unlock:
        rtnl_unlock();
exit:
        return err;
}

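/*
 * ODP_VPORT_CMD_DEL handler.  ODPP_LOCAL, the internal port representing
 * the datapath itself, is refused here; it disappears only when the
 * datapath does.  The notification is built before dp_detach_port()
 * because the vport's attributes can no longer be read once it is gone.
 */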
static int odp_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct sk_buff *reply;
        struct vport *vport;
        int err;

        err = odp_vport_cmd_validate(a);
        if (err)
                goto exit;

        rtnl_lock();
        vport = lookup_vport(info->userhdr, a);
        err = PTR_ERR(vport);
        if (IS_ERR(vport))
                goto exit_unlock;

        if (vport->port_no == ODPP_LOCAL) {
                err = -EINVAL;
                goto exit_unlock;
        }

        reply = odp_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
                                         ODP_VPORT_CMD_DEL);
        err = PTR_ERR(reply);
        if (IS_ERR(reply))
                goto exit_unlock;

        err = dp_detach_port(vport);

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);

exit_unlock:
        rtnl_unlock();
exit:
        return err;
}

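/*
 * ODP_VPORT_CMD_GET handler.  A read-only operation, so the RCU read lock
 * suffices here where the handlers above that modify state need RTNL.
 */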
static int odp_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct odp_header *odp_header = info->userhdr;
        struct sk_buff *reply;
        struct vport *vport;
        int err;

        err = odp_vport_cmd_validate(a);
        if (err)
                goto exit;

        rcu_read_lock();
        vport = lookup_vport(odp_header, a);
        err = PTR_ERR(vport);
        if (IS_ERR(vport))
                goto exit_unlock;

        reply = odp_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
                                         ODP_VPORT_CMD_NEW);
        err = PTR_ERR(reply);
        if (IS_ERR(reply))
                goto exit_unlock;

        err = genlmsg_reply(reply, info);

exit_unlock:
        rcu_read_unlock();
exit:
        return err;
}

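/*
 * Dump callback for ODP_VPORT_CMD_GET.  Ports are walked in port-number
 * order and cb->args[0] records the next number to try, so a dump resumes
 * exactly where the previous buffer filled up.
 */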
static int odp_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct odp_header *odp_header = genlmsg_data(nlmsg_data(cb->nlh));
        struct datapath *dp;
        u32 port_no;
        int retval;

        dp = get_dp(odp_header->dp_ifindex);
        if (!dp)
                return -ENODEV;

        rcu_read_lock();
        for (port_no = cb->args[0]; port_no < DP_MAX_PORTS; port_no++) {
                struct vport *vport;

                vport = get_vport_protected(dp, port_no);
                if (!vport)
                        continue;

                if (odp_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).pid,
                                            cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                            ODP_VPORT_CMD_NEW) < 0)
                        break;
        }
        rcu_read_unlock();

        cb->args[0] = port_no;
        retval = skb->len;

        return retval;
}

static struct genl_ops dp_vport_genl_ops[] = {
        { .cmd = ODP_VPORT_CMD_NEW,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = vport_policy,
          .doit = odp_vport_cmd_new
        },
        { .cmd = ODP_VPORT_CMD_DEL,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = vport_policy,
          .doit = odp_vport_cmd_del
        },
        { .cmd = ODP_VPORT_CMD_GET,
          .flags = 0,               /* OK for unprivileged users. */
          .policy = vport_policy,
          .doit = odp_vport_cmd_get,
          .dumpit = odp_vport_cmd_dump
        },
        { .cmd = ODP_VPORT_CMD_SET,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = vport_policy,
          .doit = odp_vport_cmd_set,
        },
};

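/*
 * Ties each generic netlink family to its operations and (optional)
 * multicast group so that registration and unregistration can be driven
 * from a single table.
 */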
struct genl_family_and_ops {
        struct genl_family *family;
        struct genl_ops *ops;
        int n_ops;
        struct genl_multicast_group *group;
};

static const struct genl_family_and_ops dp_genl_families[] = {
        { &dp_datapath_genl_family,
          dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
          &dp_datapath_multicast_group },
        { &dp_vport_genl_family,
          dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
          &dp_vport_multicast_group },
        { &dp_flow_genl_family,
          dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
          &dp_flow_multicast_group },
        { &dp_packet_genl_family,
          dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
          NULL },
};

static void dp_unregister_genl(int n_families)
{
        int i;

        for (i = 0; i < n_families; i++)
                genl_unregister_family(dp_genl_families[i].family);
}

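/*
 * Registers each family in dp_genl_families together with its multicast
 * group, if any.  On failure, n_registered is the number of families whose
 * registration succeeded, so dp_unregister_genl() unwinds exactly those;
 * a family whose multicast group failed to register is counted and torn
 * down the same way.
 */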
static int dp_register_genl(void)
{
        int n_registered;
        int err;
        int i;

        n_registered = 0;
        for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
                const struct genl_family_and_ops *f = &dp_genl_families[i];

                err = genl_register_family_with_ops(f->family, f->ops,
                                                    f->n_ops);
                if (err)
                        goto error;
                n_registered++;

                if (f->group) {
                        err = genl_register_mc_group(f->family, f->group);
                        if (err)
                                goto error;
                }
        }

        err = packet_register_mc_groups();
        if (err)
                goto error;
        return 0;

error:
        dp_unregister_genl(n_registered);
        return err;
}

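/*
 * Module init.  The generic netlink families are registered last, so
 * userspace cannot issue commands against a partially initialized
 * datapath; the error labels unwind the earlier steps in reverse order.
 */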
static int __init dp_init(void)
{
        struct sk_buff *dummy_skb;
        int err;

        BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb));

        pr_info("Open vSwitch %s, built "__DATE__" "__TIME__"\n", VERSION BUILDNR);

        err = flow_init();
        if (err)
                goto error;

        err = vport_init();
        if (err)
                goto error_flow_exit;

        err = register_netdevice_notifier(&dp_device_notifier);
        if (err)
                goto error_vport_exit;

        err = dp_register_genl();
        if (err < 0)
                goto error_unreg_notifier;

        return 0;

error_unreg_notifier:
        unregister_netdevice_notifier(&dp_device_notifier);
error_vport_exit:
        vport_exit();
error_flow_exit:
        flow_exit();
error:
        return err;
}

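/*
 * Module exit.  rcu_barrier() waits for outstanding RCU callbacks (such
 * as deferred frees queued by the datapath with call_rcu()) to finish
 * before the code they reference is unloaded; teardown then mirrors
 * dp_init() in reverse.
 */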
static void dp_cleanup(void)
{
        rcu_barrier();
        dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
        unregister_netdevice_notifier(&dp_device_notifier);
        vport_exit();
        flow_exit();
}

module_init(dp_init);
module_exit(dp_cleanup);

MODULE_DESCRIPTION("Open vSwitch switching datapath");
MODULE_LICENSE("GPL");