/*
 * Copyright (c) 2007, 2008, 2009, 2010, 2011 Nicira Networks.
 * Distributed under the terms of the GNU GPL version 2.
 *
 * Significant portions of this file may be copied from parts of the Linux
 * kernel, by Linus Torvalds and others.
 */

/* Functions for managing the dp interface/device. */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/version.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/system.h>
#include <asm/div64.h>
#include <asm/bug.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/inet_ecn.h>
#include <net/genetlink.h>

#include "openvswitch/datapath-protocol.h"
#include "checksum.h"
#include "datapath.h"
#include "actions.h"
#include "flow.h"
#include "table.h"
#include "vlan.h"
#include "vport-internal_dev.h"

int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
EXPORT_SYMBOL(dp_ioctl_hook);

/**
 * DOC: Locking:
 *
 * Writes to device state (add/remove datapath, port, set operations on vports,
 * etc.) are protected by RTNL.
 *
 * Writes to other state (flow table modifications, set miscellaneous datapath
 * parameters such as drop frags, etc.) are protected by genl_mutex.  The RTNL
 * lock nests inside genl_mutex.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact with
 * each other.
 */
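
/*
 * A minimal sketch of the ordering above (hypothetical handler, not part
 * of this file): the Generic Netlink core runs doit handlers with
 * genl_mutex already held, so a write path that also needs RTNL takes it
 * second, which is what keeps the RTNL lock nested inside genl_mutex:
 *
 *	static int example_write_op(struct sk_buff *skb, struct genl_info *info)
 *	{
 *		// genl_mutex is already held by the Generic Netlink core.
 *		rtnl_lock();		// RTNL nests inside genl_mutex.
 *		// ... modify datapath or vport state ...
 *		rtnl_unlock();
 *		return 0;
 *	}
 */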

/* Global list of datapaths to enable dumping them all out.
 * Protected by genl_mutex.
 */
static LIST_HEAD(dps);

static struct vport *new_vport(const struct vport_parms *);
static int queue_control_packets(struct datapath *, struct sk_buff *,
                                 const struct dp_upcall_info *);

/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
struct datapath *get_dp(int dp_ifindex)
{
        struct datapath *dp = NULL;
        struct net_device *dev;

        rcu_read_lock();
        dev = dev_get_by_index_rcu(&init_net, dp_ifindex);
        if (dev) {
                struct vport *vport = internal_dev_get_vport(dev);
                if (vport)
                        dp = vport->dp;
        }
        rcu_read_unlock();

        return dp;
}
EXPORT_SYMBOL_GPL(get_dp);

/* Must be called with genl_mutex. */
static struct tbl *get_table_protected(struct datapath *dp)
{
        return rcu_dereference_protected(dp->table, lockdep_genl_is_held());
}

/* Must be called with rcu_read_lock or RTNL lock. */
static struct vport *get_vport_protected(struct datapath *dp, u16 port_no)
{
        return rcu_dereference_rtnl(dp->ports[port_no]);
}

/* Must be called with rcu_read_lock or RTNL lock. */
const char *dp_name(const struct datapath *dp)
{
        return vport_get_name(rcu_dereference_rtnl(dp->ports[ODPP_LOCAL]));
}

static inline size_t br_nlmsg_size(void)
{
        return NLMSG_ALIGN(sizeof(struct ifinfomsg))
               + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
               + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
               + nla_total_size(4) /* IFLA_MASTER */
               + nla_total_size(4) /* IFLA_MTU */
               + nla_total_size(4) /* IFLA_LINK */
               + nla_total_size(1); /* IFLA_OPERSTATE */
}

/* Caller must hold RTNL lock. */
static int dp_fill_ifinfo(struct sk_buff *skb,
                          const struct vport *port,
                          int event, unsigned int flags)
{
        struct datapath *dp = port->dp;
        int ifindex = vport_get_ifindex(port);
        int iflink = vport_get_iflink(port);
        struct ifinfomsg *hdr;
        struct nlmsghdr *nlh;

        if (ifindex < 0)
                return ifindex;

        if (iflink < 0)
                return iflink;

        nlh = nlmsg_put(skb, 0, 0, event, sizeof(*hdr), flags);
        if (nlh == NULL)
                return -EMSGSIZE;

        hdr = nlmsg_data(nlh);
        hdr->ifi_family = AF_BRIDGE;
        hdr->__ifi_pad = 0;
        hdr->ifi_type = ARPHRD_ETHER;
        hdr->ifi_index = ifindex;
        hdr->ifi_flags = vport_get_flags(port);
        hdr->ifi_change = 0;

        NLA_PUT_STRING(skb, IFLA_IFNAME, vport_get_name(port));
        NLA_PUT_U32(skb, IFLA_MASTER,
                vport_get_ifindex(get_vport_protected(dp, ODPP_LOCAL)));
        NLA_PUT_U32(skb, IFLA_MTU, vport_get_mtu(port));
#ifdef IFLA_OPERSTATE
        NLA_PUT_U8(skb, IFLA_OPERSTATE,
                   vport_is_running(port)
                        ? vport_get_operstate(port)
                        : IF_OPER_DOWN);
#endif

        NLA_PUT(skb, IFLA_ADDRESS, ETH_ALEN, vport_get_addr(port));

        if (ifindex != iflink)
                NLA_PUT_U32(skb, IFLA_LINK, iflink);

        return nlmsg_end(skb, nlh);

nla_put_failure:
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}

/* Caller must hold RTNL lock. */
static void dp_ifinfo_notify(int event, struct vport *port)
{
        struct sk_buff *skb;
        int err = -ENOBUFS;

        skb = nlmsg_new(br_nlmsg_size(), GFP_KERNEL);
        if (skb == NULL)
                goto errout;

        err = dp_fill_ifinfo(skb, port, event, 0);
        if (err < 0) {
                /* -EMSGSIZE implies BUG in br_nlmsg_size() */
                WARN_ON(err == -EMSGSIZE);
                kfree_skb(skb);
                goto errout;
        }
        rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
        return;
errout:
        if (err < 0)
                rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err);
}

static void release_dp(struct kobject *kobj)
{
        struct datapath *dp = container_of(kobj, struct datapath, ifobj);
        kfree(dp);
}

static struct kobj_type dp_ktype = {
        .release = release_dp
};

static void destroy_dp_rcu(struct rcu_head *rcu)
{
        struct datapath *dp = container_of(rcu, struct datapath, rcu);

        tbl_destroy((struct tbl __force *)dp->table, flow_free_tbl);
        free_percpu(dp->stats_percpu);
        kobject_put(&dp->ifobj);
}

/* Called with RTNL lock and genl_lock. */
static struct vport *new_vport(const struct vport_parms *parms)
{
        struct vport *vport;

        vport = vport_add(parms);
        if (!IS_ERR(vport)) {
                struct datapath *dp = parms->dp;

                rcu_assign_pointer(dp->ports[parms->port_no], vport);
                list_add(&vport->node, &dp->port_list);

                dp_ifinfo_notify(RTM_NEWLINK, vport);
        }

        return vport;
}

/* Called with RTNL lock. */
int dp_detach_port(struct vport *p)
{
        ASSERT_RTNL();

        if (p->port_no != ODPP_LOCAL)
                dp_sysfs_del_if(p);
        dp_ifinfo_notify(RTM_DELLINK, p);

        /* First drop references to device. */
        list_del(&p->node);
        rcu_assign_pointer(p->dp->ports[p->port_no], NULL);

        /* Then destroy it. */
        return vport_del(p);
}

/* Must be called with rcu_read_lock. */
void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
{
        struct datapath *dp = p->dp;
        struct dp_stats_percpu *stats;
        int stats_counter_off;
        int error;

        OVS_CB(skb)->vport = p;

        if (!OVS_CB(skb)->flow) {
                struct sw_flow_key key;
                struct tbl_node *flow_node;
                bool is_frag;

                /* Extract flow from 'skb' into 'key'. */
                error = flow_extract(skb, p->port_no, &key, &is_frag);
                if (unlikely(error)) {
                        kfree_skb(skb);
                        return;
                }

                if (is_frag && dp->drop_frags) {
                        kfree_skb(skb);
                        stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
                        goto out;
                }

                /* Look up flow. */
                flow_node = tbl_lookup(rcu_dereference(dp->table), &key,
                                        flow_hash(&key), flow_cmp);
                if (unlikely(!flow_node)) {
                        struct dp_upcall_info upcall;

                        upcall.cmd = ODP_PACKET_CMD_MISS;
                        upcall.key = &key;
                        upcall.userdata = 0;
                        upcall.sample_pool = 0;
                        upcall.actions = NULL;
                        upcall.actions_len = 0;
                        dp_upcall(dp, skb, &upcall);
                        stats_counter_off = offsetof(struct dp_stats_percpu, n_missed);
                        goto out;
                }

                OVS_CB(skb)->flow = flow_cast(flow_node);
        }

        stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
        flow_used(OVS_CB(skb)->flow, skb);
        execute_actions(dp, skb);

out:
        /* Update datapath statistics. */
        local_bh_disable();
        stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());

        write_seqcount_begin(&stats->seqlock);
        (*(u64 *)((u8 *)stats + stats_counter_off))++;
        write_seqcount_end(&stats->seqlock);

        local_bh_enable();
}

static void copy_and_csum_skb(struct sk_buff *skb, void *to)
{
        u16 csum_start, csum_offset;
        __wsum csum;

        get_skb_csum_pointers(skb, &csum_start, &csum_offset);
        csum_start -= skb_headroom(skb);

        skb_copy_bits(skb, 0, to, csum_start);

        csum = skb_copy_and_csum_bits(skb, csum_start, to + csum_start,
                                      skb->len - csum_start, 0);
        *(__sum16 *)(to + csum_start + csum_offset) = csum_fold(csum);
}

static struct genl_family dp_packet_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct odp_header),
        .name = ODP_PACKET_FAMILY,
        .version = 1,
        .maxattr = ODP_PACKET_ATTR_MAX
};

/* Generic Netlink multicast groups for upcalls.
 *
 * We really want three unique multicast groups per datapath, but we can't even
 * get one, because genl_register_mc_group() takes genl_lock, which is also
 * held during Generic Netlink message processing, so trying to acquire
 * multicast groups during ODP_DP_NEW processing deadlocks.  Instead, we
 * preallocate a few groups and use them round-robin for datapaths.  Collision
 * isn't fatal--multicast listeners should check that the family is the one
 * that they want and discard others--but it wastes time and memory to receive
 * unwanted messages.
 */
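
/*
 * A hypothetical sketch of the listener-side check described above: in
 * Generic Netlink the nlmsg_type field of a message carries the resolved
 * family id, so a subscriber to one of these shared groups can discard
 * traffic from any other family that hashes to the same group
 * ('packet_family_id' and next_msg() below are assumed helpers, e.g. an
 * id resolved via CTRL_CMD_GETFAMILY and a receive loop):
 *
 *	struct nlmsghdr *nlh = next_msg();	// next message from the socket
 *	if (nlh->nlmsg_type != packet_family_id)
 *		return;		// another family's message; discard it
 */
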
#define PACKET_N_MC_GROUPS 16
static struct genl_multicast_group packet_mc_groups[PACKET_N_MC_GROUPS];

static u32 packet_mc_group(struct datapath *dp, u8 cmd)
{
        u32 idx;
        BUILD_BUG_ON_NOT_POWER_OF_2(PACKET_N_MC_GROUPS);

        idx = jhash_2words(dp->dp_ifindex, cmd, 0) & (PACKET_N_MC_GROUPS - 1);
        return packet_mc_groups[idx].id;
}

static int packet_register_mc_groups(void)
{
        int i;

        for (i = 0; i < PACKET_N_MC_GROUPS; i++) {
                struct genl_multicast_group *group = &packet_mc_groups[i];
                int error;

                sprintf(group->name, "packet%d", i);
                error = genl_register_mc_group(&dp_packet_genl_family, group);
                if (error)
                        return error;
        }
        return 0;
}

int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info)
{
        struct dp_stats_percpu *stats;
        int err;

        WARN_ON_ONCE(skb_shared(skb));

        forward_ip_summed(skb);

        err = vswitch_skb_checksum_setup(skb);
        if (err)
                goto err_kfree_skb;

        /* Break apart GSO packets into their component pieces.  Otherwise
         * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
        if (skb_is_gso(skb)) {
                struct sk_buff *nskb = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);

                kfree_skb(skb);
                skb = nskb;
                if (IS_ERR(skb)) {
                        err = PTR_ERR(skb);
                        goto err;
                }
        }

        err = queue_control_packets(dp, skb, upcall_info);
        if (err)
                goto err;

        return 0;

err_kfree_skb:
        kfree_skb(skb);
err:
        local_bh_disable();
        stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());

        write_seqcount_begin(&stats->seqlock);
        stats->n_lost++;
        write_seqcount_end(&stats->seqlock);

        local_bh_enable();

        return err;
}

/* Send each packet in the 'skb' list to userspace for 'dp' as directed by
 * 'upcall_info'.  There will be only one packet unless we broke up a GSO
 * packet.
 */
static int queue_control_packets(struct datapath *dp, struct sk_buff *skb,
                                 const struct dp_upcall_info *upcall_info)
{
        u32 group = packet_mc_group(dp, upcall_info->cmd);
        struct sk_buff *nskb;
        int err;

        do {
                struct odp_header *upcall;
                struct sk_buff *user_skb; /* to be queued to userspace */
                struct nlattr *nla;
                unsigned int len;

                nskb = skb->next;
                skb->next = NULL;

                err = vlan_deaccel_tag(skb);
                if (unlikely(err))
                        goto err_kfree_skbs;

                if (nla_attr_size(skb->len) > USHRT_MAX) {
                        err = -EFBIG;
                        goto err_kfree_skbs;
                }

                len = sizeof(struct odp_header);
                len += nla_total_size(skb->len);
                len += nla_total_size(FLOW_BUFSIZE);
                if (upcall_info->userdata)
                        len += nla_total_size(8);
                if (upcall_info->sample_pool)
                        len += nla_total_size(4);
                if (upcall_info->actions_len)
                        len += nla_total_size(upcall_info->actions_len);

                user_skb = genlmsg_new(len, GFP_ATOMIC);
                if (!user_skb) {
                        netlink_set_err(INIT_NET_GENL_SOCK, 0, group, -ENOBUFS);
                        goto err_kfree_skbs;
                }

                upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 0, upcall_info->cmd);
                upcall->dp_ifindex = dp->dp_ifindex;

                nla = nla_nest_start(user_skb, ODP_PACKET_ATTR_KEY);
                flow_to_nlattrs(upcall_info->key, user_skb);
                nla_nest_end(user_skb, nla);

                if (upcall_info->userdata)
                        nla_put_u64(user_skb, ODP_PACKET_ATTR_USERDATA, upcall_info->userdata);
                if (upcall_info->sample_pool)
                        nla_put_u32(user_skb, ODP_PACKET_ATTR_SAMPLE_POOL, upcall_info->sample_pool);
                if (upcall_info->actions_len) {
                        const struct nlattr *actions = upcall_info->actions;
                        u32 actions_len = upcall_info->actions_len;

                        nla = nla_nest_start(user_skb, ODP_PACKET_ATTR_ACTIONS);
                        memcpy(__skb_put(user_skb, actions_len), actions, actions_len);
                        nla_nest_end(user_skb, nla);
                }

                nla = __nla_reserve(user_skb, ODP_PACKET_ATTR_PACKET, skb->len);
                if (skb->ip_summed == CHECKSUM_PARTIAL)
                        copy_and_csum_skb(skb, nla_data(nla));
                else
                        skb_copy_bits(skb, 0, nla_data(nla), skb->len);

                err = genlmsg_multicast(user_skb, 0, group, GFP_ATOMIC);
                if (err)
                        goto err_kfree_skbs;

                kfree_skb(skb);
                skb = nskb;
        } while (skb);
        return 0;

err_kfree_skbs:
        kfree_skb(skb);
        while ((skb = nskb) != NULL) {
                nskb = skb->next;
                kfree_skb(skb);
        }
        return err;
}

/* Called with genl_mutex. */
static int flush_flows(int dp_ifindex)
{
        struct tbl *old_table;
        struct tbl *new_table;
        struct datapath *dp;

        dp = get_dp(dp_ifindex);
        if (!dp)
                return -ENODEV;

        old_table = get_table_protected(dp);
        new_table = tbl_create(TBL_MIN_BUCKETS);
        if (!new_table)
                return -ENOMEM;

        rcu_assign_pointer(dp->table, new_table);

        tbl_deferred_destroy(old_table, flow_free_tbl);

        return 0;
}

static int validate_actions(const struct nlattr *attr)
{
        const struct nlattr *a;
        int rem;

        nla_for_each_nested(a, attr, rem) {
                static const u32 action_lens[ODP_ACTION_ATTR_MAX + 1] = {
                        [ODP_ACTION_ATTR_OUTPUT] = 4,
                        [ODP_ACTION_ATTR_CONTROLLER] = 8,
                        [ODP_ACTION_ATTR_SET_DL_TCI] = 2,
                        [ODP_ACTION_ATTR_STRIP_VLAN] = 0,
                        [ODP_ACTION_ATTR_SET_DL_SRC] = ETH_ALEN,
                        [ODP_ACTION_ATTR_SET_DL_DST] = ETH_ALEN,
                        [ODP_ACTION_ATTR_SET_NW_SRC] = 4,
                        [ODP_ACTION_ATTR_SET_NW_DST] = 4,
                        [ODP_ACTION_ATTR_SET_NW_TOS] = 1,
                        [ODP_ACTION_ATTR_SET_TP_SRC] = 2,
                        [ODP_ACTION_ATTR_SET_TP_DST] = 2,
                        [ODP_ACTION_ATTR_SET_TUNNEL] = 8,
                        [ODP_ACTION_ATTR_SET_PRIORITY] = 4,
                        [ODP_ACTION_ATTR_POP_PRIORITY] = 0,
                        [ODP_ACTION_ATTR_DROP_SPOOFED_ARP] = 0,
                };
                int type = nla_type(a);

                if (type > ODP_ACTION_ATTR_MAX || nla_len(a) != action_lens[type])
                        return -EINVAL;

                switch (type) {
                case ODP_ACTION_ATTR_UNSPEC:
                        return -EINVAL;

                case ODP_ACTION_ATTR_CONTROLLER:
                case ODP_ACTION_ATTR_STRIP_VLAN:
                case ODP_ACTION_ATTR_SET_DL_SRC:
                case ODP_ACTION_ATTR_SET_DL_DST:
                case ODP_ACTION_ATTR_SET_NW_SRC:
                case ODP_ACTION_ATTR_SET_NW_DST:
                case ODP_ACTION_ATTR_SET_TP_SRC:
                case ODP_ACTION_ATTR_SET_TP_DST:
                case ODP_ACTION_ATTR_SET_TUNNEL:
                case ODP_ACTION_ATTR_SET_PRIORITY:
                case ODP_ACTION_ATTR_POP_PRIORITY:
                case ODP_ACTION_ATTR_DROP_SPOOFED_ARP:
                        /* No validation needed. */
                        break;

                case ODP_ACTION_ATTR_OUTPUT:
                        if (nla_get_u32(a) >= DP_MAX_PORTS)
                                return -EINVAL;
                        break;

                case ODP_ACTION_ATTR_SET_DL_TCI:
                        if (nla_get_be16(a) & htons(VLAN_CFI_MASK))
                                return -EINVAL;
                        break;

                case ODP_ACTION_ATTR_SET_NW_TOS:
                        if (nla_get_u8(a) & INET_ECN_MASK)
                                return -EINVAL;
                        break;

                default:
                        return -EOPNOTSUPP;
                }
        }

        if (rem > 0)
                return -EINVAL;

        return 0;
}

static void clear_stats(struct sw_flow *flow)
{
        flow->used = 0;
        flow->tcp_flags = 0;
        flow->packet_count = 0;
        flow->byte_count = 0;
}

/* Called with genl_mutex. */
static int expand_table(struct datapath *dp)
{
        struct tbl *old_table = get_table_protected(dp);
        struct tbl *new_table;

        new_table = tbl_expand(old_table);
        if (IS_ERR(new_table))
                return PTR_ERR(new_table);

        rcu_assign_pointer(dp->table, new_table);
        tbl_deferred_destroy(old_table, NULL);

        return 0;
}

static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
        struct odp_header *odp_header = info->userhdr;
        struct nlattr **a = info->attrs;
        struct sw_flow_actions *acts;
        struct sk_buff *packet;
        struct sw_flow *flow;
        struct datapath *dp;
        struct ethhdr *eth;
        bool is_frag;
        int len;
        int err;

        err = -EINVAL;
        if (!a[ODP_PACKET_ATTR_PACKET] || !a[ODP_PACKET_ATTR_ACTIONS] ||
            nla_len(a[ODP_PACKET_ATTR_PACKET]) < ETH_HLEN)
                goto err;

        err = validate_actions(a[ODP_PACKET_ATTR_ACTIONS]);
        if (err)
                goto err;

        len = nla_len(a[ODP_PACKET_ATTR_PACKET]);
        packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
        err = -ENOMEM;
        if (!packet)
                goto err;
        skb_reserve(packet, NET_IP_ALIGN);

        memcpy(__skb_put(packet, len), nla_data(a[ODP_PACKET_ATTR_PACKET]), len);

        skb_reset_mac_header(packet);
        eth = eth_hdr(packet);

        /* Normally, setting the skb 'protocol' field would be handled by a
         * call to eth_type_trans(), but it assumes there's a sending
         * device, which we may not have. */
        if (ntohs(eth->h_proto) >= 1536)
                packet->protocol = eth->h_proto;
        else
                packet->protocol = htons(ETH_P_802_2);

        /* Build an sw_flow for sending this packet. */
        flow = flow_alloc();
        err = PTR_ERR(flow);
        if (IS_ERR(flow))
                goto err_kfree_skb;

        err = flow_extract(packet, -1, &flow->key, &is_frag);
        if (err)
                goto err_flow_put;
        flow->tbl_node.hash = flow_hash(&flow->key);

        acts = flow_actions_alloc(a[ODP_PACKET_ATTR_ACTIONS]);
        err = PTR_ERR(acts);
        if (IS_ERR(acts))
                goto err_flow_put;
        rcu_assign_pointer(flow->sf_acts, acts);

        OVS_CB(packet)->flow = flow;

        rcu_read_lock();
        dp = get_dp(odp_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto err_unlock;
        err = execute_actions(dp, packet);
        rcu_read_unlock();

        flow_put(flow);
        return err;

err_unlock:
        rcu_read_unlock();
err_flow_put:
        flow_put(flow);
err_kfree_skb:
        kfree_skb(packet);
err:
        return err;
}

static const struct nla_policy packet_policy[ODP_PACKET_ATTR_MAX + 1] = {
        [ODP_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
        [ODP_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
};

static struct genl_ops dp_packet_genl_ops[] = {
        { .cmd = ODP_PACKET_CMD_EXECUTE,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = packet_policy,
          .doit = odp_packet_cmd_execute
        }
};

static void get_dp_stats(struct datapath *dp, struct odp_stats *stats)
{
        int i;

        stats->n_frags = stats->n_hit = stats->n_missed = stats->n_lost = 0;
        for_each_possible_cpu(i) {
                const struct dp_stats_percpu *percpu_stats;
                struct dp_stats_percpu local_stats;
                unsigned seqcount;

                percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

                do {
                        seqcount = read_seqcount_begin(&percpu_stats->seqlock);
                        local_stats = *percpu_stats;
                } while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));

                stats->n_frags += local_stats.n_frags;
                stats->n_hit += local_stats.n_hit;
                stats->n_missed += local_stats.n_missed;
                stats->n_lost += local_stats.n_lost;
        }
}

/* MTU of the dp pseudo-device: ETH_DATA_LEN or the minimum of the ports.
 * Called with RTNL lock.
 */
int dp_min_mtu(const struct datapath *dp)
{
        struct vport *p;
        int mtu = 0;

        ASSERT_RTNL();

        list_for_each_entry (p, &dp->port_list, node) {
                int dev_mtu;

                /* Skip any internal ports, since that's what we're trying to
                 * set. */
                if (is_internal_vport(p))
                        continue;

                dev_mtu = vport_get_mtu(p);
                if (!dev_mtu)
                        continue;
                if (!mtu || dev_mtu < mtu)
                        mtu = dev_mtu;
        }

        return mtu ? mtu : ETH_DATA_LEN;
}

/* Sets the MTU of all datapath devices to the minimum of the ports.
 * Called with RTNL lock.
 */
void set_internal_devs_mtu(const struct datapath *dp)
{
        struct vport *p;
        int mtu;

        ASSERT_RTNL();

        mtu = dp_min_mtu(dp);

        list_for_each_entry (p, &dp->port_list, node) {
                if (is_internal_vport(p))
                        vport_set_mtu(p, mtu);
        }
}

static const struct nla_policy flow_policy[ODP_FLOW_ATTR_MAX + 1] = {
        [ODP_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
        [ODP_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [ODP_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
};

static struct genl_family dp_flow_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct odp_header),
        .name = ODP_FLOW_FAMILY,
        .version = 1,
        .maxattr = ODP_FLOW_ATTR_MAX
};

static struct genl_multicast_group dp_flow_multicast_group = {
        .name = ODP_FLOW_MCGROUP
};

/* Called with genl_lock. */
static int odp_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
                                  struct sk_buff *skb, u32 pid, u32 seq, u32 flags, u8 cmd)
{
        const int skb_orig_len = skb->len;
        const struct sw_flow_actions *sf_acts;
        struct odp_flow_stats stats;
        struct odp_header *odp_header;
        struct nlattr *nla;
        unsigned long used;
        u8 tcp_flags;
        int err;

        sf_acts = rcu_dereference_protected(flow->sf_acts,
                                            lockdep_genl_is_held());

        odp_header = genlmsg_put(skb, pid, seq, &dp_flow_genl_family, flags, cmd);
        if (!odp_header)
                return -EMSGSIZE;

        odp_header->dp_ifindex = dp->dp_ifindex;

        nla = nla_nest_start(skb, ODP_FLOW_ATTR_KEY);
        if (!nla)
                goto nla_put_failure;
        err = flow_to_nlattrs(&flow->key, skb);
        if (err)
                goto error;
        nla_nest_end(skb, nla);

        spin_lock_bh(&flow->lock);
        used = flow->used;
        stats.n_packets = flow->packet_count;
        stats.n_bytes = flow->byte_count;
        tcp_flags = flow->tcp_flags;
        spin_unlock_bh(&flow->lock);

        if (used)
                NLA_PUT_U64(skb, ODP_FLOW_ATTR_USED, flow_used_time(used));

        if (stats.n_packets)
                NLA_PUT(skb, ODP_FLOW_ATTR_STATS, sizeof(struct odp_flow_stats), &stats);

        if (tcp_flags)
                NLA_PUT_U8(skb, ODP_FLOW_ATTR_TCP_FLAGS, tcp_flags);

        /* If ODP_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
         * this is the first flow to be dumped into 'skb'.  This is unusual for
         * Netlink but individual action lists can be longer than
         * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
         * The userspace caller can always fetch the actions separately if it
         * really wants them.  (Most userspace callers in fact don't care.)
         *
         * This can only fail for dump operations because the skb is always
         * properly sized for single flows.
         */
        err = nla_put(skb, ODP_FLOW_ATTR_ACTIONS, sf_acts->actions_len,
                      sf_acts->actions);
        if (err < 0 && skb_orig_len)
                goto error;

        return genlmsg_end(skb, odp_header);

nla_put_failure:
        err = -EMSGSIZE;
error:
        genlmsg_cancel(skb, odp_header);
        return err;
}

static struct sk_buff *odp_flow_cmd_alloc_info(struct sw_flow *flow)
{
        const struct sw_flow_actions *sf_acts;
        int len;

        sf_acts = rcu_dereference_protected(flow->sf_acts,
                                            lockdep_genl_is_held());

        len = nla_total_size(FLOW_BUFSIZE); /* ODP_FLOW_ATTR_KEY */
        len += nla_total_size(sf_acts->actions_len); /* ODP_FLOW_ATTR_ACTIONS */
        len += nla_total_size(sizeof(struct odp_flow_stats)); /* ODP_FLOW_ATTR_STATS */
        len += nla_total_size(1); /* ODP_FLOW_ATTR_TCP_FLAGS */
        len += nla_total_size(8); /* ODP_FLOW_ATTR_USED */
        return genlmsg_new(NLMSG_ALIGN(sizeof(struct odp_header)) + len, GFP_KERNEL);
}

static struct sk_buff *odp_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp,
                                               u32 pid, u32 seq, u8 cmd)
{
        struct sk_buff *skb;
        int retval;

        skb = odp_flow_cmd_alloc_info(flow);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        retval = odp_flow_cmd_fill_info(flow, dp, skb, pid, seq, 0, cmd);
        BUG_ON(retval < 0);
        return skb;
}

static int odp_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct odp_header *odp_header = info->userhdr;
        struct tbl_node *flow_node;
        struct sw_flow_key key;
        struct sw_flow *flow;
        struct sk_buff *reply;
        struct datapath *dp;
        struct tbl *table;
        u32 hash;
        int error;

        /* Extract key. */
        error = -EINVAL;
        if (!a[ODP_FLOW_ATTR_KEY])
                goto error;
        error = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
        if (error)
                goto error;

        /* Validate actions. */
        if (a[ODP_FLOW_ATTR_ACTIONS]) {
                error = validate_actions(a[ODP_FLOW_ATTR_ACTIONS]);
                if (error)
                        goto error;
        } else if (info->genlhdr->cmd == ODP_FLOW_CMD_NEW) {
                error = -EINVAL;
                goto error;
        }

        dp = get_dp(odp_header->dp_ifindex);
        error = -ENODEV;
        if (!dp)
                goto error;

        hash = flow_hash(&key);
        table = get_table_protected(dp);
        flow_node = tbl_lookup(table, &key, hash, flow_cmp);
        if (!flow_node) {
                struct sw_flow_actions *acts;

                /* Bail out if we're not allowed to create a new flow. */
                error = -ENOENT;
                if (info->genlhdr->cmd == ODP_FLOW_CMD_SET)
                        goto error;

                /* Expand table, if necessary, to make room. */
                if (tbl_count(table) >= tbl_n_buckets(table)) {
                        error = expand_table(dp);
                        if (error)
                                goto error;
                        table = get_table_protected(dp);
                }

                /* Allocate flow. */
                flow = flow_alloc();
                if (IS_ERR(flow)) {
                        error = PTR_ERR(flow);
                        goto error;
                }
                flow->key = key;
                clear_stats(flow);

                /* Obtain actions. */
                acts = flow_actions_alloc(a[ODP_FLOW_ATTR_ACTIONS]);
                error = PTR_ERR(acts);
                if (IS_ERR(acts))
                        goto error_free_flow;
                rcu_assign_pointer(flow->sf_acts, acts);

                /* Put flow in bucket. */
                error = tbl_insert(table, &flow->tbl_node, hash);
                if (error)
                        goto error_free_flow;

                reply = odp_flow_cmd_build_info(flow, dp, info->snd_pid,
                                                info->snd_seq, ODP_FLOW_CMD_NEW);
        } else {
                /* We found a matching flow. */
                struct sw_flow_actions *old_acts;

                /* Bail out if we're not allowed to modify an existing flow.
                 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
                 * because Generic Netlink treats the latter as a dump
                 * request.  We also accept NLM_F_EXCL in case that bug ever
                 * gets fixed.
                 */
                error = -EEXIST;
                if (info->genlhdr->cmd == ODP_FLOW_CMD_NEW &&
                    info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
                        goto error;

                /* Update actions. */
                flow = flow_cast(flow_node);
                old_acts = rcu_dereference_protected(flow->sf_acts,
                                                     lockdep_genl_is_held());
                if (a[ODP_FLOW_ATTR_ACTIONS] &&
                    (old_acts->actions_len != nla_len(a[ODP_FLOW_ATTR_ACTIONS]) ||
                     memcmp(old_acts->actions, nla_data(a[ODP_FLOW_ATTR_ACTIONS]),
                            old_acts->actions_len))) {
                        struct sw_flow_actions *new_acts;

                        new_acts = flow_actions_alloc(a[ODP_FLOW_ATTR_ACTIONS]);
                        error = PTR_ERR(new_acts);
                        if (IS_ERR(new_acts))
                                goto error;

                        rcu_assign_pointer(flow->sf_acts, new_acts);
                        flow_deferred_free_acts(old_acts);
                }

                reply = odp_flow_cmd_build_info(flow, dp, info->snd_pid,
                                                info->snd_seq, ODP_FLOW_CMD_NEW);

                /* Clear stats. */
                if (a[ODP_FLOW_ATTR_CLEAR]) {
                        spin_lock_bh(&flow->lock);
                        clear_stats(flow);
                        spin_unlock_bh(&flow->lock);
                }
        }

        if (!IS_ERR(reply))
                genl_notify(reply, genl_info_net(info), info->snd_pid,
                            dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
        else
                netlink_set_err(INIT_NET_GENL_SOCK, 0,
                                dp_flow_multicast_group.id, PTR_ERR(reply));
        return 0;

error_free_flow:
        flow_put(flow);
error:
        return error;
}

static int odp_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct odp_header *odp_header = info->userhdr;
        struct sw_flow_key key;
        struct tbl_node *flow_node;
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
        struct tbl *table;
        int err;

        if (!a[ODP_FLOW_ATTR_KEY])
                return -EINVAL;
        err = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
        if (err)
                return err;

        dp = get_dp(odp_header->dp_ifindex);
        if (!dp)
                return -ENODEV;

        table = get_table_protected(dp);
        flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
        if (!flow_node)
                return -ENOENT;

        flow = flow_cast(flow_node);
        reply = odp_flow_cmd_build_info(flow, dp, info->snd_pid, info->snd_seq, ODP_FLOW_CMD_NEW);
        if (IS_ERR(reply))
                return PTR_ERR(reply);

        return genlmsg_reply(reply, info);
}

static int odp_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct odp_header *odp_header = info->userhdr;
        struct sw_flow_key key;
        struct tbl_node *flow_node;
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
        struct tbl *table;
        int err;

        if (!a[ODP_FLOW_ATTR_KEY])
                return flush_flows(odp_header->dp_ifindex);
        err = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
        if (err)
                return err;

        dp = get_dp(odp_header->dp_ifindex);
        if (!dp)
                return -ENODEV;

        table = get_table_protected(dp);
        flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
        if (!flow_node)
                return -ENOENT;
        flow = flow_cast(flow_node);

        reply = odp_flow_cmd_alloc_info(flow);
        if (!reply)
                return -ENOMEM;

        err = tbl_remove(table, flow_node);
        if (err) {
                kfree_skb(reply);
                return err;
        }

        err = odp_flow_cmd_fill_info(flow, dp, reply, info->snd_pid,
                                     info->snd_seq, 0, ODP_FLOW_CMD_DEL);
        BUG_ON(err < 0);

        flow_deferred_free(flow);

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
        return 0;
}

static int odp_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct odp_header *odp_header = genlmsg_data(nlmsg_data(cb->nlh));
        struct datapath *dp;

        dp = get_dp(odp_header->dp_ifindex);
        if (!dp)
                return -ENODEV;

        for (;;) {
                struct tbl_node *flow_node;
                struct sw_flow *flow;
                u32 bucket, obj;

                bucket = cb->args[0];
                obj = cb->args[1];
                flow_node = tbl_next(get_table_protected(dp), &bucket, &obj);
                if (!flow_node)
                        break;

                flow = flow_cast(flow_node);
                if (odp_flow_cmd_fill_info(flow, dp, skb, NETLINK_CB(cb->skb).pid,
                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                           ODP_FLOW_CMD_NEW) < 0)
                        break;

                cb->args[0] = bucket;
                cb->args[1] = obj;
        }
        return skb->len;
}

static struct genl_ops dp_flow_genl_ops[] = {
        { .cmd = ODP_FLOW_CMD_NEW,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = odp_flow_cmd_new_or_set
        },
        { .cmd = ODP_FLOW_CMD_DEL,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = odp_flow_cmd_del
        },
        { .cmd = ODP_FLOW_CMD_GET,
          .flags = 0,               /* OK for unprivileged users. */
          .policy = flow_policy,
          .doit = odp_flow_cmd_get,
          .dumpit = odp_flow_cmd_dump
        },
        { .cmd = ODP_FLOW_CMD_SET,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = odp_flow_cmd_new_or_set,
        },
};

static const struct nla_policy datapath_policy[ODP_DP_ATTR_MAX + 1] = {
#ifdef HAVE_NLA_NUL_STRING
        [ODP_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
#endif
        [ODP_DP_ATTR_IPV4_FRAGS] = { .type = NLA_U32 },
        [ODP_DP_ATTR_SAMPLING] = { .type = NLA_U32 },
};

static struct genl_family dp_datapath_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct odp_header),
        .name = ODP_DATAPATH_FAMILY,
        .version = 1,
        .maxattr = ODP_DP_ATTR_MAX
};

static struct genl_multicast_group dp_datapath_multicast_group = {
        .name = ODP_DATAPATH_MCGROUP
};

static int odp_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
                                u32 pid, u32 seq, u32 flags, u8 cmd)
{
        struct odp_header *odp_header;
        struct nlattr *nla;
        int err;

        odp_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family,
                                 flags, cmd);
        if (!odp_header)
                goto error;

        odp_header->dp_ifindex = dp->dp_ifindex;

        rcu_read_lock();
        err = nla_put_string(skb, ODP_DP_ATTR_NAME, dp_name(dp));
        rcu_read_unlock();
        if (err)
                goto nla_put_failure;

        nla = nla_reserve(skb, ODP_DP_ATTR_STATS, sizeof(struct odp_stats));
        if (!nla)
                goto nla_put_failure;
        get_dp_stats(dp, nla_data(nla));

        NLA_PUT_U32(skb, ODP_DP_ATTR_IPV4_FRAGS,
                    dp->drop_frags ? ODP_DP_FRAG_DROP : ODP_DP_FRAG_ZERO);

        if (dp->sflow_probability)
                NLA_PUT_U32(skb, ODP_DP_ATTR_SAMPLING, dp->sflow_probability);

        nla = nla_nest_start(skb, ODP_DP_ATTR_MCGROUPS);
        if (!nla)
                goto nla_put_failure;
        NLA_PUT_U32(skb, ODP_PACKET_CMD_MISS, packet_mc_group(dp, ODP_PACKET_CMD_MISS));
        NLA_PUT_U32(skb, ODP_PACKET_CMD_ACTION, packet_mc_group(dp, ODP_PACKET_CMD_ACTION));
        NLA_PUT_U32(skb, ODP_PACKET_CMD_SAMPLE, packet_mc_group(dp, ODP_PACKET_CMD_SAMPLE));
        nla_nest_end(skb, nla);

        return genlmsg_end(skb, odp_header);

nla_put_failure:
        genlmsg_cancel(skb, odp_header);
error:
        return -EMSGSIZE;
}

static struct sk_buff *odp_dp_cmd_build_info(struct datapath *dp, u32 pid,
                                             u32 seq, u8 cmd)
{
        struct sk_buff *skb;
        int retval;

        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        retval = odp_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd);
        if (retval < 0) {
                kfree_skb(skb);
                return ERR_PTR(retval);
        }
        return skb;
}

static int odp_dp_cmd_validate(struct nlattr *a[ODP_DP_ATTR_MAX + 1])
{
        if (a[ODP_DP_ATTR_IPV4_FRAGS]) {
                u32 frags = nla_get_u32(a[ODP_DP_ATTR_IPV4_FRAGS]);

                if (frags != ODP_DP_FRAG_ZERO && frags != ODP_DP_FRAG_DROP)
                        return -EINVAL;
        }

        return CHECK_NUL_STRING(a[ODP_DP_ATTR_NAME], IFNAMSIZ - 1);
}

/* Called with genl_mutex and optionally with RTNL lock also. */
static struct datapath *lookup_datapath(struct odp_header *odp_header, struct nlattr *a[ODP_DP_ATTR_MAX + 1])
{
        struct datapath *dp;

        if (!a[ODP_DP_ATTR_NAME])
                dp = get_dp(odp_header->dp_ifindex);
        else {
                struct vport *vport;

                rcu_read_lock();
                vport = vport_locate(nla_data(a[ODP_DP_ATTR_NAME]));
                dp = vport && vport->port_no == ODPP_LOCAL ? vport->dp : NULL;
                rcu_read_unlock();
        }
        return dp ? dp : ERR_PTR(-ENODEV);
}

/* Called with genl_mutex. */
static void change_datapath(struct datapath *dp, struct nlattr *a[ODP_DP_ATTR_MAX + 1])
{
        if (a[ODP_DP_ATTR_IPV4_FRAGS])
                dp->drop_frags = nla_get_u32(a[ODP_DP_ATTR_IPV4_FRAGS]) == ODP_DP_FRAG_DROP;
        if (a[ODP_DP_ATTR_SAMPLING])
                dp->sflow_probability = nla_get_u32(a[ODP_DP_ATTR_SAMPLING]);
}

static int odp_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct vport_parms parms;
        struct sk_buff *reply;
        struct datapath *dp;
        struct vport *vport;
        int err;

        err = -EINVAL;
        if (!a[ODP_DP_ATTR_NAME])
                goto err;

        err = odp_dp_cmd_validate(a);
        if (err)
                goto err;

        rtnl_lock();
        err = -ENODEV;
        if (!try_module_get(THIS_MODULE))
                goto err_unlock_rtnl;

        err = -ENOMEM;
        dp = kzalloc(sizeof(*dp), GFP_KERNEL);
        if (dp == NULL)
                goto err_put_module;
        INIT_LIST_HEAD(&dp->port_list);

        /* Initialize kobject for bridge.  This will be added as
         * /sys/class/net/<devname>/brif later, if sysfs is enabled. */
        dp->ifobj.kset = NULL;
        kobject_init(&dp->ifobj, &dp_ktype);

        /* Allocate table. */
        err = -ENOMEM;
        rcu_assign_pointer(dp->table, tbl_create(TBL_MIN_BUCKETS));
        if (!dp->table)
                goto err_free_dp;

        /* Set up our datapath device. */
        parms.name = nla_data(a[ODP_DP_ATTR_NAME]);
        parms.type = ODP_VPORT_TYPE_INTERNAL;
        parms.options = NULL;
        parms.dp = dp;
        parms.port_no = ODPP_LOCAL;
        vport = new_vport(&parms);
        if (IS_ERR(vport)) {
                err = PTR_ERR(vport);
                if (err == -EBUSY)
                        err = -EEXIST;

                goto err_destroy_table;
        }
        dp->dp_ifindex = vport_get_ifindex(vport);

        dp->drop_frags = 0;
        dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
        if (!dp->stats_percpu) {
                err = -ENOMEM;
                goto err_destroy_local_port;
        }

        change_datapath(dp, a);

        reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
        err = PTR_ERR(reply);
        if (IS_ERR(reply))
                goto err_destroy_local_port;

        list_add_tail(&dp->list_node, &dps);
        dp_sysfs_add_dp(dp);

        rtnl_unlock();

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
        return 0;

err_destroy_local_port:
        dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));
err_destroy_table:
        tbl_destroy(get_table_protected(dp), NULL);
err_free_dp:
        kfree(dp);
err_put_module:
        module_put(THIS_MODULE);
err_unlock_rtnl:
        rtnl_unlock();
err:
        return err;
}

static int odp_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct vport *vport, *next_vport;
        struct sk_buff *reply;
        struct datapath *dp;
        int err;

        err = odp_dp_cmd_validate(info->attrs);
        if (err)
                goto exit;

        rtnl_lock();
        dp = lookup_datapath(info->userhdr, info->attrs);
        err = PTR_ERR(dp);
        if (IS_ERR(dp))
                goto exit_unlock;

        reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_DEL);
        err = PTR_ERR(reply);
        if (IS_ERR(reply))
                goto exit_unlock;

        list_for_each_entry_safe (vport, next_vport, &dp->port_list, node)
                if (vport->port_no != ODPP_LOCAL)
                        dp_detach_port(vport);

        dp_sysfs_del_dp(dp);
        list_del(&dp->list_node);
        dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));

        /* rtnl_unlock() will wait until all the references to devices that
         * are pending unregistration have been dropped.  We do it here to
         * ensure that any internal devices (which contain DP pointers) are
         * fully destroyed before freeing the datapath.
         */
        rtnl_unlock();

        call_rcu(&dp->rcu, destroy_dp_rcu);
        module_put(THIS_MODULE);

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);

        return 0;

exit_unlock:
        rtnl_unlock();
exit:
        return err;
}

static int odp_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
        struct sk_buff *reply;
        struct datapath *dp;
        int err;

        err = odp_dp_cmd_validate(info->attrs);
        if (err)
                return err;

        dp = lookup_datapath(info->userhdr, info->attrs);
        if (IS_ERR(dp))
                return PTR_ERR(dp);

        change_datapath(dp, info->attrs);

        reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
        if (IS_ERR(reply)) {
                err = PTR_ERR(reply);
                netlink_set_err(INIT_NET_GENL_SOCK, 0,
                                dp_datapath_multicast_group.id, err);
                return 0;
        }

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
        return 0;
}

static int odp_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct sk_buff *reply;
        struct datapath *dp;
        int err;

        err = odp_dp_cmd_validate(info->attrs);
        if (err)
                return err;

        dp = lookup_datapath(info->userhdr, info->attrs);
        if (IS_ERR(dp))
                return PTR_ERR(dp);

        reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
        if (IS_ERR(reply))
                return PTR_ERR(reply);

        return genlmsg_reply(reply, info);
}

static int odp_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct datapath *dp;
        int skip = cb->args[0];
        int i = 0;

        list_for_each_entry (dp, &dps, list_node) {
                /* Count entries already dumped instead of skipping them with
                 * 'continue', so that 'i' advances past them and the cursor
                 * in cb->args[0] stays accurate across dump calls. */
                if (i >= skip &&
                    odp_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid,
                                         cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                         ODP_DP_CMD_NEW) < 0)
                        break;
                i++;
        }
1535
1536         cb->args[0] = i;
1537
1538         return skb->len;
1539 }
1540
1541 static struct genl_ops dp_datapath_genl_ops[] = {
1542         { .cmd = ODP_DP_CMD_NEW,
1543           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1544           .policy = datapath_policy,
1545           .doit = odp_dp_cmd_new
1546         },
1547         { .cmd = ODP_DP_CMD_DEL,
1548           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1549           .policy = datapath_policy,
1550           .doit = odp_dp_cmd_del
1551         },
1552         { .cmd = ODP_DP_CMD_GET,
1553           .flags = 0,               /* OK for unprivileged users. */
1554           .policy = datapath_policy,
1555           .doit = odp_dp_cmd_get,
1556           .dumpit = odp_dp_cmd_dump
1557         },
1558         { .cmd = ODP_DP_CMD_SET,
1559           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1560           .policy = datapath_policy,
1561           .doit = odp_dp_cmd_set,
1562         },
1563 };
1564
1565 static const struct nla_policy vport_policy[ODP_VPORT_ATTR_MAX + 1] = {
1566 #ifdef HAVE_NLA_NUL_STRING
1567         [ODP_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1568         [ODP_VPORT_ATTR_STATS] = { .len = sizeof(struct rtnl_link_stats64) },
1569         [ODP_VPORT_ATTR_ADDRESS] = { .len = ETH_ALEN },
1570 #else
1571         [ODP_VPORT_ATTR_STATS] = { .minlen = sizeof(struct rtnl_link_stats64) },
1572         [ODP_VPORT_ATTR_ADDRESS] = { .minlen = ETH_ALEN },
1573 #endif
1574         [ODP_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
1575         [ODP_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
1576         [ODP_VPORT_ATTR_MTU] = { .type = NLA_U32 },
1577         [ODP_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
1578 };
1579
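/* For reference, a vport request looks like this on the wire (a sketch;
 * the authoritative layout is in datapath-protocol.h):
 *
 *      struct nlmsghdr | struct genlmsghdr | struct odp_header | ODP_VPORT_ATTR_* attributes
 *
 * odp_header.dp_ifindex selects the datapath; the attributes are parsed
 * against vport_policy above. */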
1580 static struct genl_family dp_vport_genl_family = {
1581         .id = GENL_ID_GENERATE,
1582         .hdrsize = sizeof(struct odp_header),
1583         .name = ODP_VPORT_FAMILY,
1584         .version = 1,
1585         .maxattr = ODP_VPORT_ATTR_MAX
1586 };
1587
1588 static struct genl_multicast_group dp_vport_multicast_group = {
1589         .name = ODP_VPORT_MCGROUP
1590 };
1591
1592 /* Called with RTNL lock or RCU read lock. */
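/* Returns the encoded message length on success or a negative errno;
 * -EMSGSIZE means the skb ran out of room. */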
1593 static int odp_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1594                                    u32 pid, u32 seq, u32 flags, u8 cmd)
1595 {
1596         struct odp_header *odp_header;
1597         struct nlattr *nla;
1598         int ifindex, iflink;
1599         int mtu;
1600         int err;
1601
1602         odp_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family,
1603                                  flags, cmd);
1604         if (!odp_header)
1605                 return -EMSGSIZE;
1606
1607         odp_header->dp_ifindex = vport->dp->dp_ifindex;
1608
1609         NLA_PUT_U32(skb, ODP_VPORT_ATTR_PORT_NO, vport->port_no);
1610         NLA_PUT_U32(skb, ODP_VPORT_ATTR_TYPE, vport_get_type(vport));
1611         NLA_PUT_STRING(skb, ODP_VPORT_ATTR_NAME, vport_get_name(vport));
1612
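        /* Reserve room for the stats up front; if the vport cannot supply
         * them, the reserved attribute is trimmed back off below. */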
1613         nla = nla_reserve(skb, ODP_VPORT_ATTR_STATS, sizeof(struct rtnl_link_stats64));
1614         if (!nla)
1615                 goto nla_put_failure;
1616         if (vport_get_stats(vport, nla_data(nla)))
1617                 __skb_trim(skb, skb->len - nla->nla_len);
1618
1619         NLA_PUT(skb, ODP_VPORT_ATTR_ADDRESS, ETH_ALEN, vport_get_addr(vport));
1620
1621         mtu = vport_get_mtu(vport);
1622         if (mtu)
1623                 NLA_PUT_U32(skb, ODP_VPORT_ATTR_MTU, mtu);
1624
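        /* Only -EMSGSIZE is fatal; any other failure from vport_get_options()
         * simply leaves the options attribute out of the message. */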
1625         err = vport_get_options(vport, skb);
1626         if (err == -EMSGSIZE)
1627                 goto error;
1628
1629         ifindex = vport_get_ifindex(vport);
1630         if (ifindex > 0)
1631                 NLA_PUT_U32(skb, ODP_VPORT_ATTR_IFINDEX, ifindex);
1632
1633         iflink = vport_get_iflink(vport);
1634         if (iflink > 0)
1635                 NLA_PUT_U32(skb, ODP_VPORT_ATTR_IFLINK, iflink);
1636
1637         return genlmsg_end(skb, odp_header);
1638
1639 nla_put_failure:
1640         err = -EMSGSIZE;
1641 error:
1642         genlmsg_cancel(skb, odp_header);
1643         return err;
1644 }
1645
1646 /* Called with RTNL lock or RCU read lock. */
1647 static struct sk_buff *odp_vport_cmd_build_info(struct vport *vport, u32 pid,
1648                                                 u32 seq, u8 cmd)
1649 {
1650         struct sk_buff *skb;
1651         int retval;
1652
1653         skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
1654         if (!skb)
1655                 return ERR_PTR(-ENOMEM);
1656
1657         retval = odp_vport_cmd_fill_info(vport, skb, pid, seq, 0, cmd);
1658         if (retval < 0) {
1659                 kfree_skb(skb);
1660                 return ERR_PTR(retval);
1661         }
1662         return skb;
1663 }
1664
1665 static int odp_vport_cmd_validate(struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
1666 {
1667         return CHECK_NUL_STRING(a[ODP_VPORT_ATTR_NAME], IFNAMSIZ - 1);
1668 }
1669
1670 /* Called with RTNL lock or RCU read lock. */
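/* The name, when present, takes precedence over the port number.  A name
 * lookup is global and ignores odp_header->dp_ifindex, while a port-number
 * lookup is scoped to the given datapath. */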
1671 static struct vport *lookup_vport(struct odp_header *odp_header,
1672                                   struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
1673 {
1674         struct datapath *dp;
1675         struct vport *vport;
1676
1677         if (a[ODP_VPORT_ATTR_NAME]) {
1678                 vport = vport_locate(nla_data(a[ODP_VPORT_ATTR_NAME]));
1679                 if (!vport)
1680                         return ERR_PTR(-ENODEV);
1681                 return vport;
1682         } else if (a[ODP_VPORT_ATTR_PORT_NO]) {
1683                 u32 port_no = nla_get_u32(a[ODP_VPORT_ATTR_PORT_NO]);
1684
1685                 if (port_no >= DP_MAX_PORTS)
1686                         return ERR_PTR(-EFBIG);
1687
1688                 dp = get_dp(odp_header->dp_ifindex);
1689                 if (!dp)
1690                         return ERR_PTR(-ENODEV);
1691
1692                 vport = get_vport_protected(dp, port_no);
1693                 if (!vport)
1694                         return ERR_PTR(-ENOENT);
1695                 return vport;
1696         } else
1697                 return ERR_PTR(-EINVAL);
1698 }
1699
1700 /* Called with RTNL lock. */
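/* Applies stats, Ethernet address, and MTU changes in that order, stopping
 * at the first failure. */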
1701 static int change_vport(struct vport *vport, struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
1702 {
1703         int err = 0;
1704         if (a[ODP_VPORT_ATTR_STATS])
1705                 err = vport_set_stats(vport, nla_data(a[ODP_VPORT_ATTR_STATS]));
1706         if (!err && a[ODP_VPORT_ATTR_ADDRESS])
1707                 err = vport_set_addr(vport, nla_data(a[ODP_VPORT_ATTR_ADDRESS]));
1708         if (!err && a[ODP_VPORT_ATTR_MTU])
1709                 err = vport_set_mtu(vport, nla_get_u32(a[ODP_VPORT_ATTR_MTU]));
1710         return err;
1711 }
1712
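/* Handler for ODP_VPORT_CMD_NEW.  If the request names no port number, the
 * first free one is allocated, starting at 1: port 0 (ODPP_LOCAL) is
 * reserved for the datapath's own internal device. */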
1713 static int odp_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1714 {
1715         struct nlattr **a = info->attrs;
1716         struct odp_header *odp_header = info->userhdr;
1717         struct vport_parms parms;
1718         struct sk_buff *reply;
1719         struct vport *vport;
1720         struct datapath *dp;
1721         u32 port_no;
1722         int err;
1723
1724         err = -EINVAL;
1725         if (!a[ODP_VPORT_ATTR_NAME] || !a[ODP_VPORT_ATTR_TYPE])
1726                 goto exit;
1727
1728         err = odp_vport_cmd_validate(a);
1729         if (err)
1730                 goto exit;
1731
1732         rtnl_lock();
1733         dp = get_dp(odp_header->dp_ifindex);
1734         err = -ENODEV;
1735         if (!dp)
1736                 goto exit_unlock;
1737
1738         if (a[ODP_VPORT_ATTR_PORT_NO]) {
1739                 port_no = nla_get_u32(a[ODP_VPORT_ATTR_PORT_NO]);
1740
1741                 err = -EFBIG;
1742                 if (port_no >= DP_MAX_PORTS)
1743                         goto exit_unlock;
1744
1745                 vport = get_vport_protected(dp, port_no);
1746                 err = -EBUSY;
1747                 if (vport)
1748                         goto exit_unlock;
1749         } else {
1750                 for (port_no = 1; ; port_no++) {
1751                         if (port_no >= DP_MAX_PORTS) {
1752                                 err = -EFBIG;
1753                                 goto exit_unlock;
1754                         }
1755                         vport = get_vport_protected(dp, port_no);
1756                         if (!vport)
1757                                 break;
1758                 }
1759         }
1760
1761         parms.name = nla_data(a[ODP_VPORT_ATTR_NAME]);
1762         parms.type = nla_get_u32(a[ODP_VPORT_ATTR_TYPE]);
1763         parms.options = a[ODP_VPORT_ATTR_OPTIONS];
1764         parms.dp = dp;
1765         parms.port_no = port_no;
1766
1767         vport = new_vport(&parms);
1768         err = PTR_ERR(vport);
1769         if (IS_ERR(vport))
1770                 goto exit_unlock;
1771
1772         set_internal_devs_mtu(dp);
1773         dp_sysfs_add_if(vport);
1774
1775         err = change_vport(vport, a);
1776         if (!err) {
1777                 reply = odp_vport_cmd_build_info(vport, info->snd_pid,
1778                                                  info->snd_seq, ODP_VPORT_CMD_NEW);
1779                 if (IS_ERR(reply))
1780                         err = PTR_ERR(reply);
1781         }
1782         if (err) {
1783                 dp_detach_port(vport);
1784                 goto exit_unlock;
1785         }
1786         genl_notify(reply, genl_info_net(info), info->snd_pid,
1787                     dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1788
1790 exit_unlock:
1791         rtnl_unlock();
1792 exit:
1793         return err;
1794 }
1795
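/* Handler for ODP_VPORT_CMD_SET: applies new vport options first, then the
 * generic attribute changes, and multicasts the result.  A failure to build
 * the notification is reported to the multicast group rather than to the
 * requester. */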
1796 static int odp_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1797 {
1798         struct nlattr **a = info->attrs;
1799         struct sk_buff *reply;
1800         struct vport *vport;
1801         int err;
1802
1803         err = odp_vport_cmd_validate(a);
1804         if (err)
1805                 goto exit;
1806
1807         rtnl_lock();
1808         vport = lookup_vport(info->userhdr, a);
1809         err = PTR_ERR(vport);
1810         if (IS_ERR(vport))
1811                 goto exit_unlock;
1812
1813         err = 0;
1814         if (a[ODP_VPORT_ATTR_OPTIONS])
1815                 err = vport_set_options(vport, a[ODP_VPORT_ATTR_OPTIONS]);
1816         if (!err)
1817                 err = change_vport(vport, a);
1818
1819         reply = odp_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1820                                          ODP_VPORT_CMD_NEW);
1821         if (IS_ERR(reply)) {
1822                 netlink_set_err(INIT_NET_GENL_SOCK, 0,
1823                                 dp_vport_multicast_group.id, PTR_ERR(reply));
1824                 err = 0;        /* Change already applied; error reported above. */
1825                 goto exit_unlock;
1826         }
1827
1828         genl_notify(reply, genl_info_net(info), info->snd_pid,
1829                     dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1830
1831 exit_unlock:
1832         rtnl_unlock();
1833 exit:
1834         return err;
1835 }
1836
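/* Handler for ODP_VPORT_CMD_DEL.  The local port (ODPP_LOCAL) cannot be
 * detached this way; it goes away only when its datapath is destroyed. */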
1837 static int odp_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
1838 {
1839         struct nlattr **a = info->attrs;
1840         struct sk_buff *reply;
1841         struct vport *vport;
1842         int err;
1843
1844         err = odp_vport_cmd_validate(a);
1845         if (err)
1846                 goto exit;
1847
1848         rtnl_lock();
1849         vport = lookup_vport(info->userhdr, a);
1850         err = PTR_ERR(vport);
1851         if (IS_ERR(vport))
1852                 goto exit_unlock;
1853
1854         if (vport->port_no == ODPP_LOCAL) {
1855                 err = -EINVAL;
1856                 goto exit_unlock;
1857         }
1858
1859         reply = odp_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1860                                          ODP_VPORT_CMD_DEL);
1861         err = PTR_ERR(reply);
1862         if (IS_ERR(reply))
1863                 goto exit_unlock;
1864
1865         err = dp_detach_port(vport);
1866
1867         genl_notify(reply, genl_info_net(info), info->snd_pid,
1868                     dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1869
1870 exit_unlock:
1871         rtnl_unlock();
1872 exit:
1873         return err;
1874 }
1875
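/* Handler for ODP_VPORT_CMD_GET.  Read-only, so the RCU read lock stands in
 * for RTNL. */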
1876 static int odp_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
1877 {
1878         struct nlattr **a = info->attrs;
1879         struct odp_header *odp_header = info->userhdr;
1880         struct sk_buff *reply;
1881         struct vport *vport;
1882         int err;
1883
1884         err = odp_vport_cmd_validate(a);
1885         if (err)
1886                 goto exit;
1887
1888         rcu_read_lock();
1889         vport = lookup_vport(odp_header, a);
1890         err = PTR_ERR(vport);
1891         if (IS_ERR(vport))
1892                 goto exit_unlock;
1893
1894         reply = odp_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1895                                          ODP_VPORT_CMD_NEW);
1896         err = PTR_ERR(reply);
1897         if (IS_ERR(reply))
1898                 goto exit_unlock;
1899
1900         err = genlmsg_reply(reply, info);
1901
1902 exit_unlock:
1903         rcu_read_unlock();
1904 exit:
1905         return err;
1906 }
1907
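/* Dump handler for vports: walks the datapath's ports by port number, with
 * cb->args[0] holding the next port number to examine across calls. */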
1908 static int odp_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1909 {
1910         struct odp_header *odp_header = genlmsg_data(nlmsg_data(cb->nlh));
1911         struct datapath *dp;
1912         u32 port_no;
1914
1915         dp = get_dp(odp_header->dp_ifindex);
1916         if (!dp)
1917                 return -ENODEV;
1918
1919         rcu_read_lock();
1920         for (port_no = cb->args[0]; port_no < DP_MAX_PORTS; port_no++) {
1921                 struct vport *vport;
1922
1923                 vport = get_vport_protected(dp, port_no);
1924                 if (!vport)
1925                         continue;
1926
1927                 if (odp_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).pid,
1928                                             cb->nlh->nlmsg_seq, NLM_F_MULTI,
1929                                             ODP_VPORT_CMD_NEW) < 0)
1930                         break;
1931         }
1932         rcu_read_unlock();
1933
1934         cb->args[0] = port_no;
1935
1936         return skb->len;
1938 }
1939
1940 static struct genl_ops dp_vport_genl_ops[] = {
1941         { .cmd = ODP_VPORT_CMD_NEW,
1942           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1943           .policy = vport_policy,
1944           .doit = odp_vport_cmd_new
1945         },
1946         { .cmd = ODP_VPORT_CMD_DEL,
1947           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1948           .policy = vport_policy,
1949           .doit = odp_vport_cmd_del
1950         },
1951         { .cmd = ODP_VPORT_CMD_GET,
1952           .flags = 0,               /* OK for unprivileged users. */
1953           .policy = vport_policy,
1954           .doit = odp_vport_cmd_get,
1955           .dumpit = odp_vport_cmd_dump
1956         },
1957         { .cmd = ODP_VPORT_CMD_SET,
1958           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1959           .policy = vport_policy,
1960           .doit = odp_vport_cmd_set,
1961         },
1962 };
1963
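/* Ties each generic netlink family to its operations and optional multicast
 * group so that registration and unwinding can be table-driven. */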
1964 struct genl_family_and_ops {
1965         struct genl_family *family;
1966         struct genl_ops *ops;
1967         int n_ops;
1968         struct genl_multicast_group *group;
1969 };
1970
1971 static const struct genl_family_and_ops dp_genl_families[] = {
1972         { &dp_datapath_genl_family,
1973           dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
1974           &dp_datapath_multicast_group },
1975         { &dp_vport_genl_family,
1976           dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
1977           &dp_vport_multicast_group },
1978         { &dp_flow_genl_family,
1979           dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
1980           &dp_flow_multicast_group },
1981         { &dp_packet_genl_family,
1982           dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
1983           NULL },
1984 };
1985
1986 static void dp_unregister_genl(int n_families)
1987 {
1988         int i;
1989
1990         for (i = 0; i < n_families; i++)
1991                 genl_unregister_family(dp_genl_families[i].family);
1993 }
1994
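/* Registers every family in dp_genl_families.  On failure, only the
 * families that were actually registered are unwound. */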
1995 static int dp_register_genl(void)
1996 {
1997         int n_registered;
1998         int err;
1999         int i;
2000
2001         n_registered = 0;
2002         for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2003                 const struct genl_family_and_ops *f = &dp_genl_families[i];
2004
2005                 err = genl_register_family_with_ops(f->family, f->ops,
2006                                                     f->n_ops);
2007                 if (err)
2008                         goto error;
2009                 n_registered++;
2010
2011                 if (f->group) {
2012                         err = genl_register_mc_group(f->family, f->group);
2013                         if (err)
2014                                 goto error;
2015                 }
2016         }
2017
2018         err = packet_register_mc_groups();
2019         if (err)
2020                 goto error;
2021         return 0;
2022
2023 error:
2024         dp_unregister_genl(n_registered);
2025         return err;
2026 }
2027
2028 static int __init dp_init(void)
2029 {
2030         struct sk_buff *dummy_skb;
2031         int err;
2032
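        /* Per-packet metadata lives in skb->cb; fail the build if struct
         * ovs_skb_cb no longer fits there. */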
2033         BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb));
2034
2035         pr_info("Open vSwitch %s, built "__DATE__" "__TIME__"\n", VERSION BUILDNR);
2036
2037         err = flow_init();
2038         if (err)
2039                 goto error;
2040
2041         err = vport_init();
2042         if (err)
2043                 goto error_flow_exit;
2044
2045         err = register_netdevice_notifier(&dp_device_notifier);
2046         if (err)
2047                 goto error_vport_exit;
2048
2049         err = dp_register_genl();
2050         if (err < 0)
2051                 goto error_unreg_notifier;
2052
2053         return 0;
2054
2055 error_unreg_notifier:
2056         unregister_netdevice_notifier(&dp_device_notifier);
2057 error_vport_exit:
2058         vport_exit();
2059 error_flow_exit:
2060         flow_exit();
2061 error:
2062         return err;
2063 }
2064
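/* Module unload.  rcu_barrier() waits for outstanding call_rcu() callbacks
 * (such as deferred flow frees) to finish before the code and data they
 * reference disappear. */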
2065 static void dp_cleanup(void)
2066 {
2067         rcu_barrier();
2068         dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2069         unregister_netdevice_notifier(&dp_device_notifier);
2070         vport_exit();
2071         flow_exit();
2072 }
2073
2074 module_init(dp_init);
2075 module_exit(dp_cleanup);
2076
2077 MODULE_DESCRIPTION("Open vSwitch switching datapath");
2078 MODULE_LICENSE("GPL");