datapath/datapath.c
/*
 * Copyright (c) 2007, 2008, 2009, 2010, 2011 Nicira Networks.
 * Distributed under the terms of the GNU GPL version 2.
 *
 * Significant portions of this file may be copied from parts of the Linux
 * kernel, by Linus Torvalds and others.
 */

/* Functions for managing the dp interface/device. */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/version.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/system.h>
#include <asm/div64.h>
#include <asm/bug.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/inet_ecn.h>
#include <net/genetlink.h>

#include "openvswitch/datapath-protocol.h"
#include "checksum.h"
#include "datapath.h"
#include "actions.h"
#include "flow.h"
#include "loop_counter.h"
#include "table.h"
#include "vlan.h"
#include "vport-internal_dev.h"

int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
EXPORT_SYMBOL(dp_ioctl_hook);

/**
 * DOC: Locking:
 *
 * Writes to device state (add/remove datapath, port, set operations on vports,
 * etc.) are protected by RTNL.
 *
 * Writes to other state (flow table modifications, set miscellaneous datapath
 * parameters such as drop frags, etc.) are protected by genl_mutex.  The RTNL
 * lock nests inside genl_mutex.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact
 * with each other.
 */
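
/* A minimal sketch of these rules (illustrative only; the identifiers below
 * are defined later in this file):
 *
 *	// Reader (e.g. packet processing):
 *	rcu_read_lock();
 *	flow_node = tbl_lookup(rcu_dereference(dp->table), &key, hash, flow_cmp);
 *	rcu_read_unlock();
 *
 *	// Writer (e.g. flow table replacement, under genl_mutex):
 *	old_table = get_table_protected(dp);
 *	rcu_assign_pointer(dp->table, new_table);
 *	tbl_deferred_destroy(old_table, flow_free_tbl);
 */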

/* Global list of datapaths to enable dumping them all out.
 * Protected by genl_mutex.
 */
static LIST_HEAD(dps);

static struct vport *new_vport(const struct vport_parms *);
static int queue_control_packets(struct datapath *, struct sk_buff *,
				 const struct dp_upcall_info *);

/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
struct datapath *get_dp(int dp_ifindex)
{
	struct datapath *dp = NULL;
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(&init_net, dp_ifindex);
	if (dev) {
		struct vport *vport = internal_dev_get_vport(dev);
		if (vport)
			dp = vport->dp;
	}
	rcu_read_unlock();

	return dp;
}
EXPORT_SYMBOL_GPL(get_dp);

/* Must be called with genl_mutex. */
static struct tbl *get_table_protected(struct datapath *dp)
{
	return rcu_dereference_protected(dp->table, lockdep_genl_is_held());
}

/* Must be called with rcu_read_lock or RTNL lock. */
static struct vport *get_vport_protected(struct datapath *dp, u16 port_no)
{
	return rcu_dereference_rtnl(dp->ports[port_no]);
}

/* Must be called with rcu_read_lock or RTNL lock. */
const char *dp_name(const struct datapath *dp)
{
	return vport_get_name(rcu_dereference_rtnl(dp->ports[ODPP_LOCAL]));
}

static inline size_t br_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
	       + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
	       + nla_total_size(4) /* IFLA_MASTER */
	       + nla_total_size(4) /* IFLA_MTU */
	       + nla_total_size(4) /* IFLA_LINK */
	       + nla_total_size(1); /* IFLA_OPERSTATE */
}

/* Caller must hold RTNL lock. */
static int dp_fill_ifinfo(struct sk_buff *skb,
			  const struct vport *port,
			  int event, unsigned int flags)
{
	struct datapath *dp = port->dp;
	int ifindex = vport_get_ifindex(port);
	int iflink = vport_get_iflink(port);
	struct ifinfomsg *hdr;
	struct nlmsghdr *nlh;

	if (ifindex < 0)
		return ifindex;

	if (iflink < 0)
		return iflink;

	nlh = nlmsg_put(skb, 0, 0, event, sizeof(*hdr), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	hdr = nlmsg_data(nlh);
	hdr->ifi_family = AF_BRIDGE;
	hdr->__ifi_pad = 0;
	hdr->ifi_type = ARPHRD_ETHER;
	hdr->ifi_index = ifindex;
	hdr->ifi_flags = vport_get_flags(port);
	hdr->ifi_change = 0;

	NLA_PUT_STRING(skb, IFLA_IFNAME, vport_get_name(port));
	NLA_PUT_U32(skb, IFLA_MASTER,
		vport_get_ifindex(get_vport_protected(dp, ODPP_LOCAL)));
	NLA_PUT_U32(skb, IFLA_MTU, vport_get_mtu(port));
#ifdef IFLA_OPERSTATE
	NLA_PUT_U8(skb, IFLA_OPERSTATE,
		   vport_is_running(port)
			? vport_get_operstate(port)
			: IF_OPER_DOWN);
#endif

	NLA_PUT(skb, IFLA_ADDRESS, ETH_ALEN, vport_get_addr(port));

	if (ifindex != iflink)
		NLA_PUT_U32(skb, IFLA_LINK, iflink);

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/* Caller must hold RTNL lock. */
static void dp_ifinfo_notify(int event, struct vport *port)
{
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(br_nlmsg_size(), GFP_KERNEL);
	if (skb == NULL)
		goto errout;

	err = dp_fill_ifinfo(skb, port, event, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in br_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err);
}

static void release_dp(struct kobject *kobj)
{
	struct datapath *dp = container_of(kobj, struct datapath, ifobj);
	kfree(dp);
}

static struct kobj_type dp_ktype = {
	.release = release_dp
};

static void destroy_dp_rcu(struct rcu_head *rcu)
{
	struct datapath *dp = container_of(rcu, struct datapath, rcu);

	tbl_destroy((struct tbl __force *)dp->table, flow_free_tbl);
	free_percpu(dp->stats_percpu);
	kobject_put(&dp->ifobj);
}

/* Called with RTNL lock and genl_lock. */
static struct vport *new_vport(const struct vport_parms *parms)
{
	struct vport *vport;

	vport = vport_add(parms);
	if (!IS_ERR(vport)) {
		struct datapath *dp = parms->dp;

		rcu_assign_pointer(dp->ports[parms->port_no], vport);
		list_add(&vport->node, &dp->port_list);

		dp_ifinfo_notify(RTM_NEWLINK, vport);
	}

	return vport;
}

/* Called with RTNL lock. */
int dp_detach_port(struct vport *p)
{
	ASSERT_RTNL();

	if (p->port_no != ODPP_LOCAL)
		dp_sysfs_del_if(p);
	dp_ifinfo_notify(RTM_DELLINK, p);

	/* First drop references to device. */
	list_del(&p->node);
	rcu_assign_pointer(p->dp->ports[p->port_no], NULL);

	/* Then destroy it. */
	return vport_del(p);
}

/* Must be called with rcu_read_lock. */
void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
{
	struct datapath *dp = p->dp;
	struct dp_stats_percpu *stats;
	int stats_counter_off;
	struct sw_flow_actions *acts;
	struct loop_counter *loop;
	int error;

	OVS_CB(skb)->vport = p;

	if (!OVS_CB(skb)->flow) {
		struct sw_flow_key key;
		struct tbl_node *flow_node;
		bool is_frag;

		/* Extract flow from 'skb' into 'key'. */
		error = flow_extract(skb, p->port_no, &key, &is_frag);
		if (unlikely(error)) {
			kfree_skb(skb);
			return;
		}

		if (is_frag && dp->drop_frags) {
			kfree_skb(skb);
			stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
			goto out;
		}

		/* Look up flow. */
		flow_node = tbl_lookup(rcu_dereference(dp->table), &key,
				       flow_hash(&key), flow_cmp);
		if (unlikely(!flow_node)) {
			struct dp_upcall_info upcall;

			upcall.cmd = ODP_PACKET_CMD_MISS;
			upcall.key = &key;
			upcall.userdata = 0;
			upcall.sample_pool = 0;
			upcall.actions = NULL;
			upcall.actions_len = 0;
			dp_upcall(dp, skb, &upcall);
			stats_counter_off = offsetof(struct dp_stats_percpu, n_missed);
			goto out;
		}

		OVS_CB(skb)->flow = flow_cast(flow_node);
	}

	stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
	flow_used(OVS_CB(skb)->flow, skb);

	acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);

	/* Check whether we've looped too much. */
	loop = loop_get_counter();
	if (unlikely(++loop->count > MAX_LOOPS))
		loop->looping = true;
	if (unlikely(loop->looping)) {
		loop_suppress(dp, acts);
		kfree_skb(skb);
		goto out_loop;
	}

	/* Execute actions. */
	execute_actions(dp, skb, &OVS_CB(skb)->flow->key, acts->actions,
			acts->actions_len);

	/* Check whether sub-actions looped too much. */
	if (unlikely(loop->looping))
		loop_suppress(dp, acts);

out_loop:
	/* Decrement loop counter. */
	if (!--loop->count)
		loop->looping = false;
	loop_put_counter();

out:
	/* Update datapath statistics. */
	local_bh_disable();
	stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());

	write_seqcount_begin(&stats->seqlock);
	(*(u64 *)((u8 *)stats + stats_counter_off))++;
	write_seqcount_end(&stats->seqlock);

	local_bh_enable();
}
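
/* Copy 'skb' into 'to', completing the deferred (CHECKSUM_PARTIAL) checksum
 * along the way and storing the folded result at the checksum offset in the
 * copy. */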
static void copy_and_csum_skb(struct sk_buff *skb, void *to)
{
	u16 csum_start, csum_offset;
	__wsum csum;

	get_skb_csum_pointers(skb, &csum_start, &csum_offset);
	csum_start -= skb_headroom(skb);
	BUG_ON(csum_start >= skb_headlen(skb));

	skb_copy_bits(skb, 0, to, csum_start);

	csum = skb_copy_and_csum_bits(skb, csum_start, to + csum_start,
				      skb->len - csum_start, 0);
	*(__sum16 *)(to + csum_start + csum_offset) = csum_fold(csum);
}

static struct genl_family dp_packet_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct odp_header),
	.name = ODP_PACKET_FAMILY,
	.version = 1,
	.maxattr = ODP_PACKET_ATTR_MAX
};

/* Generic Netlink multicast groups for upcalls.
 *
 * We really want three unique multicast groups per datapath, but we can't even
 * get one, because genl_register_mc_group() takes genl_lock, which is also
 * held during Generic Netlink message processing, so trying to acquire
 * multicast groups during ODP_DP_NEW processing deadlocks.  Instead, we
 * preallocate a few groups and use them round-robin for datapaths.  Collision
 * isn't fatal--multicast listeners should check that the family is the one
 * that they want and discard others--but it wastes time and memory to receive
 * unwanted messages.
 */
#define PACKET_N_MC_GROUPS 16
static struct genl_multicast_group packet_mc_groups[PACKET_N_MC_GROUPS];

static u32 packet_mc_group(struct datapath *dp, u8 cmd)
{
	u32 idx;
	BUILD_BUG_ON_NOT_POWER_OF_2(PACKET_N_MC_GROUPS);

	idx = jhash_2words(dp->dp_ifindex, cmd, 0) & (PACKET_N_MC_GROUPS - 1);
	return packet_mc_groups[idx].id;
}
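
/* A sketch of the listener side, for illustration only (ordinary Netlink
 * socket calls, not code from this tree): userspace reads the group id for a
 * given command from ODP_DP_ATTR_MCGROUPS and joins it, e.g.:
 *
 *	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
 *	unsigned int grp = ...;    // id from ODP_DP_ATTR_MCGROUPS
 *	setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &grp, sizeof(grp));
 *
 * Because groups are shared round-robin across datapaths, a listener must
 * still check the family and dp_ifindex of each message and discard strays.
 */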

static int packet_register_mc_groups(void)
{
	int i;

	for (i = 0; i < PACKET_N_MC_GROUPS; i++) {
		struct genl_multicast_group *group = &packet_mc_groups[i];
		int error;

		sprintf(group->name, "packet%d", i);
		error = genl_register_mc_group(&dp_packet_genl_family, group);
		if (error)
			return error;
	}
	return 0;
}

int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info)
{
	struct dp_stats_percpu *stats;
	int err;

	WARN_ON_ONCE(skb_shared(skb));

	forward_ip_summed(skb);

	err = vswitch_skb_checksum_setup(skb);
	if (err)
		goto err_kfree_skb;

	/* Break apart GSO packets into their component pieces.  Otherwise
	 * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
	if (skb_is_gso(skb)) {
		struct sk_buff *nskb = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);

		kfree_skb(skb);
		skb = nskb;
		if (IS_ERR(skb)) {
			err = PTR_ERR(skb);
			goto err;
		}
	}

	err = queue_control_packets(dp, skb, upcall_info);
	if (err)
		goto err;

	return 0;

err_kfree_skb:
	kfree_skb(skb);
err:
	local_bh_disable();
	stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());

	write_seqcount_begin(&stats->seqlock);
	stats->n_lost++;
	write_seqcount_end(&stats->seqlock);

	local_bh_enable();

	return err;
}

/* Send each packet in the 'skb' list to userspace for 'dp' as directed by
 * 'upcall_info'.  There will be only one packet unless we broke up a GSO
 * packet.
 */
static int queue_control_packets(struct datapath *dp, struct sk_buff *skb,
				 const struct dp_upcall_info *upcall_info)
{
	u32 group = packet_mc_group(dp, upcall_info->cmd);
	struct sk_buff *nskb;
	int port_no;
	int err;

	if (OVS_CB(skb)->vport)
		port_no = OVS_CB(skb)->vport->port_no;
	else
		port_no = ODPP_LOCAL;

	do {
		struct odp_header *upcall;
		struct sk_buff *user_skb; /* to be queued to userspace */
		struct nlattr *nla;
		unsigned int len;

		nskb = skb->next;
		skb->next = NULL;

		err = vlan_deaccel_tag(skb);
		if (unlikely(err))
			goto err_kfree_skbs;

		err = -EFBIG;
		if (nla_attr_size(skb->len) > USHRT_MAX)
			goto err_kfree_skbs;

		len = sizeof(struct odp_header);
		len += nla_total_size(skb->len);
		len += nla_total_size(FLOW_BUFSIZE);
		if (upcall_info->userdata)
			len += nla_total_size(8);
		if (upcall_info->sample_pool)
			len += nla_total_size(4);
		if (upcall_info->actions_len)
			len += nla_total_size(upcall_info->actions_len);

		user_skb = genlmsg_new(len, GFP_ATOMIC);
		if (!user_skb) {
			err = -ENOBUFS;
			netlink_set_err(INIT_NET_GENL_SOCK, 0, group, err);
			goto err_kfree_skbs;
		}

		upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 0, upcall_info->cmd);
		upcall->dp_ifindex = dp->dp_ifindex;

		nla = nla_nest_start(user_skb, ODP_PACKET_ATTR_KEY);
		flow_to_nlattrs(upcall_info->key, user_skb);
		nla_nest_end(user_skb, nla);

		if (upcall_info->userdata)
			nla_put_u64(user_skb, ODP_PACKET_ATTR_USERDATA, upcall_info->userdata);
		if (upcall_info->sample_pool)
			nla_put_u32(user_skb, ODP_PACKET_ATTR_SAMPLE_POOL, upcall_info->sample_pool);
		if (upcall_info->actions_len) {
			const struct nlattr *actions = upcall_info->actions;
			u32 actions_len = upcall_info->actions_len;

			nla = nla_nest_start(user_skb, ODP_PACKET_ATTR_ACTIONS);
			memcpy(__skb_put(user_skb, actions_len), actions, actions_len);
			nla_nest_end(user_skb, nla);
		}

		nla = __nla_reserve(user_skb, ODP_PACKET_ATTR_PACKET, skb->len);
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			copy_and_csum_skb(skb, nla_data(nla));
		else
			skb_copy_bits(skb, 0, nla_data(nla), skb->len);

		err = genlmsg_multicast(user_skb, 0, group, GFP_ATOMIC);
		if (err)
			goto err_kfree_skbs;

		kfree_skb(skb);
		skb = nskb;
	} while (skb);
	return 0;

err_kfree_skbs:
	kfree_skb(skb);
	while ((skb = nskb) != NULL) {
		nskb = skb->next;
		kfree_skb(skb);
	}
	return err;
}

/* Called with genl_mutex. */
static int flush_flows(int dp_ifindex)
{
	struct tbl *old_table;
	struct tbl *new_table;
	struct datapath *dp;

	dp = get_dp(dp_ifindex);
	if (!dp)
		return -ENODEV;

	old_table = get_table_protected(dp);
	new_table = tbl_create(TBL_MIN_BUCKETS);
	if (!new_table)
		return -ENOMEM;

	rcu_assign_pointer(dp->table, new_table);

	tbl_deferred_destroy(old_table, flow_free_tbl);

	return 0;
}
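
/* Verifies that an action list is well formed: every attribute must be of a
 * known type, carry the expected payload length, and hold an in-range value. */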
static int validate_actions(const struct nlattr *attr)
{
	const struct nlattr *a;
	int rem;

	nla_for_each_nested(a, attr, rem) {
		static const u32 action_lens[ODP_ACTION_ATTR_MAX + 1] = {
			[ODP_ACTION_ATTR_OUTPUT] = 4,
			[ODP_ACTION_ATTR_CONTROLLER] = 8,
			[ODP_ACTION_ATTR_SET_DL_TCI] = 2,
			[ODP_ACTION_ATTR_STRIP_VLAN] = 0,
			[ODP_ACTION_ATTR_SET_DL_SRC] = ETH_ALEN,
			[ODP_ACTION_ATTR_SET_DL_DST] = ETH_ALEN,
			[ODP_ACTION_ATTR_SET_NW_SRC] = 4,
			[ODP_ACTION_ATTR_SET_NW_DST] = 4,
			[ODP_ACTION_ATTR_SET_NW_TOS] = 1,
			[ODP_ACTION_ATTR_SET_TP_SRC] = 2,
			[ODP_ACTION_ATTR_SET_TP_DST] = 2,
			[ODP_ACTION_ATTR_SET_TUNNEL] = 8,
			[ODP_ACTION_ATTR_SET_PRIORITY] = 4,
			[ODP_ACTION_ATTR_POP_PRIORITY] = 0,
			[ODP_ACTION_ATTR_DROP_SPOOFED_ARP] = 0,
		};
		int type = nla_type(a);

		if (type > ODP_ACTION_ATTR_MAX || nla_len(a) != action_lens[type])
			return -EINVAL;

		switch (type) {
		case ODP_ACTION_ATTR_UNSPEC:
			return -EINVAL;

		case ODP_ACTION_ATTR_CONTROLLER:
		case ODP_ACTION_ATTR_STRIP_VLAN:
		case ODP_ACTION_ATTR_SET_DL_SRC:
		case ODP_ACTION_ATTR_SET_DL_DST:
		case ODP_ACTION_ATTR_SET_NW_SRC:
		case ODP_ACTION_ATTR_SET_NW_DST:
		case ODP_ACTION_ATTR_SET_TP_SRC:
		case ODP_ACTION_ATTR_SET_TP_DST:
		case ODP_ACTION_ATTR_SET_TUNNEL:
		case ODP_ACTION_ATTR_SET_PRIORITY:
		case ODP_ACTION_ATTR_POP_PRIORITY:
		case ODP_ACTION_ATTR_DROP_SPOOFED_ARP:
			/* No validation needed. */
			break;

		case ODP_ACTION_ATTR_OUTPUT:
			if (nla_get_u32(a) >= DP_MAX_PORTS)
				return -EINVAL;
			break;

		case ODP_ACTION_ATTR_SET_DL_TCI:
			if (nla_get_be16(a) & htons(VLAN_CFI_MASK))
				return -EINVAL;
			break;

		case ODP_ACTION_ATTR_SET_NW_TOS:
			if (nla_get_u8(a) & INET_ECN_MASK)
				return -EINVAL;
			break;

		default:
			return -EOPNOTSUPP;
		}
	}

	if (rem > 0)
		return -EINVAL;

	return 0;
}

static void clear_stats(struct sw_flow *flow)
{
	flow->used = 0;
	flow->tcp_flags = 0;
	flow->packet_count = 0;
	flow->byte_count = 0;
}

/* Called with genl_mutex. */
static int expand_table(struct datapath *dp)
{
	struct tbl *old_table = get_table_protected(dp);
	struct tbl *new_table;

	new_table = tbl_expand(old_table);
	if (IS_ERR(new_table))
		return PTR_ERR(new_table);

	rcu_assign_pointer(dp->table, new_table);
	tbl_deferred_destroy(old_table, NULL);

	return 0;
}
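
/* ODP_PACKET_CMD_EXECUTE handler: validates a userspace-supplied packet and
 * action list, then executes the actions on the packet as if it had arrived
 * on a datapath port. */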
static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
	struct odp_header *odp_header = info->userhdr;
	struct nlattr **a = info->attrs;
	struct sk_buff *packet;
	struct sw_flow_key key;
	struct datapath *dp;
	struct ethhdr *eth;
	bool is_frag;
	int err;

	err = -EINVAL;
	if (!a[ODP_PACKET_ATTR_PACKET] || !a[ODP_PACKET_ATTR_ACTIONS] ||
	    nla_len(a[ODP_PACKET_ATTR_PACKET]) < ETH_HLEN)
		goto exit;

	err = validate_actions(a[ODP_PACKET_ATTR_ACTIONS]);
	if (err)
		goto exit;

	packet = skb_clone(skb, GFP_KERNEL);
	err = -ENOMEM;
	if (!packet)
		goto exit;
	packet->data = nla_data(a[ODP_PACKET_ATTR_PACKET]);
	packet->len = nla_len(a[ODP_PACKET_ATTR_PACKET]);

	skb_reset_mac_header(packet);
	eth = eth_hdr(packet);

	/* Normally, setting the skb 'protocol' field would be handled by a
	 * call to eth_type_trans(), but it assumes there's a sending
	 * device, which we may not have.  (EtherType values are >= 1536;
	 * smaller values are 802.3 length fields.) */
	if (ntohs(eth->h_proto) >= 1536)
		packet->protocol = eth->h_proto;
	else
		packet->protocol = htons(ETH_P_802_2);

	err = flow_extract(packet, -1, &key, &is_frag);
	if (err) {
		kfree_skb(packet);
		goto exit;
	}

	/* Initialize OVS_CB (it came from Netlink so might not be zeroed). */
	memset(OVS_CB(packet), 0, sizeof(struct ovs_skb_cb));

	rcu_read_lock();
	dp = get_dp(odp_header->dp_ifindex);
	err = -ENODEV;
	if (dp) {
		/* execute_actions() consumes 'packet'. */
		err = execute_actions(dp, packet, &key,
				      nla_data(a[ODP_PACKET_ATTR_ACTIONS]),
				      nla_len(a[ODP_PACKET_ATTR_ACTIONS]));
	} else {
		kfree_skb(packet);
	}
	rcu_read_unlock();

exit:
	return err;
}

static const struct nla_policy packet_policy[ODP_PACKET_ATTR_MAX + 1] = {
	[ODP_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
	[ODP_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
};

static struct genl_ops dp_packet_genl_ops[] = {
	{ .cmd = ODP_PACKET_CMD_EXECUTE,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = packet_policy,
	  .doit = odp_packet_cmd_execute
	}
};
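
/* Sums the per-CPU counters into 'stats'.  The seqcount read loop retries
 * the copy if a writer updated the counters meanwhile, so the caller never
 * sees a torn 64-bit count. */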
static void get_dp_stats(struct datapath *dp, struct odp_stats *stats)
{
	int i;

	stats->n_frags = stats->n_hit = stats->n_missed = stats->n_lost = 0;
	for_each_possible_cpu(i) {
		const struct dp_stats_percpu *percpu_stats;
		struct dp_stats_percpu local_stats;
		unsigned seqcount;

		percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

		do {
			seqcount = read_seqcount_begin(&percpu_stats->seqlock);
			local_stats = *percpu_stats;
		} while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));

		stats->n_frags += local_stats.n_frags;
		stats->n_hit += local_stats.n_hit;
		stats->n_missed += local_stats.n_missed;
		stats->n_lost += local_stats.n_lost;
	}
}

/* MTU of the dp pseudo-device: ETH_DATA_LEN or the minimum of the ports.
 * Called with RTNL lock.
 */
int dp_min_mtu(const struct datapath *dp)
{
	struct vport *p;
	int mtu = 0;

	ASSERT_RTNL();

	list_for_each_entry (p, &dp->port_list, node) {
		int dev_mtu;

		/* Skip any internal ports, since that's what we're trying to
		 * set. */
		if (is_internal_vport(p))
			continue;

		dev_mtu = vport_get_mtu(p);
		if (!dev_mtu)
			continue;
		if (!mtu || dev_mtu < mtu)
			mtu = dev_mtu;
	}

	return mtu ? mtu : ETH_DATA_LEN;
}

/* Sets the MTU of all internal datapath devices to the minimum of the ports.
 * Called with RTNL lock.
 */
void set_internal_devs_mtu(const struct datapath *dp)
{
	struct vport *p;
	int mtu;

	ASSERT_RTNL();

	mtu = dp_min_mtu(dp);

	list_for_each_entry (p, &dp->port_list, node) {
		if (is_internal_vport(p))
			vport_set_mtu(p, mtu);
	}
}

static const struct nla_policy flow_policy[ODP_FLOW_ATTR_MAX + 1] = {
	[ODP_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
	[ODP_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
	[ODP_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
};

static struct genl_family dp_flow_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct odp_header),
	.name = ODP_FLOW_FAMILY,
	.version = 1,
	.maxattr = ODP_FLOW_ATTR_MAX
};

static struct genl_multicast_group dp_flow_multicast_group = {
	.name = ODP_FLOW_MCGROUP
};

/* Called with genl_lock. */
static int odp_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
				  struct sk_buff *skb, u32 pid, u32 seq, u32 flags, u8 cmd)
{
	const int skb_orig_len = skb->len;
	const struct sw_flow_actions *sf_acts;
	struct odp_flow_stats stats;
	struct odp_header *odp_header;
	struct nlattr *nla;
	unsigned long used;
	u8 tcp_flags;
	int err;

	sf_acts = rcu_dereference_protected(flow->sf_acts,
					    lockdep_genl_is_held());

	odp_header = genlmsg_put(skb, pid, seq, &dp_flow_genl_family, flags, cmd);
	if (!odp_header)
		return -EMSGSIZE;

	odp_header->dp_ifindex = dp->dp_ifindex;

	nla = nla_nest_start(skb, ODP_FLOW_ATTR_KEY);
	if (!nla)
		goto nla_put_failure;
	err = flow_to_nlattrs(&flow->key, skb);
	if (err)
		goto error;
	nla_nest_end(skb, nla);

	spin_lock_bh(&flow->lock);
	used = flow->used;
	stats.n_packets = flow->packet_count;
	stats.n_bytes = flow->byte_count;
	tcp_flags = flow->tcp_flags;
	spin_unlock_bh(&flow->lock);

	if (used)
		NLA_PUT_U64(skb, ODP_FLOW_ATTR_USED, flow_used_time(used));

	if (stats.n_packets)
		NLA_PUT(skb, ODP_FLOW_ATTR_STATS, sizeof(struct odp_flow_stats), &stats);

	if (tcp_flags)
		NLA_PUT_U8(skb, ODP_FLOW_ATTR_TCP_FLAGS, tcp_flags);

	/* If ODP_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
	 * this is the first flow to be dumped into 'skb'.  This is unusual for
	 * Netlink but individual action lists can be longer than
	 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
	 * The userspace caller can always fetch the actions separately if it
	 * really wants them.  (Most userspace callers in fact don't care.)
	 *
	 * This can only fail for dump operations because the skb is always
	 * properly sized for single flows.
	 */
	err = nla_put(skb, ODP_FLOW_ATTR_ACTIONS, sf_acts->actions_len,
		      sf_acts->actions);
	if (err < 0 && skb_orig_len)
		goto error;

	return genlmsg_end(skb, odp_header);

nla_put_failure:
	err = -EMSGSIZE;
error:
	genlmsg_cancel(skb, odp_header);
	return err;
}

static struct sk_buff *odp_flow_cmd_alloc_info(struct sw_flow *flow)
{
	const struct sw_flow_actions *sf_acts;
	int len;

	sf_acts = rcu_dereference_protected(flow->sf_acts,
					    lockdep_genl_is_held());

	len = nla_total_size(FLOW_BUFSIZE); /* ODP_FLOW_ATTR_KEY */
	len += nla_total_size(sf_acts->actions_len); /* ODP_FLOW_ATTR_ACTIONS */
	len += nla_total_size(sizeof(struct odp_flow_stats)); /* ODP_FLOW_ATTR_STATS */
	len += nla_total_size(1); /* ODP_FLOW_ATTR_TCP_FLAGS */
	len += nla_total_size(8); /* ODP_FLOW_ATTR_USED */
	return genlmsg_new(NLMSG_ALIGN(sizeof(struct odp_header)) + len, GFP_KERNEL);
}

static struct sk_buff *odp_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp,
					       u32 pid, u32 seq, u8 cmd)
{
	struct sk_buff *skb;
	int retval;

	skb = odp_flow_cmd_alloc_info(flow);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	retval = odp_flow_cmd_fill_info(flow, dp, skb, pid, seq, 0, cmd);
	BUG_ON(retval < 0);
	return skb;
}
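
/* Handles both ODP_FLOW_CMD_NEW and ODP_FLOW_CMD_SET: creates the flow if no
 * matching entry exists (NEW only), otherwise replaces the existing flow's
 * actions in place, and echoes the result to the flow multicast group. */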
static int odp_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct odp_header *odp_header = info->userhdr;
	struct tbl_node *flow_node;
	struct sw_flow_key key;
	struct sw_flow *flow;
	struct sk_buff *reply;
	struct datapath *dp;
	struct tbl *table;
	u32 hash;
	int error;

	/* Extract key. */
	error = -EINVAL;
	if (!a[ODP_FLOW_ATTR_KEY])
		goto error;
	error = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
	if (error)
		goto error;

	/* Validate actions. */
	if (a[ODP_FLOW_ATTR_ACTIONS]) {
		error = validate_actions(a[ODP_FLOW_ATTR_ACTIONS]);
		if (error)
			goto error;
	} else if (info->genlhdr->cmd == ODP_FLOW_CMD_NEW) {
		error = -EINVAL;
		goto error;
	}

	dp = get_dp(odp_header->dp_ifindex);
	error = -ENODEV;
	if (!dp)
		goto error;

	hash = flow_hash(&key);
	table = get_table_protected(dp);
	flow_node = tbl_lookup(table, &key, hash, flow_cmp);
	if (!flow_node) {
		struct sw_flow_actions *acts;

		/* Bail out if we're not allowed to create a new flow. */
		error = -ENOENT;
		if (info->genlhdr->cmd == ODP_FLOW_CMD_SET)
			goto error;

		/* Expand table, if necessary, to make room. */
		if (tbl_count(table) >= tbl_n_buckets(table)) {
			error = expand_table(dp);
			if (error)
				goto error;
			table = get_table_protected(dp);
		}

		/* Allocate flow. */
		flow = flow_alloc();
		if (IS_ERR(flow)) {
			error = PTR_ERR(flow);
			goto error;
		}
		flow->key = key;
		clear_stats(flow);

		/* Obtain actions. */
		acts = flow_actions_alloc(a[ODP_FLOW_ATTR_ACTIONS]);
		error = PTR_ERR(acts);
		if (IS_ERR(acts))
			goto error_free_flow;
		rcu_assign_pointer(flow->sf_acts, acts);

		/* Put flow in bucket. */
		error = tbl_insert(table, &flow->tbl_node, hash);
		if (error)
			goto error_free_flow;

		reply = odp_flow_cmd_build_info(flow, dp, info->snd_pid,
						info->snd_seq, ODP_FLOW_CMD_NEW);
	} else {
		/* We found a matching flow. */
		struct sw_flow_actions *old_acts;

		/* Bail out if we're not allowed to modify an existing flow.
		 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
		 * because Generic Netlink treats the latter as a dump
		 * request.  We also accept NLM_F_EXCL in case that bug ever
		 * gets fixed.
		 */
		error = -EEXIST;
		if (info->genlhdr->cmd == ODP_FLOW_CMD_NEW &&
		    info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
			goto error;

		/* Update actions. */
		flow = flow_cast(flow_node);
		old_acts = rcu_dereference_protected(flow->sf_acts,
						     lockdep_genl_is_held());
		if (a[ODP_FLOW_ATTR_ACTIONS] &&
		    (old_acts->actions_len != nla_len(a[ODP_FLOW_ATTR_ACTIONS]) ||
		     memcmp(old_acts->actions, nla_data(a[ODP_FLOW_ATTR_ACTIONS]),
			    old_acts->actions_len))) {
			struct sw_flow_actions *new_acts;

			new_acts = flow_actions_alloc(a[ODP_FLOW_ATTR_ACTIONS]);
			error = PTR_ERR(new_acts);
			if (IS_ERR(new_acts))
				goto error;

			rcu_assign_pointer(flow->sf_acts, new_acts);
			flow_deferred_free_acts(old_acts);
		}

		reply = odp_flow_cmd_build_info(flow, dp, info->snd_pid,
						info->snd_seq, ODP_FLOW_CMD_NEW);

		/* Clear stats. */
		if (a[ODP_FLOW_ATTR_CLEAR]) {
			spin_lock_bh(&flow->lock);
			clear_stats(flow);
			spin_unlock_bh(&flow->lock);
		}
	}

	if (!IS_ERR(reply))
		genl_notify(reply, genl_info_net(info), info->snd_pid,
			    dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
	else
		netlink_set_err(INIT_NET_GENL_SOCK, 0,
				dp_flow_multicast_group.id, PTR_ERR(reply));
	return 0;

error_free_flow:
	flow_put(flow);
error:
	return error;
}

static int odp_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct odp_header *odp_header = info->userhdr;
	struct sw_flow_key key;
	struct tbl_node *flow_node;
	struct sk_buff *reply;
	struct sw_flow *flow;
	struct datapath *dp;
	struct tbl *table;
	int err;

	if (!a[ODP_FLOW_ATTR_KEY])
		return -EINVAL;
	err = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
	if (err)
		return err;

	dp = get_dp(odp_header->dp_ifindex);
	if (!dp)
		return -ENODEV;

	table = get_table_protected(dp);
	flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
	if (!flow_node)
		return -ENOENT;

	flow = flow_cast(flow_node);
	reply = odp_flow_cmd_build_info(flow, dp, info->snd_pid, info->snd_seq, ODP_FLOW_CMD_NEW);
	if (IS_ERR(reply))
		return PTR_ERR(reply);

	return genlmsg_reply(reply, info);
}

static int odp_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct odp_header *odp_header = info->userhdr;
	struct sw_flow_key key;
	struct tbl_node *flow_node;
	struct sk_buff *reply;
	struct sw_flow *flow;
	struct datapath *dp;
	struct tbl *table;
	int err;

	if (!a[ODP_FLOW_ATTR_KEY])
		return flush_flows(odp_header->dp_ifindex);
	err = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
	if (err)
		return err;

	dp = get_dp(odp_header->dp_ifindex);
	if (!dp)
		return -ENODEV;

	table = get_table_protected(dp);
	flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
	if (!flow_node)
		return -ENOENT;
	flow = flow_cast(flow_node);

	reply = odp_flow_cmd_alloc_info(flow);
	if (!reply)
		return -ENOMEM;

	err = tbl_remove(table, flow_node);
	if (err) {
		kfree_skb(reply);
		return err;
	}

	err = odp_flow_cmd_fill_info(flow, dp, reply, info->snd_pid,
				     info->snd_seq, 0, ODP_FLOW_CMD_DEL);
	BUG_ON(err < 0);

	flow_deferred_free(flow);

	genl_notify(reply, genl_info_net(info), info->snd_pid,
		    dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
	return 0;
}

static int odp_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct odp_header *odp_header = genlmsg_data(nlmsg_data(cb->nlh));
	struct datapath *dp;

	dp = get_dp(odp_header->dp_ifindex);
	if (!dp)
		return -ENODEV;

	for (;;) {
		struct tbl_node *flow_node;
		struct sw_flow *flow;
		u32 bucket, obj;

		bucket = cb->args[0];
		obj = cb->args[1];
		flow_node = tbl_next(get_table_protected(dp), &bucket, &obj);
		if (!flow_node)
			break;

		flow = flow_cast(flow_node);
		if (odp_flow_cmd_fill_info(flow, dp, skb, NETLINK_CB(cb->skb).pid,
					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
					   ODP_FLOW_CMD_NEW) < 0)
			break;

		cb->args[0] = bucket;
		cb->args[1] = obj;
	}
	return skb->len;
}

static struct genl_ops dp_flow_genl_ops[] = {
	{ .cmd = ODP_FLOW_CMD_NEW,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = flow_policy,
	  .doit = odp_flow_cmd_new_or_set
	},
	{ .cmd = ODP_FLOW_CMD_DEL,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = flow_policy,
	  .doit = odp_flow_cmd_del
	},
	{ .cmd = ODP_FLOW_CMD_GET,
	  .flags = 0,               /* OK for unprivileged users. */
	  .policy = flow_policy,
	  .doit = odp_flow_cmd_get,
	  .dumpit = odp_flow_cmd_dump
	},
	{ .cmd = ODP_FLOW_CMD_SET,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = flow_policy,
	  .doit = odp_flow_cmd_new_or_set,
	},
};

static const struct nla_policy datapath_policy[ODP_DP_ATTR_MAX + 1] = {
#ifdef HAVE_NLA_NUL_STRING
	[ODP_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
#endif
	[ODP_DP_ATTR_IPV4_FRAGS] = { .type = NLA_U32 },
	[ODP_DP_ATTR_SAMPLING] = { .type = NLA_U32 },
};

static struct genl_family dp_datapath_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct odp_header),
	.name = ODP_DATAPATH_FAMILY,
	.version = 1,
	.maxattr = ODP_DP_ATTR_MAX
};

static struct genl_multicast_group dp_datapath_multicast_group = {
	.name = ODP_DATAPATH_MCGROUP
};

static int odp_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
				u32 pid, u32 seq, u32 flags, u8 cmd)
{
	struct odp_header *odp_header;
	struct nlattr *nla;
	int err;

	odp_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family,
				 flags, cmd);
	if (!odp_header)
		goto error;

	odp_header->dp_ifindex = dp->dp_ifindex;

	rcu_read_lock();
	err = nla_put_string(skb, ODP_DP_ATTR_NAME, dp_name(dp));
	rcu_read_unlock();
	if (err)
		goto nla_put_failure;

	nla = nla_reserve(skb, ODP_DP_ATTR_STATS, sizeof(struct odp_stats));
	if (!nla)
		goto nla_put_failure;
	get_dp_stats(dp, nla_data(nla));

	NLA_PUT_U32(skb, ODP_DP_ATTR_IPV4_FRAGS,
		    dp->drop_frags ? ODP_DP_FRAG_DROP : ODP_DP_FRAG_ZERO);

	if (dp->sflow_probability)
		NLA_PUT_U32(skb, ODP_DP_ATTR_SAMPLING, dp->sflow_probability);

	nla = nla_nest_start(skb, ODP_DP_ATTR_MCGROUPS);
	if (!nla)
		goto nla_put_failure;
	NLA_PUT_U32(skb, ODP_PACKET_CMD_MISS, packet_mc_group(dp, ODP_PACKET_CMD_MISS));
	NLA_PUT_U32(skb, ODP_PACKET_CMD_ACTION, packet_mc_group(dp, ODP_PACKET_CMD_ACTION));
	NLA_PUT_U32(skb, ODP_PACKET_CMD_SAMPLE, packet_mc_group(dp, ODP_PACKET_CMD_SAMPLE));
	nla_nest_end(skb, nla);

	return genlmsg_end(skb, odp_header);

nla_put_failure:
	genlmsg_cancel(skb, odp_header);
error:
	return -EMSGSIZE;
}

static struct sk_buff *odp_dp_cmd_build_info(struct datapath *dp, u32 pid,
					     u32 seq, u8 cmd)
{
	struct sk_buff *skb;
	int retval;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	retval = odp_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd);
	if (retval < 0) {
		kfree_skb(skb);
		return ERR_PTR(retval);
	}
	return skb;
}

static int odp_dp_cmd_validate(struct nlattr *a[ODP_DP_ATTR_MAX + 1])
{
	if (a[ODP_DP_ATTR_IPV4_FRAGS]) {
		u32 frags = nla_get_u32(a[ODP_DP_ATTR_IPV4_FRAGS]);

		if (frags != ODP_DP_FRAG_ZERO && frags != ODP_DP_FRAG_DROP)
			return -EINVAL;
	}

	return VERIFY_NUL_STRING(a[ODP_DP_ATTR_NAME], IFNAMSIZ - 1);
}

/* Called with genl_mutex and optionally with RTNL lock also. */
static struct datapath *lookup_datapath(struct odp_header *odp_header, struct nlattr *a[ODP_DP_ATTR_MAX + 1])
{
	struct datapath *dp;

	if (!a[ODP_DP_ATTR_NAME])
		dp = get_dp(odp_header->dp_ifindex);
	else {
		struct vport *vport;

		rcu_read_lock();
		vport = vport_locate(nla_data(a[ODP_DP_ATTR_NAME]));
		dp = vport && vport->port_no == ODPP_LOCAL ? vport->dp : NULL;
		rcu_read_unlock();
	}
	return dp ? dp : ERR_PTR(-ENODEV);
}

/* Called with genl_mutex. */
static void change_datapath(struct datapath *dp, struct nlattr *a[ODP_DP_ATTR_MAX + 1])
{
	if (a[ODP_DP_ATTR_IPV4_FRAGS])
		dp->drop_frags = nla_get_u32(a[ODP_DP_ATTR_IPV4_FRAGS]) == ODP_DP_FRAG_DROP;
	if (a[ODP_DP_ATTR_SAMPLING])
		dp->sflow_probability = nla_get_u32(a[ODP_DP_ATTR_SAMPLING]);
}

static int odp_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct vport_parms parms;
	struct sk_buff *reply;
	struct datapath *dp;
	struct vport *vport;
	int err;

	err = -EINVAL;
	if (!a[ODP_DP_ATTR_NAME])
		goto err;

	err = odp_dp_cmd_validate(a);
	if (err)
		goto err;

	rtnl_lock();
	err = -ENODEV;
	if (!try_module_get(THIS_MODULE))
		goto err_unlock_rtnl;

	err = -ENOMEM;
	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
	if (dp == NULL)
		goto err_put_module;
	INIT_LIST_HEAD(&dp->port_list);

	/* Initialize kobject for bridge.  This will be added as
	 * /sys/class/net/<devname>/brif later, if sysfs is enabled. */
	dp->ifobj.kset = NULL;
	kobject_init(&dp->ifobj, &dp_ktype);

	/* Allocate table. */
	err = -ENOMEM;
	rcu_assign_pointer(dp->table, tbl_create(TBL_MIN_BUCKETS));
	if (!dp->table)
		goto err_free_dp;

	/* Set up our datapath device. */
	parms.name = nla_data(a[ODP_DP_ATTR_NAME]);
	parms.type = ODP_VPORT_TYPE_INTERNAL;
	parms.options = NULL;
	parms.dp = dp;
	parms.port_no = ODPP_LOCAL;
	vport = new_vport(&parms);
	if (IS_ERR(vport)) {
		err = PTR_ERR(vport);
		if (err == -EBUSY)
			err = -EEXIST;

		goto err_destroy_table;
	}
	dp->dp_ifindex = vport_get_ifindex(vport);

	dp->drop_frags = 0;
	dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
	if (!dp->stats_percpu) {
		err = -ENOMEM;
		goto err_destroy_local_port;
	}

	change_datapath(dp, a);

	reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
	err = PTR_ERR(reply);
	if (IS_ERR(reply))
		goto err_destroy_local_port;

	list_add_tail(&dp->list_node, &dps);
	dp_sysfs_add_dp(dp);

	rtnl_unlock();

	genl_notify(reply, genl_info_net(info), info->snd_pid,
		    dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
	return 0;

err_destroy_local_port:
	dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));
err_destroy_table:
	tbl_destroy(get_table_protected(dp), NULL);
err_free_dp:
	kfree(dp);
err_put_module:
	module_put(THIS_MODULE);
err_unlock_rtnl:
	rtnl_unlock();
err:
	return err;
}

static int odp_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct vport *vport, *next_vport;
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	err = odp_dp_cmd_validate(info->attrs);
	if (err)
		goto exit;

	rtnl_lock();
	dp = lookup_datapath(info->userhdr, info->attrs);
	err = PTR_ERR(dp);
	if (IS_ERR(dp))
		goto exit_unlock;

	reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_DEL);
	err = PTR_ERR(reply);
	if (IS_ERR(reply))
		goto exit_unlock;

	list_for_each_entry_safe (vport, next_vport, &dp->port_list, node)
		if (vport->port_no != ODPP_LOCAL)
			dp_detach_port(vport);

	dp_sysfs_del_dp(dp);
	list_del(&dp->list_node);
	dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));

	/* rtnl_unlock() will wait until all the references to devices that
	 * are pending unregistration have been dropped.  We do it here to
	 * ensure that any internal devices (which contain DP pointers) are
	 * fully destroyed before freeing the datapath.
	 */
	rtnl_unlock();

	call_rcu(&dp->rcu, destroy_dp_rcu);
	module_put(THIS_MODULE);

	genl_notify(reply, genl_info_net(info), info->snd_pid,
		    dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);

	return 0;

exit_unlock:
	rtnl_unlock();
exit:
	return err;
}

static int odp_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	err = odp_dp_cmd_validate(info->attrs);
	if (err)
		return err;

	dp = lookup_datapath(info->userhdr, info->attrs);
	if (IS_ERR(dp))
		return PTR_ERR(dp);

	change_datapath(dp, info->attrs);

	reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
	if (IS_ERR(reply)) {
		err = PTR_ERR(reply);
		netlink_set_err(INIT_NET_GENL_SOCK, 0,
				dp_datapath_multicast_group.id, err);
		return 0;
	}

	genl_notify(reply, genl_info_net(info), info->snd_pid,
		    dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
	return 0;
}

static int odp_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	err = odp_dp_cmd_validate(info->attrs);
	if (err)
		return err;

	dp = lookup_datapath(info->userhdr, info->attrs);
	if (IS_ERR(dp))
		return PTR_ERR(dp);

	reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
	if (IS_ERR(reply))
		return PTR_ERR(reply);

	return genlmsg_reply(reply, info);
}

static int odp_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct datapath *dp;
	int skip = cb->args[0];
	int i = 0;

	/* 'i' must advance even for skipped entries, or a resumed dump with
	 * skip > 0 would never reach the remaining datapaths. */
	list_for_each_entry (dp, &dps, list_node) {
		if (i >= skip &&
		    odp_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid,
					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
					 ODP_DP_CMD_NEW) < 0)
			break;
		i++;
	}

	cb->args[0] = i;

	return skb->len;
}

static struct genl_ops dp_datapath_genl_ops[] = {
        { .cmd = ODP_DP_CMD_NEW,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = datapath_policy,
          .doit = odp_dp_cmd_new
        },
        { .cmd = ODP_DP_CMD_DEL,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = datapath_policy,
          .doit = odp_dp_cmd_del
        },
        { .cmd = ODP_DP_CMD_GET,
          .flags = 0,               /* OK for unprivileged users. */
          .policy = datapath_policy,
          .doit = odp_dp_cmd_get,
          .dumpit = odp_dp_cmd_dump
        },
        { .cmd = ODP_DP_CMD_SET,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = datapath_policy,
          .doit = odp_dp_cmd_set,
        },
};

static const struct nla_policy vport_policy[ODP_VPORT_ATTR_MAX + 1] = {
#ifdef HAVE_NLA_NUL_STRING
        [ODP_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
        [ODP_VPORT_ATTR_STATS] = { .len = sizeof(struct rtnl_link_stats64) },
        [ODP_VPORT_ATTR_ADDRESS] = { .len = ETH_ALEN },
#else
        [ODP_VPORT_ATTR_STATS] = { .minlen = sizeof(struct rtnl_link_stats64) },
        [ODP_VPORT_ATTR_ADDRESS] = { .minlen = ETH_ALEN },
#endif
        /* These entries do not depend on HAVE_NLA_NUL_STRING and must be
         * present in both configurations, so keep them outside the #ifdef. */
        [ODP_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
        [ODP_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
        [ODP_VPORT_ATTR_MTU] = { .type = NLA_U32 },
        [ODP_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
};
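
/*
 * On kernels whose netlink lacks NLA_NUL_STRING (no HAVE_NLA_NUL_STRING),
 * the policy cannot verify that ODP_VPORT_ATTR_NAME is a NUL-terminated
 * string of bounded length, so odp_vport_cmd_validate() below performs
 * that check by hand via VERIFY_NUL_STRING().
 */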

static struct genl_family dp_vport_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct odp_header),
        .name = ODP_VPORT_FAMILY,
        .version = 1,
        .maxattr = ODP_VPORT_ATTR_MAX
};

static struct genl_multicast_group dp_vport_multicast_group = {
        .name = ODP_VPORT_MCGROUP
};

/* Called with RTNL lock or RCU read lock. */
static int odp_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
                                   u32 pid, u32 seq, u32 flags, u8 cmd)
{
        struct odp_header *odp_header;
        struct nlattr *nla;
        int ifindex, iflink;
        int mtu;
        int err;

        odp_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family,
                                 flags, cmd);
        if (!odp_header)
                return -EMSGSIZE;

        odp_header->dp_ifindex = vport->dp->dp_ifindex;

        NLA_PUT_U32(skb, ODP_VPORT_ATTR_PORT_NO, vport->port_no);
        NLA_PUT_U32(skb, ODP_VPORT_ATTR_TYPE, vport_get_type(vport));
        NLA_PUT_STRING(skb, ODP_VPORT_ATTR_NAME, vport_get_name(vport));

        nla = nla_reserve(skb, ODP_VPORT_ATTR_STATS, sizeof(struct rtnl_link_stats64));
        if (!nla)
                goto nla_put_failure;
        if (vport_get_stats(vport, nla_data(nla)))
                __skb_trim(skb, skb->len - nla->nla_len);

        NLA_PUT(skb, ODP_VPORT_ATTR_ADDRESS, ETH_ALEN, vport_get_addr(vport));

        mtu = vport_get_mtu(vport);
        if (mtu)
                NLA_PUT_U32(skb, ODP_VPORT_ATTR_MTU, mtu);

        err = vport_get_options(vport, skb);
        if (err == -EMSGSIZE)
                goto error;

        ifindex = vport_get_ifindex(vport);
        if (ifindex > 0)
                NLA_PUT_U32(skb, ODP_VPORT_ATTR_IFINDEX, ifindex);

        iflink = vport_get_iflink(vport);
        if (iflink > 0)
                NLA_PUT_U32(skb, ODP_VPORT_ATTR_IFLINK, iflink);

        return genlmsg_end(skb, odp_header);

nla_put_failure:
        err = -EMSGSIZE;
error:
        genlmsg_cancel(skb, odp_header);
        return err;
}
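
/*
 * The NLA_PUT*() macros used above expand to roughly
 *
 *      if (nla_put_u32(skb, attrtype, value) < 0)
 *              goto nla_put_failure;
 *
 * which is why every function that uses them must provide an
 * nla_put_failure label.
 */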

/* Called with RTNL lock or RCU read lock. */
static struct sk_buff *odp_vport_cmd_build_info(struct vport *vport, u32 pid,
                                                u32 seq, u8 cmd)
{
        struct sk_buff *skb;
        int retval;

        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        retval = odp_vport_cmd_fill_info(vport, skb, pid, seq, 0, cmd);
        if (retval < 0) {
                kfree_skb(skb);
                return ERR_PTR(retval);
        }
        return skb;
}
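
/*
 * The reply skb is allocated with GFP_ATOMIC because some callers (for
 * example odp_vport_cmd_get()) invoke this while holding the RCU read
 * lock, where sleeping allocations are not allowed.
 */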

static int odp_vport_cmd_validate(struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
{
        return VERIFY_NUL_STRING(a[ODP_VPORT_ATTR_NAME], IFNAMSIZ - 1);
}

/* Called with RTNL lock or RCU read lock. */
static struct vport *lookup_vport(struct odp_header *odp_header,
                                  struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
{
        struct datapath *dp;
        struct vport *vport;

        if (a[ODP_VPORT_ATTR_NAME]) {
                vport = vport_locate(nla_data(a[ODP_VPORT_ATTR_NAME]));
                if (!vport)
                        return ERR_PTR(-ENODEV);
                return vport;
        } else if (a[ODP_VPORT_ATTR_PORT_NO]) {
                u32 port_no = nla_get_u32(a[ODP_VPORT_ATTR_PORT_NO]);

                if (port_no >= DP_MAX_PORTS)
                        return ERR_PTR(-EFBIG);

                dp = get_dp(odp_header->dp_ifindex);
                if (!dp)
                        return ERR_PTR(-ENODEV);

                vport = get_vport_protected(dp, port_no);
                if (!vport)
                        return ERR_PTR(-ENOENT);
                return vport;
        } else
                return ERR_PTR(-EINVAL);
}
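
/*
 * The two addressing modes above are tried in a fixed order: a vport name
 * (ODP_VPORT_ATTR_NAME) is looked up globally and takes precedence over a
 * port number, while ODP_VPORT_ATTR_PORT_NO is interpreted relative to the
 * datapath named by dp_ifindex in the request header.  A request that
 * supplies neither attribute fails with -EINVAL.
 */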

/* Called with RTNL lock. */
static int change_vport(struct vport *vport, struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
{
        int err = 0;
        if (a[ODP_VPORT_ATTR_STATS])
                err = vport_set_stats(vport, nla_data(a[ODP_VPORT_ATTR_STATS]));
        if (!err && a[ODP_VPORT_ATTR_ADDRESS])
                err = vport_set_addr(vport, nla_data(a[ODP_VPORT_ATTR_ADDRESS]));
        if (!err && a[ODP_VPORT_ATTR_MTU])
                err = vport_set_mtu(vport, nla_get_u32(a[ODP_VPORT_ATTR_MTU]));
        return err;
}

static int odp_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct odp_header *odp_header = info->userhdr;
        struct vport_parms parms;
        struct sk_buff *reply;
        struct vport *vport;
        struct datapath *dp;
        u32 port_no;
        int err;

        err = -EINVAL;
        if (!a[ODP_VPORT_ATTR_NAME] || !a[ODP_VPORT_ATTR_TYPE])
                goto exit;

        err = odp_vport_cmd_validate(a);
        if (err)
                goto exit;

        rtnl_lock();
        dp = get_dp(odp_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto exit_unlock;

        if (a[ODP_VPORT_ATTR_PORT_NO]) {
                port_no = nla_get_u32(a[ODP_VPORT_ATTR_PORT_NO]);

                err = -EFBIG;
                if (port_no >= DP_MAX_PORTS)
                        goto exit_unlock;

                vport = get_vport_protected(dp, port_no);
                err = -EBUSY;
                if (vport)
                        goto exit_unlock;
        } else {
                for (port_no = 1; ; port_no++) {
                        if (port_no >= DP_MAX_PORTS) {
                                err = -EFBIG;
                                goto exit_unlock;
                        }
                        vport = get_vport_protected(dp, port_no);
                        if (!vport)
                                break;
                }
        }

        parms.name = nla_data(a[ODP_VPORT_ATTR_NAME]);
        parms.type = nla_get_u32(a[ODP_VPORT_ATTR_TYPE]);
        parms.options = a[ODP_VPORT_ATTR_OPTIONS];
        parms.dp = dp;
        parms.port_no = port_no;

        vport = new_vport(&parms);
        err = PTR_ERR(vport);
        if (IS_ERR(vport))
                goto exit_unlock;

        set_internal_devs_mtu(dp);
        dp_sysfs_add_if(vport);

        err = change_vport(vport, a);
        if (!err) {
                reply = odp_vport_cmd_build_info(vport, info->snd_pid,
                                                 info->snd_seq, ODP_VPORT_CMD_NEW);
                if (IS_ERR(reply))
                        err = PTR_ERR(reply);
        }
        if (err) {
                dp_detach_port(vport);
                goto exit_unlock;
        }
        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);

exit_unlock:
        rtnl_unlock();
exit:
        return err;
}
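
/*
 * When the request does not pin a port number, the allocation loop in
 * odp_vport_cmd_new() starts at port 1: port 0 (ODPP_LOCAL) is the
 * datapath's own internal port, created together with the datapath and
 * never handed out here.
 */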

static int odp_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct sk_buff *reply;
        struct vport *vport;
        int err;

        err = odp_vport_cmd_validate(a);
        if (err)
                goto exit;

        rtnl_lock();
        vport = lookup_vport(info->userhdr, a);
        err = PTR_ERR(vport);
        if (IS_ERR(vport))
                goto exit_unlock;

        err = 0;
        if (a[ODP_VPORT_ATTR_OPTIONS])
                err = vport_set_options(vport, a[ODP_VPORT_ATTR_OPTIONS]);
        if (!err)
                err = change_vport(vport, a);

        reply = odp_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
                                         ODP_VPORT_CMD_NEW);
        if (IS_ERR(reply)) {
                netlink_set_err(INIT_NET_GENL_SOCK, 0,
                                dp_vport_multicast_group.id, PTR_ERR(reply));
                /* Returning directly here would leak the RTNL lock, so
                 * unwind through exit_unlock instead. */
                goto exit_unlock;
        }

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);

exit_unlock:
        rtnl_unlock();
exit:
        return err;
}

static int odp_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct sk_buff *reply;
        struct vport *vport;
        int err;

        err = odp_vport_cmd_validate(a);
        if (err)
                goto exit;

        rtnl_lock();
        vport = lookup_vport(info->userhdr, a);
        err = PTR_ERR(vport);
        if (IS_ERR(vport))
                goto exit_unlock;

        if (vport->port_no == ODPP_LOCAL) {
                err = -EINVAL;
                goto exit_unlock;
        }

        reply = odp_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
                                         ODP_VPORT_CMD_DEL);
        err = PTR_ERR(reply);
        if (IS_ERR(reply))
                goto exit_unlock;

        err = dp_detach_port(vport);

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);

exit_unlock:
        rtnl_unlock();
exit:
        return err;
}
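
/*
 * Note that odp_vport_cmd_del() builds the reply *before* calling
 * dp_detach_port(): composing the message reads the vport's name, stats
 * and options, which are no longer safe to touch once the port has been
 * torn down.
 */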

static int odp_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct odp_header *odp_header = info->userhdr;
        struct sk_buff *reply;
        struct vport *vport;
        int err;

        err = odp_vport_cmd_validate(a);
        if (err)
                goto exit;

        rcu_read_lock();
        vport = lookup_vport(odp_header, a);
        err = PTR_ERR(vport);
        if (IS_ERR(vport))
                goto exit_unlock;

        reply = odp_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
                                         ODP_VPORT_CMD_NEW);
        err = PTR_ERR(reply);
        if (IS_ERR(reply))
                goto exit_unlock;

        err = genlmsg_reply(reply, info);

exit_unlock:
        rcu_read_unlock();
exit:
        return err;
}

static int odp_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct odp_header *odp_header = genlmsg_data(nlmsg_data(cb->nlh));
        struct datapath *dp;
        u32 port_no;

        dp = get_dp(odp_header->dp_ifindex);
        if (!dp)
                return -ENODEV;

        rcu_read_lock();
        for (port_no = cb->args[0]; port_no < DP_MAX_PORTS; port_no++) {
                struct vport *vport;

                vport = get_vport_protected(dp, port_no);
                if (!vport)
                        continue;

                if (odp_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).pid,
                                            cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                            ODP_VPORT_CMD_NEW) < 0)
                        break;
        }
        rcu_read_unlock();

        cb->args[0] = port_no;

        return skb->len;
}

static struct genl_ops dp_vport_genl_ops[] = {
        { .cmd = ODP_VPORT_CMD_NEW,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = vport_policy,
          .doit = odp_vport_cmd_new
        },
        { .cmd = ODP_VPORT_CMD_DEL,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = vport_policy,
          .doit = odp_vport_cmd_del
        },
        { .cmd = ODP_VPORT_CMD_GET,
          .flags = 0,               /* OK for unprivileged users. */
          .policy = vport_policy,
          .doit = odp_vport_cmd_get,
          .dumpit = odp_vport_cmd_dump
        },
        { .cmd = ODP_VPORT_CMD_SET,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = vport_policy,
          .doit = odp_vport_cmd_set,
        },
};

struct genl_family_and_ops {
        struct genl_family *family;
        struct genl_ops *ops;
        int n_ops;
        struct genl_multicast_group *group;
};

static const struct genl_family_and_ops dp_genl_families[] = {
        { &dp_datapath_genl_family,
          dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
          &dp_datapath_multicast_group },
        { &dp_vport_genl_family,
          dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
          &dp_vport_multicast_group },
        { &dp_flow_genl_family,
          dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
          &dp_flow_multicast_group },
        { &dp_packet_genl_family,
          dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
          NULL },
};
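
/*
 * Registration is table driven so that setup and teardown stay symmetric:
 * dp_register_genl() walks this array in order and, on failure,
 * dp_unregister_genl() only has to undo the first n_registered entries.
 */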

static void dp_unregister_genl(int n_families)
{
        int i;

        for (i = 0; i < n_families; i++)
                genl_unregister_family(dp_genl_families[i].family);
}

static int dp_register_genl(void)
{
        int n_registered;
        int err;
        int i;

        n_registered = 0;
        for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
                const struct genl_family_and_ops *f = &dp_genl_families[i];

                err = genl_register_family_with_ops(f->family, f->ops,
                                                    f->n_ops);
                if (err)
                        goto error;
                n_registered++;

                if (f->group) {
                        err = genl_register_mc_group(f->family, f->group);
                        if (err)
                                goto error;
                }
        }

        err = packet_register_mc_groups();
        if (err)
                goto error;
        return 0;

error:
        dp_unregister_genl(n_registered);
        return err;
}
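
/*
 * If registering a multicast group fails, the family it belongs to has
 * already been counted in n_registered, so the error path still
 * unregisters that family; unregistering a family is expected to drop
 * any multicast groups registered for it as well.
 */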

static int __init dp_init(void)
{
        struct sk_buff *dummy_skb;
        int err;

        BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb));

        pr_info("Open vSwitch %s, built "__DATE__" "__TIME__"\n", VERSION BUILDNR);

        err = flow_init();
        if (err)
                goto error;

        err = vport_init();
        if (err)
                goto error_flow_exit;

        err = register_netdevice_notifier(&dp_device_notifier);
        if (err)
                goto error_vport_exit;

        err = dp_register_genl();
        if (err < 0)
                goto error_unreg_notifier;

        return 0;

error_unreg_notifier:
        unregister_netdevice_notifier(&dp_device_notifier);
error_vport_exit:
        vport_exit();
error_flow_exit:
        flow_exit();
error:
        return err;
}

static void dp_cleanup(void)
{
        rcu_barrier();
        dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
        unregister_netdevice_notifier(&dp_device_notifier);
        vport_exit();
        flow_exit();
}
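
/*
 * rcu_barrier() must come first in dp_cleanup(): it waits for all pending
 * call_rcu() callbacks (such as destroy_dp_rcu()) to run to completion,
 * so that none of them can fire after the module text has been unloaded.
 */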

module_init(dp_init);
module_exit(dp_cleanup);

MODULE_DESCRIPTION("Open vSwitch switching datapath");
MODULE_LICENSE("GPL");