datapath: Drop parameters from execute_actions().
datapath/datapath.c
/*
 * Copyright (c) 2007, 2008, 2009, 2010, 2011 Nicira Networks.
 * Distributed under the terms of the GNU GPL version 2.
 *
 * Significant portions of this file may be copied from parts of the Linux
 * kernel, by Linus Torvalds and others.
 */

/* Functions for managing the dp interface/device. */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/version.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/system.h>
#include <asm/div64.h>
#include <asm/bug.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/inet_ecn.h>
#include <net/genetlink.h>

#include "openvswitch/datapath-protocol.h"
#include "checksum.h"
#include "datapath.h"
#include "actions.h"
#include "flow.h"
#include "table.h"
#include "vlan.h"
#include "vport-internal_dev.h"

int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
EXPORT_SYMBOL(dp_ioctl_hook);

/**
 * DOC: Locking:
 *
 * Writes to device state (add/remove datapath, port, set operations on vports,
 * etc.) are protected by RTNL.
 *
 * Writes to other state (flow table modifications, set miscellaneous datapath
 * parameters such as drop frags, etc.) are protected by genl_mutex.  The RTNL
 * lock nests inside genl_mutex.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact
 * with each other.
 */

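/* Illustrative sketch (added for clarity, not part of the original source):
 * a writer that needs both locks must take them in the documented order,
 * genl_mutex first, then RTNL:
 *
 *	(genl_mutex is already held while a Generic Netlink handler runs)
 *	rtnl_lock();
 *	...modify datapath or port state...
 *	rtnl_unlock();
 */
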
/* Global list of datapaths to enable dumping them all out.
 * Protected by genl_mutex.
 */
static LIST_HEAD(dps);

static struct vport *new_vport(const struct vport_parms *);
static int queue_control_packets(struct datapath *, struct sk_buff *,
                                 const struct dp_upcall_info *);

/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
struct datapath *get_dp(int dp_ifindex)
{
        struct datapath *dp = NULL;
        struct net_device *dev;

        rcu_read_lock();
        dev = dev_get_by_index_rcu(&init_net, dp_ifindex);
        if (dev) {
                struct vport *vport = internal_dev_get_vport(dev);
                if (vport)
                        dp = vport->dp;
        }
        rcu_read_unlock();

        return dp;
}
EXPORT_SYMBOL_GPL(get_dp);

/* Must be called with genl_mutex. */
static struct tbl *get_table_protected(struct datapath *dp)
{
        return rcu_dereference_protected(dp->table, lockdep_genl_is_held());
}

/* Must be called with rcu_read_lock or RTNL lock. */
static struct vport *get_vport_protected(struct datapath *dp, u16 port_no)
{
        return rcu_dereference_rtnl(dp->ports[port_no]);
}

/* Must be called with rcu_read_lock or RTNL lock. */
const char *dp_name(const struct datapath *dp)
{
        return vport_get_name(rcu_dereference_rtnl(dp->ports[ODPP_LOCAL]));
}

static inline size_t br_nlmsg_size(void)
{
        return NLMSG_ALIGN(sizeof(struct ifinfomsg))
               + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
               + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
               + nla_total_size(4) /* IFLA_MASTER */
               + nla_total_size(4) /* IFLA_MTU */
               + nla_total_size(4) /* IFLA_LINK */
               + nla_total_size(1); /* IFLA_OPERSTATE */
}

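/* Sizing note (added for clarity): nla_total_size(payload) accounts for the
 * 4-byte attribute header plus padding to a 4-byte boundary, so e.g. both
 * nla_total_size(4) and nla_total_size(1) come to 8 bytes.  The sum above is
 * therefore an upper bound on the RTM_NEWLINK/RTM_DELLINK message built by
 * dp_fill_ifinfo() below. */
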
/* Caller must hold RTNL lock. */
static int dp_fill_ifinfo(struct sk_buff *skb,
                          const struct vport *port,
                          int event, unsigned int flags)
{
        struct datapath *dp = port->dp;
        int ifindex = vport_get_ifindex(port);
        int iflink = vport_get_iflink(port);
        struct ifinfomsg *hdr;
        struct nlmsghdr *nlh;

        if (ifindex < 0)
                return ifindex;

        if (iflink < 0)
                return iflink;

        nlh = nlmsg_put(skb, 0, 0, event, sizeof(*hdr), flags);
        if (nlh == NULL)
                return -EMSGSIZE;

        hdr = nlmsg_data(nlh);
        hdr->ifi_family = AF_BRIDGE;
        hdr->__ifi_pad = 0;
        hdr->ifi_type = ARPHRD_ETHER;
        hdr->ifi_index = ifindex;
        hdr->ifi_flags = vport_get_flags(port);
        hdr->ifi_change = 0;

        NLA_PUT_STRING(skb, IFLA_IFNAME, vport_get_name(port));
        NLA_PUT_U32(skb, IFLA_MASTER,
                vport_get_ifindex(get_vport_protected(dp, ODPP_LOCAL)));
        NLA_PUT_U32(skb, IFLA_MTU, vport_get_mtu(port));
#ifdef IFLA_OPERSTATE
        NLA_PUT_U8(skb, IFLA_OPERSTATE,
                   vport_is_running(port)
                        ? vport_get_operstate(port)
                        : IF_OPER_DOWN);
#endif

        NLA_PUT(skb, IFLA_ADDRESS, ETH_ALEN, vport_get_addr(port));

        if (ifindex != iflink)
                NLA_PUT_U32(skb, IFLA_LINK, iflink);

        return nlmsg_end(skb, nlh);

nla_put_failure:
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}

/* Caller must hold RTNL lock. */
static void dp_ifinfo_notify(int event, struct vport *port)
{
        struct sk_buff *skb;
        int err = -ENOBUFS;

        skb = nlmsg_new(br_nlmsg_size(), GFP_KERNEL);
        if (skb == NULL)
                goto errout;

        err = dp_fill_ifinfo(skb, port, event, 0);
        if (err < 0) {
                /* -EMSGSIZE implies BUG in br_nlmsg_size() */
                WARN_ON(err == -EMSGSIZE);
                kfree_skb(skb);
                goto errout;
        }
        rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
        return;
errout:
        if (err < 0)
                rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err);
}

static void release_dp(struct kobject *kobj)
{
        struct datapath *dp = container_of(kobj, struct datapath, ifobj);
        kfree(dp);
}

static struct kobj_type dp_ktype = {
        .release = release_dp
};

static void destroy_dp_rcu(struct rcu_head *rcu)
{
        struct datapath *dp = container_of(rcu, struct datapath, rcu);

        tbl_destroy((struct tbl __force *)dp->table, flow_free_tbl);
        free_percpu(dp->stats_percpu);
        kobject_put(&dp->ifobj);
}

/* Called with RTNL lock and genl_lock. */
static struct vport *new_vport(const struct vport_parms *parms)
{
        struct vport *vport;

        vport = vport_add(parms);
        if (!IS_ERR(vport)) {
                struct datapath *dp = parms->dp;

                rcu_assign_pointer(dp->ports[parms->port_no], vport);
                list_add(&vport->node, &dp->port_list);

                dp_ifinfo_notify(RTM_NEWLINK, vport);
        }

        return vport;
}

/* Called with RTNL lock. */
int dp_detach_port(struct vport *p)
{
        ASSERT_RTNL();

        if (p->port_no != ODPP_LOCAL)
                dp_sysfs_del_if(p);
        dp_ifinfo_notify(RTM_DELLINK, p);

        /* First drop references to device. */
        list_del(&p->node);
        rcu_assign_pointer(p->dp->ports[p->port_no], NULL);

        /* Then destroy it. */
        return vport_del(p);
}

/* Must be called with rcu_read_lock. */
void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
{
        struct datapath *dp = p->dp;
        struct dp_stats_percpu *stats;
        int stats_counter_off;
        int error;

        OVS_CB(skb)->vport = p;

        if (!OVS_CB(skb)->flow) {
                struct sw_flow_key key;
                struct tbl_node *flow_node;
                bool is_frag;

                /* Extract flow from 'skb' into 'key'. */
                error = flow_extract(skb, p->port_no, &key, &is_frag);
                if (unlikely(error)) {
                        kfree_skb(skb);
                        return;
                }

                if (is_frag && dp->drop_frags) {
                        kfree_skb(skb);
                        stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
                        goto out;
                }

                /* Look up flow. */
                flow_node = tbl_lookup(rcu_dereference(dp->table), &key,
                                        flow_hash(&key), flow_cmp);
                if (unlikely(!flow_node)) {
                        struct dp_upcall_info upcall;

                        upcall.cmd = ODP_PACKET_CMD_MISS;
                        upcall.key = &key;
                        upcall.userdata = 0;
                        upcall.sample_pool = 0;
                        upcall.actions = NULL;
                        upcall.actions_len = 0;
                        dp_upcall(dp, skb, &upcall);
                        stats_counter_off = offsetof(struct dp_stats_percpu, n_missed);
                        goto out;
                }

                OVS_CB(skb)->flow = flow_cast(flow_node);
        }

        stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
        flow_used(OVS_CB(skb)->flow, skb);
        execute_actions(dp, skb);

out:
        /* Update datapath statistics. */
        local_bh_disable();
        stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());

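        /* The seqcount lets get_dp_stats() read these 64-bit counters
         * consistently on 32-bit systems without a lock; writers only run
         * with bottom halves disabled on this CPU. */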
        write_seqcount_begin(&stats->seqlock);
        (*(u64 *)((u8 *)stats + stats_counter_off))++;
        write_seqcount_end(&stats->seqlock);

        local_bh_enable();
}

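/* Copy 'skb' into 'to', completing the checksum of a CHECKSUM_PARTIAL packet
 * along the way and folding the result into place at the recorded checksum
 * offset.  (Descriptive comment added for clarity.) */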
static void copy_and_csum_skb(struct sk_buff *skb, void *to)
{
        u16 csum_start, csum_offset;
        __wsum csum;

        get_skb_csum_pointers(skb, &csum_start, &csum_offset);
        csum_start -= skb_headroom(skb);

        skb_copy_bits(skb, 0, to, csum_start);

        csum = skb_copy_and_csum_bits(skb, csum_start, to + csum_start,
                                      skb->len - csum_start, 0);
        *(__sum16 *)(to + csum_start + csum_offset) = csum_fold(csum);
}

static struct genl_family dp_packet_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct odp_header),
        .name = ODP_PACKET_FAMILY,
        .version = 1,
        .maxattr = ODP_PACKET_ATTR_MAX
};

/* Generic Netlink multicast groups for upcalls.
 *
 * We really want three unique multicast groups per datapath, but we can't even
 * get one, because genl_register_mc_group() takes genl_lock, which is also
 * held during Generic Netlink message processing, so trying to acquire
 * multicast groups during ODP_DP_NEW processing deadlocks.  Instead, we
 * preallocate a few groups and use them round-robin for datapaths.  Collision
 * isn't fatal--multicast listeners should check that the family is the one
 * that they want and discard others--but it wastes time and memory to receive
 * unwanted messages.
 */
#define PACKET_N_MC_GROUPS 16
static struct genl_multicast_group packet_mc_groups[PACKET_N_MC_GROUPS];

static u32 packet_mc_group(struct datapath *dp, u8 cmd)
{
        u32 idx;
        BUILD_BUG_ON_NOT_POWER_OF_2(PACKET_N_MC_GROUPS);

        idx = jhash_2words(dp->dp_ifindex, cmd, 0) & (PACKET_N_MC_GROUPS - 1);
        return packet_mc_groups[idx].id;
}

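/* Worked example (illustrative): with 16 preallocated groups, a datapath
 * hashes each of its three upcall commands (MISS, ACTION, SAMPLE) to one of
 * the groups "packet0".."packet15".  Two datapaths may well share a group,
 * so a listener must compare odp_header->dp_ifindex in each received message
 * against the datapath it cares about and discard the rest. */
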
static int packet_register_mc_groups(void)
{
        int i;

        for (i = 0; i < PACKET_N_MC_GROUPS; i++) {
                struct genl_multicast_group *group = &packet_mc_groups[i];
                int error;

                sprintf(group->name, "packet%d", i);
                error = genl_register_mc_group(&dp_packet_genl_family, group);
                if (error)
                        return error;
        }
        return 0;
}

int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info)
{
        struct dp_stats_percpu *stats;
        int err;

        WARN_ON_ONCE(skb_shared(skb));

        forward_ip_summed(skb);

        err = vswitch_skb_checksum_setup(skb);
        if (err)
                goto err_kfree_skb;

        /* Break apart GSO packets into their component pieces.  Otherwise
         * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
        if (skb_is_gso(skb)) {
                struct sk_buff *nskb = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);

                kfree_skb(skb);
                skb = nskb;
                if (IS_ERR(skb)) {
                        err = PTR_ERR(skb);
                        goto err;
                }
        }

        err = queue_control_packets(dp, skb, upcall_info);
        if (err)
                goto err;

        return 0;

err_kfree_skb:
        kfree_skb(skb);
err:
        local_bh_disable();
        stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());

        write_seqcount_begin(&stats->seqlock);
        stats->n_lost++;
        write_seqcount_end(&stats->seqlock);

        local_bh_enable();

        return err;
}

/* Send each packet in the 'skb' list to userspace for 'dp' as directed by
 * 'upcall_info'.  There will be only one packet unless we broke up a GSO
 * packet.
 */
static int queue_control_packets(struct datapath *dp, struct sk_buff *skb,
                                 const struct dp_upcall_info *upcall_info)
{
        u32 group = packet_mc_group(dp, upcall_info->cmd);
        struct sk_buff *nskb;
        int port_no;
        int err;

        if (OVS_CB(skb)->vport)
                port_no = OVS_CB(skb)->vport->port_no;
        else
                port_no = ODPP_LOCAL;

        do {
                struct odp_header *upcall;
                struct sk_buff *user_skb; /* to be queued to userspace */
                struct nlattr *nla;
                unsigned int len;

                nskb = skb->next;
                skb->next = NULL;

                err = vlan_deaccel_tag(skb);
                if (unlikely(err))
                        goto err_kfree_skbs;

                if (nla_attr_size(skb->len) > USHRT_MAX) {
                        /* Too large to fit in a single Netlink attribute. */
                        err = -EFBIG;
                        goto err_kfree_skbs;
                }

                len = sizeof(struct odp_header);
                len += nla_total_size(skb->len);
                len += nla_total_size(FLOW_BUFSIZE);
                if (upcall_info->userdata)
                        len += nla_total_size(8);
                if (upcall_info->sample_pool)
                        len += nla_total_size(4);
                if (upcall_info->actions_len)
                        len += nla_total_size(upcall_info->actions_len);

                user_skb = genlmsg_new(len, GFP_ATOMIC);
                if (!user_skb) {
                        err = -ENOBUFS;
                        netlink_set_err(INIT_NET_GENL_SOCK, 0, group, -ENOBUFS);
                        goto err_kfree_skbs;
                }

                upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 0, upcall_info->cmd);
                upcall->dp_ifindex = dp->dp_ifindex;

                nla = nla_nest_start(user_skb, ODP_PACKET_ATTR_KEY);
                flow_to_nlattrs(upcall_info->key, user_skb);
                nla_nest_end(user_skb, nla);

                if (upcall_info->userdata)
                        nla_put_u64(user_skb, ODP_PACKET_ATTR_USERDATA, upcall_info->userdata);
                if (upcall_info->sample_pool)
                        nla_put_u32(user_skb, ODP_PACKET_ATTR_SAMPLE_POOL, upcall_info->sample_pool);
                if (upcall_info->actions_len) {
                        const struct nlattr *actions = upcall_info->actions;
                        u32 actions_len = upcall_info->actions_len;

                        nla = nla_nest_start(user_skb, ODP_PACKET_ATTR_ACTIONS);
                        memcpy(__skb_put(user_skb, actions_len), actions, actions_len);
                        nla_nest_end(user_skb, nla);
                }

                nla = __nla_reserve(user_skb, ODP_PACKET_ATTR_PACKET, skb->len);
                if (skb->ip_summed == CHECKSUM_PARTIAL)
                        copy_and_csum_skb(skb, nla_data(nla));
                else
                        skb_copy_bits(skb, 0, nla_data(nla), skb->len);

                err = genlmsg_multicast(user_skb, 0, group, GFP_ATOMIC);
                if (err)
                        goto err_kfree_skbs;

                kfree_skb(skb);
                skb = nskb;
        } while (skb);
        return 0;

err_kfree_skbs:
        kfree_skb(skb);
        while ((skb = nskb) != NULL) {
                nskb = skb->next;
                kfree_skb(skb);
        }
        return err;
}

/* Called with genl_mutex. */
static int flush_flows(int dp_ifindex)
{
        struct tbl *old_table;
        struct tbl *new_table;
        struct datapath *dp;

        dp = get_dp(dp_ifindex);
        if (!dp)
                return -ENODEV;

        old_table = get_table_protected(dp);
        new_table = tbl_create(TBL_MIN_BUCKETS);
        if (!new_table)
                return -ENOMEM;

        rcu_assign_pointer(dp->table, new_table);

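        /* Readers traversing the old table under rcu_read_lock() may still be
         * in flight, so its memory (and the flows it references) is reclaimed
         * only after an RCU grace period. */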
        tbl_deferred_destroy(old_table, flow_free_tbl);

        return 0;
}

static int validate_actions(const struct nlattr *attr)
{
        const struct nlattr *a;
        int rem;

        nla_for_each_nested(a, attr, rem) {
                static const u32 action_lens[ODP_ACTION_ATTR_MAX + 1] = {
                        [ODP_ACTION_ATTR_OUTPUT] = 4,
                        [ODP_ACTION_ATTR_CONTROLLER] = 8,
                        [ODP_ACTION_ATTR_SET_DL_TCI] = 2,
                        [ODP_ACTION_ATTR_STRIP_VLAN] = 0,
                        [ODP_ACTION_ATTR_SET_DL_SRC] = ETH_ALEN,
                        [ODP_ACTION_ATTR_SET_DL_DST] = ETH_ALEN,
                        [ODP_ACTION_ATTR_SET_NW_SRC] = 4,
                        [ODP_ACTION_ATTR_SET_NW_DST] = 4,
                        [ODP_ACTION_ATTR_SET_NW_TOS] = 1,
                        [ODP_ACTION_ATTR_SET_TP_SRC] = 2,
                        [ODP_ACTION_ATTR_SET_TP_DST] = 2,
                        [ODP_ACTION_ATTR_SET_TUNNEL] = 8,
                        [ODP_ACTION_ATTR_SET_PRIORITY] = 4,
                        [ODP_ACTION_ATTR_POP_PRIORITY] = 0,
                        [ODP_ACTION_ATTR_DROP_SPOOFED_ARP] = 0,
                };
                int type = nla_type(a);

                if (type > ODP_ACTION_ATTR_MAX || nla_len(a) != action_lens[type])
                        return -EINVAL;

                switch (type) {
                case ODP_ACTION_ATTR_UNSPEC:
                        return -EINVAL;

                case ODP_ACTION_ATTR_CONTROLLER:
                case ODP_ACTION_ATTR_STRIP_VLAN:
                case ODP_ACTION_ATTR_SET_DL_SRC:
                case ODP_ACTION_ATTR_SET_DL_DST:
                case ODP_ACTION_ATTR_SET_NW_SRC:
                case ODP_ACTION_ATTR_SET_NW_DST:
                case ODP_ACTION_ATTR_SET_TP_SRC:
                case ODP_ACTION_ATTR_SET_TP_DST:
                case ODP_ACTION_ATTR_SET_TUNNEL:
                case ODP_ACTION_ATTR_SET_PRIORITY:
                case ODP_ACTION_ATTR_POP_PRIORITY:
                case ODP_ACTION_ATTR_DROP_SPOOFED_ARP:
                        /* No validation needed. */
                        break;

                case ODP_ACTION_ATTR_OUTPUT:
                        if (nla_get_u32(a) >= DP_MAX_PORTS)
                                return -EINVAL;
                        break;

                case ODP_ACTION_ATTR_SET_DL_TCI:
                        if (nla_get_be16(a) & htons(VLAN_CFI_MASK))
                                return -EINVAL;
                        break;

                case ODP_ACTION_ATTR_SET_NW_TOS:
                        if (nla_get_u8(a) & INET_ECN_MASK)
                                return -EINVAL;
                        break;

                default:
                        return -EOPNOTSUPP;
                }
        }

        if (rem > 0)
                return -EINVAL;

        return 0;
}
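
/* Example (illustrative only): the simplest action list that passes
 * validate_actions() is a single 4-byte output action, which a sender could
 * build with the kernel netlink helpers, e.g.:
 *
 *	struct nlattr *start = nla_nest_start(skb, ODP_FLOW_ATTR_ACTIONS);
 *	nla_put_u32(skb, ODP_ACTION_ATTR_OUTPUT, port_no);
 *	nla_nest_end(skb, start);
 */
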
static void clear_stats(struct sw_flow *flow)
{
        flow->used = 0;
        flow->tcp_flags = 0;
        flow->packet_count = 0;
        flow->byte_count = 0;
}

/* Called with genl_mutex. */
static int expand_table(struct datapath *dp)
{
        struct tbl *old_table = get_table_protected(dp);
        struct tbl *new_table;

        new_table = tbl_expand(old_table);
        if (IS_ERR(new_table))
                return PTR_ERR(new_table);

        rcu_assign_pointer(dp->table, new_table);
        tbl_deferred_destroy(old_table, NULL);

        return 0;
}

static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
        struct odp_header *odp_header = info->userhdr;
        struct nlattr **a = info->attrs;
        struct sw_flow_actions *acts;
        struct sk_buff *packet;
        struct sw_flow *flow;
        struct datapath *dp;
        struct ethhdr *eth;
        bool is_frag;
        int len;
        int err;

        err = -EINVAL;
        if (!a[ODP_PACKET_ATTR_PACKET] || !a[ODP_PACKET_ATTR_ACTIONS] ||
            nla_len(a[ODP_PACKET_ATTR_PACKET]) < ETH_HLEN)
                goto err;

        err = validate_actions(a[ODP_PACKET_ATTR_ACTIONS]);
        if (err)
                goto err;

        len = nla_len(a[ODP_PACKET_ATTR_PACKET]);
        packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
        err = -ENOMEM;
        if (!packet)
                goto err;
        skb_reserve(packet, NET_IP_ALIGN);

        memcpy(__skb_put(packet, len), nla_data(a[ODP_PACKET_ATTR_PACKET]), len);

        skb_reset_mac_header(packet);
        eth = eth_hdr(packet);

        /* Normally, setting the skb 'protocol' field would be handled by a
         * call to eth_type_trans(), but it assumes there's a sending
         * device, which we may not have. */
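        /* Values of h_proto >= 1536 (0x600) are EtherTypes; smaller values
         * are 802.3 frame lengths, which we treat as raw 802.2 LLC. */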
        if (ntohs(eth->h_proto) >= 1536)
                packet->protocol = eth->h_proto;
        else
                packet->protocol = htons(ETH_P_802_2);

        /* Build an sw_flow for sending this packet. */
        flow = flow_alloc();
        err = PTR_ERR(flow);
        if (IS_ERR(flow))
                goto err_kfree_skb;

        err = flow_extract(packet, -1, &flow->key, &is_frag);
        if (err)
                goto err_flow_put;
        flow->tbl_node.hash = flow_hash(&flow->key);

        acts = flow_actions_alloc(a[ODP_PACKET_ATTR_ACTIONS]);
        err = PTR_ERR(acts);
        if (IS_ERR(acts))
                goto err_flow_put;
        rcu_assign_pointer(flow->sf_acts, acts);

        OVS_CB(packet)->flow = flow;

        rcu_read_lock();
        dp = get_dp(odp_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto err_unlock;
        err = execute_actions(dp, packet);
        rcu_read_unlock();

        flow_put(flow);
        return err;

err_unlock:
        rcu_read_unlock();
err_flow_put:
        flow_put(flow);
err_kfree_skb:
        kfree_skb(packet);
err:
        return err;
}

static const struct nla_policy packet_policy[ODP_PACKET_ATTR_MAX + 1] = {
        [ODP_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
        [ODP_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
};

static struct genl_ops dp_packet_genl_ops[] = {
        { .cmd = ODP_PACKET_CMD_EXECUTE,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = packet_policy,
          .doit = odp_packet_cmd_execute
        }
};

static void get_dp_stats(struct datapath *dp, struct odp_stats *stats)
{
        int i;

        stats->n_frags = stats->n_hit = stats->n_missed = stats->n_lost = 0;
        for_each_possible_cpu(i) {
                const struct dp_stats_percpu *percpu_stats;
                struct dp_stats_percpu local_stats;
                unsigned seqcount;

                percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

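                /* Standard seqcount read loop: retry the snapshot if a
                 * writer (see dp_process_received_packet() or dp_upcall())
                 * raced with us. */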
                do {
                        seqcount = read_seqcount_begin(&percpu_stats->seqlock);
                        local_stats = *percpu_stats;
                } while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));

                stats->n_frags += local_stats.n_frags;
                stats->n_hit += local_stats.n_hit;
                stats->n_missed += local_stats.n_missed;
                stats->n_lost += local_stats.n_lost;
        }
}

/* MTU of the dp pseudo-device: ETH_DATA_LEN or the minimum of the ports.
 * Called with RTNL lock.
 */
int dp_min_mtu(const struct datapath *dp)
{
        struct vport *p;
        int mtu = 0;

        ASSERT_RTNL();

        list_for_each_entry (p, &dp->port_list, node) {
                int dev_mtu;

                /* Skip any internal ports, since that's what we're trying to
                 * set. */
                if (is_internal_vport(p))
                        continue;

                dev_mtu = vport_get_mtu(p);
                if (!dev_mtu)
                        continue;
                if (!mtu || dev_mtu < mtu)
                        mtu = dev_mtu;
        }

        return mtu ? mtu : ETH_DATA_LEN;
}

/* Sets the MTU of all internal devices on the datapath to the minimum of the
 * ports.  Called with RTNL lock.
 */
void set_internal_devs_mtu(const struct datapath *dp)
{
        struct vport *p;
        int mtu;

        ASSERT_RTNL();

        mtu = dp_min_mtu(dp);

        list_for_each_entry (p, &dp->port_list, node) {
                if (is_internal_vport(p))
                        vport_set_mtu(p, mtu);
        }
}

static const struct nla_policy flow_policy[ODP_FLOW_ATTR_MAX + 1] = {
        [ODP_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
        [ODP_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [ODP_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
};

static struct genl_family dp_flow_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct odp_header),
        .name = ODP_FLOW_FAMILY,
        .version = 1,
        .maxattr = ODP_FLOW_ATTR_MAX
};

static struct genl_multicast_group dp_flow_multicast_group = {
        .name = ODP_FLOW_MCGROUP
};

/* Called with genl_lock. */
static int odp_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
                                  struct sk_buff *skb, u32 pid, u32 seq, u32 flags, u8 cmd)
{
        const int skb_orig_len = skb->len;
        const struct sw_flow_actions *sf_acts;
        struct odp_flow_stats stats;
        struct odp_header *odp_header;
        struct nlattr *nla;
        unsigned long used;
        u8 tcp_flags;
        int err;

        sf_acts = rcu_dereference_protected(flow->sf_acts,
                                            lockdep_genl_is_held());

        odp_header = genlmsg_put(skb, pid, seq, &dp_flow_genl_family, flags, cmd);
        if (!odp_header)
                return -EMSGSIZE;

        odp_header->dp_ifindex = dp->dp_ifindex;

        nla = nla_nest_start(skb, ODP_FLOW_ATTR_KEY);
        if (!nla)
                goto nla_put_failure;
        err = flow_to_nlattrs(&flow->key, skb);
        if (err)
                goto error;
        nla_nest_end(skb, nla);

        spin_lock_bh(&flow->lock);
        used = flow->used;
        stats.n_packets = flow->packet_count;
        stats.n_bytes = flow->byte_count;
        tcp_flags = flow->tcp_flags;
        spin_unlock_bh(&flow->lock);

        if (used)
                NLA_PUT_U64(skb, ODP_FLOW_ATTR_USED, flow_used_time(used));

        if (stats.n_packets)
                NLA_PUT(skb, ODP_FLOW_ATTR_STATS, sizeof(struct odp_flow_stats), &stats);

        if (tcp_flags)
                NLA_PUT_U8(skb, ODP_FLOW_ATTR_TCP_FLAGS, tcp_flags);

        /* If ODP_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
         * this is the first flow to be dumped into 'skb'.  This is unusual for
         * Netlink but individual action lists can be longer than
         * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
         * The userspace caller can always fetch the actions separately if it
         * really wants them.  (Most userspace callers in fact don't care.)
         *
         * This can only fail for dump operations because the skb is always
         * properly sized for single flows.
         */
        err = nla_put(skb, ODP_FLOW_ATTR_ACTIONS, sf_acts->actions_len,
                      sf_acts->actions);
        if (err < 0 && skb_orig_len)
                goto error;

        return genlmsg_end(skb, odp_header);

nla_put_failure:
        err = -EMSGSIZE;
error:
        genlmsg_cancel(skb, odp_header);
        return err;
}

static struct sk_buff *odp_flow_cmd_alloc_info(struct sw_flow *flow)
{
        const struct sw_flow_actions *sf_acts;
        int len;

        sf_acts = rcu_dereference_protected(flow->sf_acts,
                                            lockdep_genl_is_held());

        len = nla_total_size(FLOW_BUFSIZE); /* ODP_FLOW_ATTR_KEY */
        len += nla_total_size(sf_acts->actions_len); /* ODP_FLOW_ATTR_ACTIONS */
        len += nla_total_size(sizeof(struct odp_flow_stats)); /* ODP_FLOW_ATTR_STATS */
        len += nla_total_size(1); /* ODP_FLOW_ATTR_TCP_FLAGS */
        len += nla_total_size(8); /* ODP_FLOW_ATTR_USED */
        return genlmsg_new(NLMSG_ALIGN(sizeof(struct odp_header)) + len, GFP_KERNEL);
}

static struct sk_buff *odp_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp,
                                               u32 pid, u32 seq, u8 cmd)
{
        struct sk_buff *skb;
        int retval;

        skb = odp_flow_cmd_alloc_info(flow);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        retval = odp_flow_cmd_fill_info(flow, dp, skb, pid, seq, 0, cmd);
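        /* Cannot fail: odp_flow_cmd_alloc_info() sized the skb for exactly
         * the attributes that fill_info emits for a single flow. */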
        BUG_ON(retval < 0);
        return skb;
}

static int odp_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct odp_header *odp_header = info->userhdr;
        struct tbl_node *flow_node;
        struct sw_flow_key key;
        struct sw_flow *flow;
        struct sk_buff *reply;
        struct datapath *dp;
        struct tbl *table;
        u32 hash;
        int error;

        /* Extract key. */
        error = -EINVAL;
        if (!a[ODP_FLOW_ATTR_KEY])
                goto error;
        error = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
        if (error)
                goto error;

        /* Validate actions. */
        if (a[ODP_FLOW_ATTR_ACTIONS]) {
                error = validate_actions(a[ODP_FLOW_ATTR_ACTIONS]);
                if (error)
                        goto error;
        } else if (info->genlhdr->cmd == ODP_FLOW_CMD_NEW) {
                error = -EINVAL;
                goto error;
        }

        dp = get_dp(odp_header->dp_ifindex);
        error = -ENODEV;
        if (!dp)
                goto error;

        hash = flow_hash(&key);
        table = get_table_protected(dp);
        flow_node = tbl_lookup(table, &key, hash, flow_cmp);
        if (!flow_node) {
                struct sw_flow_actions *acts;

                /* Bail out if we're not allowed to create a new flow. */
                error = -ENOENT;
                if (info->genlhdr->cmd == ODP_FLOW_CMD_SET)
                        goto error;

                /* Expand table, if necessary, to make room. */
                if (tbl_count(table) >= tbl_n_buckets(table)) {
                        error = expand_table(dp);
                        if (error)
                                goto error;
                        table = get_table_protected(dp);
                }

                /* Allocate flow. */
                flow = flow_alloc();
                if (IS_ERR(flow)) {
                        error = PTR_ERR(flow);
                        goto error;
                }
                flow->key = key;
                clear_stats(flow);

                /* Obtain actions. */
                acts = flow_actions_alloc(a[ODP_FLOW_ATTR_ACTIONS]);
                error = PTR_ERR(acts);
                if (IS_ERR(acts))
                        goto error_free_flow;
                rcu_assign_pointer(flow->sf_acts, acts);

                /* Put flow in bucket. */
                error = tbl_insert(table, &flow->tbl_node, hash);
                if (error)
                        goto error_free_flow;

                reply = odp_flow_cmd_build_info(flow, dp, info->snd_pid,
                                                info->snd_seq, ODP_FLOW_CMD_NEW);
        } else {
                /* We found a matching flow. */
                struct sw_flow_actions *old_acts;

                /* Bail out if we're not allowed to modify an existing flow.
                 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
                 * because Generic Netlink treats the latter as a dump
                 * request.  We also accept NLM_F_EXCL in case that bug ever
                 * gets fixed.
                 */
                error = -EEXIST;
                if (info->genlhdr->cmd == ODP_FLOW_CMD_NEW &&
                    info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
                        goto error;

                /* Update actions. */
                flow = flow_cast(flow_node);
                old_acts = rcu_dereference_protected(flow->sf_acts,
                                                     lockdep_genl_is_held());
                if (a[ODP_FLOW_ATTR_ACTIONS] &&
                    (old_acts->actions_len != nla_len(a[ODP_FLOW_ATTR_ACTIONS]) ||
                     memcmp(old_acts->actions, nla_data(a[ODP_FLOW_ATTR_ACTIONS]),
                            old_acts->actions_len))) {
                        struct sw_flow_actions *new_acts;

                        new_acts = flow_actions_alloc(a[ODP_FLOW_ATTR_ACTIONS]);
                        error = PTR_ERR(new_acts);
                        if (IS_ERR(new_acts))
                                goto error;

                        rcu_assign_pointer(flow->sf_acts, new_acts);
                        flow_deferred_free_acts(old_acts);
                }

                reply = odp_flow_cmd_build_info(flow, dp, info->snd_pid,
                                                info->snd_seq, ODP_FLOW_CMD_NEW);

                /* Clear stats. */
                if (a[ODP_FLOW_ATTR_CLEAR]) {
                        spin_lock_bh(&flow->lock);
                        clear_stats(flow);
                        spin_unlock_bh(&flow->lock);
                }
        }

        if (!IS_ERR(reply))
                genl_notify(reply, genl_info_net(info), info->snd_pid,
                            dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
        else
                netlink_set_err(INIT_NET_GENL_SOCK, 0,
                                dp_flow_multicast_group.id, PTR_ERR(reply));
        return 0;

error_free_flow:
        flow_put(flow);
error:
        return error;
}

static int odp_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct odp_header *odp_header = info->userhdr;
        struct sw_flow_key key;
        struct tbl_node *flow_node;
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
        struct tbl *table;
        int err;

        if (!a[ODP_FLOW_ATTR_KEY])
                return -EINVAL;
        err = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
        if (err)
                return err;

        dp = get_dp(odp_header->dp_ifindex);
        if (!dp)
                return -ENODEV;

        table = get_table_protected(dp);
        flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
        if (!flow_node)
                return -ENOENT;

        flow = flow_cast(flow_node);
        reply = odp_flow_cmd_build_info(flow, dp, info->snd_pid, info->snd_seq, ODP_FLOW_CMD_NEW);
        if (IS_ERR(reply))
                return PTR_ERR(reply);

        return genlmsg_reply(reply, info);
}

static int odp_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct odp_header *odp_header = info->userhdr;
        struct sw_flow_key key;
        struct tbl_node *flow_node;
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
        struct tbl *table;
        int err;

        if (!a[ODP_FLOW_ATTR_KEY])
                return flush_flows(odp_header->dp_ifindex);
        err = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
        if (err)
                return err;

        dp = get_dp(odp_header->dp_ifindex);
        if (!dp)
                return -ENODEV;

        table = get_table_protected(dp);
        flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
        if (!flow_node)
                return -ENOENT;
        flow = flow_cast(flow_node);

        reply = odp_flow_cmd_alloc_info(flow);
        if (!reply)
                return -ENOMEM;

        err = tbl_remove(table, flow_node);
        if (err) {
                kfree_skb(reply);
                return err;
        }

        err = odp_flow_cmd_fill_info(flow, dp, reply, info->snd_pid,
                                     info->snd_seq, 0, ODP_FLOW_CMD_DEL);
        BUG_ON(err < 0);

        flow_deferred_free(flow);

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
        return 0;
}

static int odp_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct odp_header *odp_header = genlmsg_data(nlmsg_data(cb->nlh));
        struct datapath *dp;

        dp = get_dp(odp_header->dp_ifindex);
        if (!dp)
                return -ENODEV;

        for (;;) {
                struct tbl_node *flow_node;
                struct sw_flow *flow;
                u32 bucket, obj;

                bucket = cb->args[0];
                obj = cb->args[1];
                flow_node = tbl_next(get_table_protected(dp), &bucket, &obj);
                if (!flow_node)
                        break;

                flow = flow_cast(flow_node);
                if (odp_flow_cmd_fill_info(flow, dp, skb, NETLINK_CB(cb->skb).pid,
                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                           ODP_FLOW_CMD_NEW) < 0)
                        break;

                cb->args[0] = bucket;
                cb->args[1] = obj;
        }
        return skb->len;
}

static struct genl_ops dp_flow_genl_ops[] = {
        { .cmd = ODP_FLOW_CMD_NEW,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = odp_flow_cmd_new_or_set
        },
        { .cmd = ODP_FLOW_CMD_DEL,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = odp_flow_cmd_del
        },
        { .cmd = ODP_FLOW_CMD_GET,
          .flags = 0,               /* OK for unprivileged users. */
          .policy = flow_policy,
          .doit = odp_flow_cmd_get,
          .dumpit = odp_flow_cmd_dump
        },
        { .cmd = ODP_FLOW_CMD_SET,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = odp_flow_cmd_new_or_set,
        },
};

static const struct nla_policy datapath_policy[ODP_DP_ATTR_MAX + 1] = {
#ifdef HAVE_NLA_NUL_STRING
        [ODP_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
#endif
        [ODP_DP_ATTR_IPV4_FRAGS] = { .type = NLA_U32 },
        [ODP_DP_ATTR_SAMPLING] = { .type = NLA_U32 },
};

static struct genl_family dp_datapath_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct odp_header),
        .name = ODP_DATAPATH_FAMILY,
        .version = 1,
        .maxattr = ODP_DP_ATTR_MAX
};

static struct genl_multicast_group dp_datapath_multicast_group = {
        .name = ODP_DATAPATH_MCGROUP
};

static int odp_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
                                u32 pid, u32 seq, u32 flags, u8 cmd)
{
        struct odp_header *odp_header;
        struct nlattr *nla;
        int err;

        odp_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family,
                                 flags, cmd);
        if (!odp_header)
                goto error;

        odp_header->dp_ifindex = dp->dp_ifindex;

        rcu_read_lock();
        err = nla_put_string(skb, ODP_DP_ATTR_NAME, dp_name(dp));
        rcu_read_unlock();
        if (err)
                goto nla_put_failure;

        nla = nla_reserve(skb, ODP_DP_ATTR_STATS, sizeof(struct odp_stats));
        if (!nla)
                goto nla_put_failure;
        get_dp_stats(dp, nla_data(nla));

        NLA_PUT_U32(skb, ODP_DP_ATTR_IPV4_FRAGS,
                    dp->drop_frags ? ODP_DP_FRAG_DROP : ODP_DP_FRAG_ZERO);

        if (dp->sflow_probability)
                NLA_PUT_U32(skb, ODP_DP_ATTR_SAMPLING, dp->sflow_probability);

        nla = nla_nest_start(skb, ODP_DP_ATTR_MCGROUPS);
        if (!nla)
                goto nla_put_failure;
        NLA_PUT_U32(skb, ODP_PACKET_CMD_MISS, packet_mc_group(dp, ODP_PACKET_CMD_MISS));
        NLA_PUT_U32(skb, ODP_PACKET_CMD_ACTION, packet_mc_group(dp, ODP_PACKET_CMD_ACTION));
        NLA_PUT_U32(skb, ODP_PACKET_CMD_SAMPLE, packet_mc_group(dp, ODP_PACKET_CMD_SAMPLE));
        nla_nest_end(skb, nla);

        return genlmsg_end(skb, odp_header);

nla_put_failure:
        genlmsg_cancel(skb, odp_header);
error:
        return -EMSGSIZE;
}

static struct sk_buff *odp_dp_cmd_build_info(struct datapath *dp, u32 pid,
                                             u32 seq, u8 cmd)
{
        struct sk_buff *skb;
        int retval;

        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        retval = odp_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd);
        if (retval < 0) {
                kfree_skb(skb);
                return ERR_PTR(retval);
        }
        return skb;
}

static int odp_dp_cmd_validate(struct nlattr *a[ODP_DP_ATTR_MAX + 1])
{
        if (a[ODP_DP_ATTR_IPV4_FRAGS]) {
                u32 frags = nla_get_u32(a[ODP_DP_ATTR_IPV4_FRAGS]);

                if (frags != ODP_DP_FRAG_ZERO && frags != ODP_DP_FRAG_DROP)
                        return -EINVAL;
        }

        return CHECK_NUL_STRING(a[ODP_DP_ATTR_NAME], IFNAMSIZ - 1);
}

/* Called with genl_mutex and optionally with RTNL lock also. */
static struct datapath *lookup_datapath(struct odp_header *odp_header, struct nlattr *a[ODP_DP_ATTR_MAX + 1])
{
        struct datapath *dp;

        if (!a[ODP_DP_ATTR_NAME])
                dp = get_dp(odp_header->dp_ifindex);
        else {
                struct vport *vport;

                rcu_read_lock();
                vport = vport_locate(nla_data(a[ODP_DP_ATTR_NAME]));
                dp = vport && vport->port_no == ODPP_LOCAL ? vport->dp : NULL;
                rcu_read_unlock();
        }
        return dp ? dp : ERR_PTR(-ENODEV);
}

/* Called with genl_mutex. */
static void change_datapath(struct datapath *dp, struct nlattr *a[ODP_DP_ATTR_MAX + 1])
{
        if (a[ODP_DP_ATTR_IPV4_FRAGS])
                dp->drop_frags = nla_get_u32(a[ODP_DP_ATTR_IPV4_FRAGS]) == ODP_DP_FRAG_DROP;
        if (a[ODP_DP_ATTR_SAMPLING])
                dp->sflow_probability = nla_get_u32(a[ODP_DP_ATTR_SAMPLING]);
}

static int odp_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct vport_parms parms;
        struct sk_buff *reply;
        struct datapath *dp;
        struct vport *vport;
        int err;

        err = -EINVAL;
        if (!a[ODP_DP_ATTR_NAME])
                goto err;

        err = odp_dp_cmd_validate(a);
        if (err)
                goto err;

        rtnl_lock();
        err = -ENODEV;
        if (!try_module_get(THIS_MODULE))
                goto err_unlock_rtnl;

        err = -ENOMEM;
        dp = kzalloc(sizeof(*dp), GFP_KERNEL);
        if (dp == NULL)
                goto err_put_module;
        INIT_LIST_HEAD(&dp->port_list);

        /* Initialize kobject for bridge.  This will be added as
         * /sys/class/net/<devname>/brif later, if sysfs is enabled. */
        dp->ifobj.kset = NULL;
        kobject_init(&dp->ifobj, &dp_ktype);

        /* Allocate table. */
        err = -ENOMEM;
        rcu_assign_pointer(dp->table, tbl_create(TBL_MIN_BUCKETS));
        if (!dp->table)
                goto err_free_dp;

        /* Set up our datapath device. */
        parms.name = nla_data(a[ODP_DP_ATTR_NAME]);
        parms.type = ODP_VPORT_TYPE_INTERNAL;
        parms.options = NULL;
        parms.dp = dp;
        parms.port_no = ODPP_LOCAL;
        vport = new_vport(&parms);
        if (IS_ERR(vport)) {
                err = PTR_ERR(vport);
                if (err == -EBUSY)
                        err = -EEXIST;

                goto err_destroy_table;
        }
        dp->dp_ifindex = vport_get_ifindex(vport);

        dp->drop_frags = 0;
        dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
        if (!dp->stats_percpu) {
                err = -ENOMEM;
                goto err_destroy_local_port;
        }

        change_datapath(dp, a);

        reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
        err = PTR_ERR(reply);
        if (IS_ERR(reply))
                goto err_destroy_local_port;

        list_add_tail(&dp->list_node, &dps);
        dp_sysfs_add_dp(dp);

        rtnl_unlock();

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
        return 0;

err_destroy_local_port:
        dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));
err_destroy_table:
        tbl_destroy(get_table_protected(dp), NULL);
err_free_dp:
        kfree(dp);
err_put_module:
        module_put(THIS_MODULE);
err_unlock_rtnl:
        rtnl_unlock();
err:
        return err;
}

static int odp_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct vport *vport, *next_vport;
        struct sk_buff *reply;
        struct datapath *dp;
        int err;

        err = odp_dp_cmd_validate(info->attrs);
        if (err)
                goto exit;

        rtnl_lock();
        dp = lookup_datapath(info->userhdr, info->attrs);
        err = PTR_ERR(dp);
        if (IS_ERR(dp))
                goto exit_unlock;

        reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_DEL);
        err = PTR_ERR(reply);
        if (IS_ERR(reply))
                goto exit_unlock;

        list_for_each_entry_safe (vport, next_vport, &dp->port_list, node)
                if (vport->port_no != ODPP_LOCAL)
                        dp_detach_port(vport);

        dp_sysfs_del_dp(dp);
        list_del(&dp->list_node);
        dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));

        /* rtnl_unlock() will wait until all the references to devices that
         * are pending unregistration have been dropped.  We do it here to
         * ensure that any internal devices (which contain DP pointers) are
         * fully destroyed before freeing the datapath.
         */
        rtnl_unlock();

        call_rcu(&dp->rcu, destroy_dp_rcu);
        module_put(THIS_MODULE);

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);

        return 0;

exit_unlock:
        rtnl_unlock();
exit:
        return err;
}

static int odp_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
        struct sk_buff *reply;
        struct datapath *dp;
        int err;

        err = odp_dp_cmd_validate(info->attrs);
        if (err)
                return err;

        dp = lookup_datapath(info->userhdr, info->attrs);
        if (IS_ERR(dp))
                return PTR_ERR(dp);

        change_datapath(dp, info->attrs);

        reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
        if (IS_ERR(reply)) {
                err = PTR_ERR(reply);
                netlink_set_err(INIT_NET_GENL_SOCK, 0,
                                dp_datapath_multicast_group.id, err);
                return 0;
        }

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
        return 0;
}

static int odp_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct sk_buff *reply;
        struct datapath *dp;
        int err;

        err = odp_dp_cmd_validate(info->attrs);
        if (err)
                return err;

        dp = lookup_datapath(info->userhdr, info->attrs);
        if (IS_ERR(dp))
                return PTR_ERR(dp);

        reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
        if (IS_ERR(reply))
                return PTR_ERR(reply);

        return genlmsg_reply(reply, info);
}

static int odp_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct datapath *dp;
        int skip = cb->args[0];
        int i = 0;

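        /* Netlink dumps are restartable: the core preserves cb->args[]
         * between successive calls, so args[0] records how many datapaths
         * have already been dumped. */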
1532         list_for_each_entry (dp, &dps, list_node) {
1533                 if (i < skip)
1534                         continue;
1535                 if (odp_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid,
1536                                          cb->nlh->nlmsg_seq, NLM_F_MULTI,
1537                                          ODP_DP_CMD_NEW) < 0)
1538                         break;
1539                 i++;
1540         }
1541
1542         cb->args[0] = i;
1543
1544         return skb->len;
1545 }
1546
static struct genl_ops dp_datapath_genl_ops[] = {
        { .cmd = ODP_DP_CMD_NEW,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = datapath_policy,
          .doit = odp_dp_cmd_new,
        },
        { .cmd = ODP_DP_CMD_DEL,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = datapath_policy,
          .doit = odp_dp_cmd_del,
        },
        { .cmd = ODP_DP_CMD_GET,
          .flags = 0,               /* OK for unprivileged users. */
          .policy = datapath_policy,
          .doit = odp_dp_cmd_get,
          .dumpit = odp_dp_cmd_dump,
        },
        { .cmd = ODP_DP_CMD_SET,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = datapath_policy,
          .doit = odp_dp_cmd_set,
        },
};

static const struct nla_policy vport_policy[ODP_VPORT_ATTR_MAX + 1] = {
#ifdef HAVE_NLA_NUL_STRING
        [ODP_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
        [ODP_VPORT_ATTR_STATS] = { .len = sizeof(struct rtnl_link_stats64) },
        [ODP_VPORT_ATTR_ADDRESS] = { .len = ETH_ALEN },
#else
        [ODP_VPORT_ATTR_STATS] = { .minlen = sizeof(struct rtnl_link_stats64) },
        [ODP_VPORT_ATTR_ADDRESS] = { .minlen = ETH_ALEN },
#endif
        /* These entries are valid regardless of kernel version, so they do
         * not belong under the HAVE_NLA_NUL_STRING conditional. */
        [ODP_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
        [ODP_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
        [ODP_VPORT_ATTR_MTU] = { .type = NLA_U32 },
        [ODP_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
};

static struct genl_family dp_vport_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct odp_header),
        .name = ODP_VPORT_FAMILY,
        .version = 1,
        .maxattr = ODP_VPORT_ATTR_MAX
};

static struct genl_multicast_group dp_vport_multicast_group = {
        .name = ODP_VPORT_MCGROUP
};

/* Called with RTNL lock or RCU read lock. */
static int odp_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
                                   u32 pid, u32 seq, u32 flags, u8 cmd)
{
        struct odp_header *odp_header;
        struct nlattr *nla;
        int ifindex, iflink;
        int mtu;
        int err;

        odp_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family,
                                 flags, cmd);
        if (!odp_header)
                return -EMSGSIZE;

        odp_header->dp_ifindex = vport->dp->dp_ifindex;

        NLA_PUT_U32(skb, ODP_VPORT_ATTR_PORT_NO, vport->port_no);
        NLA_PUT_U32(skb, ODP_VPORT_ATTR_TYPE, vport_get_type(vport));
        NLA_PUT_STRING(skb, ODP_VPORT_ATTR_NAME, vport_get_name(vport));

        nla = nla_reserve(skb, ODP_VPORT_ATTR_STATS, sizeof(struct rtnl_link_stats64));
        if (!nla)
                goto nla_put_failure;
        if (vport_get_stats(vport, nla_data(nla)))
                __skb_trim(skb, skb->len - nla->nla_len);

        NLA_PUT(skb, ODP_VPORT_ATTR_ADDRESS, ETH_ALEN, vport_get_addr(vport));

        mtu = vport_get_mtu(vport);
        if (mtu)
                NLA_PUT_U32(skb, ODP_VPORT_ATTR_MTU, mtu);

        err = vport_get_options(vport, skb);
        if (err == -EMSGSIZE)
                goto error;

        ifindex = vport_get_ifindex(vport);
        if (ifindex > 0)
                NLA_PUT_U32(skb, ODP_VPORT_ATTR_IFINDEX, ifindex);

        iflink = vport_get_iflink(vport);
        if (iflink > 0)
                NLA_PUT_U32(skb, ODP_VPORT_ATTR_IFLINK, iflink);

        return genlmsg_end(skb, odp_header);

nla_put_failure:
        err = -EMSGSIZE;
error:
        genlmsg_cancel(skb, odp_header);
        return err;
}

/* Called with RTNL lock or RCU read lock. */
static struct sk_buff *odp_vport_cmd_build_info(struct vport *vport, u32 pid,
                                                u32 seq, u8 cmd)
{
        struct sk_buff *skb;
        int retval;

        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        retval = odp_vport_cmd_fill_info(vport, skb, pid, seq, 0, cmd);
        if (retval < 0) {
                kfree_skb(skb);
                return ERR_PTR(retval);
        }
        return skb;
}

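/* Validates attributes that the netlink policy cannot check on every kernel
 * version (currently just nul-termination of the vport name). */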
static int odp_vport_cmd_validate(struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
{
        return CHECK_NUL_STRING(a[ODP_VPORT_ATTR_NAME], IFNAMSIZ - 1);
}

/* Called with RTNL lock or RCU read lock. */
static struct vport *lookup_vport(struct odp_header *odp_header,
                                  struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
{
        struct datapath *dp;
        struct vport *vport;

        if (a[ODP_VPORT_ATTR_NAME]) {
                vport = vport_locate(nla_data(a[ODP_VPORT_ATTR_NAME]));
                if (!vport)
                        return ERR_PTR(-ENODEV);
                return vport;
        } else if (a[ODP_VPORT_ATTR_PORT_NO]) {
                u32 port_no = nla_get_u32(a[ODP_VPORT_ATTR_PORT_NO]);

                if (port_no >= DP_MAX_PORTS)
                        return ERR_PTR(-EFBIG);

                dp = get_dp(odp_header->dp_ifindex);
                if (!dp)
                        return ERR_PTR(-ENODEV);

                vport = get_vport_protected(dp, port_no);
                if (!vport)
                        return ERR_PTR(-ENOENT);
                return vport;
        } else
                return ERR_PTR(-EINVAL);
}

/* Called with RTNL lock. */
static int change_vport(struct vport *vport, struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
{
        int err = 0;
        if (a[ODP_VPORT_ATTR_STATS])
                err = vport_set_stats(vport, nla_data(a[ODP_VPORT_ATTR_STATS]));
        if (!err && a[ODP_VPORT_ATTR_ADDRESS])
                err = vport_set_addr(vport, nla_data(a[ODP_VPORT_ATTR_ADDRESS]));
        if (!err && a[ODP_VPORT_ATTR_MTU])
                err = vport_set_mtu(vport, nla_get_u32(a[ODP_VPORT_ATTR_MTU]));
        return err;
}

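/* Handler for ODP_VPORT_CMD_NEW: creates a vport on the given datapath,
 * using the requested port number or else the lowest free one. */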
static int odp_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct odp_header *odp_header = info->userhdr;
        struct vport_parms parms;
        struct sk_buff *reply;
        struct vport *vport;
        struct datapath *dp;
        u32 port_no;
        int err;

        err = -EINVAL;
        if (!a[ODP_VPORT_ATTR_NAME] || !a[ODP_VPORT_ATTR_TYPE])
                goto exit;

        err = odp_vport_cmd_validate(a);
        if (err)
                goto exit;

        rtnl_lock();
        dp = get_dp(odp_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto exit_unlock;

        if (a[ODP_VPORT_ATTR_PORT_NO]) {
                port_no = nla_get_u32(a[ODP_VPORT_ATTR_PORT_NO]);

                err = -EFBIG;
                if (port_no >= DP_MAX_PORTS)
                        goto exit_unlock;

                vport = get_vport_protected(dp, port_no);
                err = -EBUSY;
                if (vport)
                        goto exit_unlock;
        } else {
                for (port_no = 1; ; port_no++) {
                        if (port_no >= DP_MAX_PORTS) {
                                err = -EFBIG;
                                goto exit_unlock;
                        }
                        vport = get_vport_protected(dp, port_no);
                        if (!vport)
                                break;
                }
        }

        parms.name = nla_data(a[ODP_VPORT_ATTR_NAME]);
        parms.type = nla_get_u32(a[ODP_VPORT_ATTR_TYPE]);
        parms.options = a[ODP_VPORT_ATTR_OPTIONS];
        parms.dp = dp;
        parms.port_no = port_no;

        vport = new_vport(&parms);
        err = PTR_ERR(vport);
        if (IS_ERR(vport))
                goto exit_unlock;

        set_internal_devs_mtu(dp);
        dp_sysfs_add_if(vport);

        err = change_vport(vport, a);
        if (!err) {
                reply = odp_vport_cmd_build_info(vport, info->snd_pid,
                                                 info->snd_seq, ODP_VPORT_CMD_NEW);
                if (IS_ERR(reply))
                        err = PTR_ERR(reply);
        }
        if (err) {
                dp_detach_port(vport);
                goto exit_unlock;
        }
        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);

exit_unlock:
        rtnl_unlock();
exit:
        return err;
}

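/* Handler for ODP_VPORT_CMD_SET: applies option, stats, address, and MTU
 * changes to an existing vport, then notifies the vport multicast group. */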
static int odp_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct sk_buff *reply;
        struct vport *vport;
        int err;

        err = odp_vport_cmd_validate(a);
        if (err)
                goto exit;

        rtnl_lock();
        vport = lookup_vport(info->userhdr, a);
        err = PTR_ERR(vport);
        if (IS_ERR(vport))
                goto exit_unlock;

        err = 0;
        if (a[ODP_VPORT_ATTR_OPTIONS])
                err = vport_set_options(vport, a[ODP_VPORT_ATTR_OPTIONS]);
        if (!err)
                err = change_vport(vport, a);

        reply = odp_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
                                         ODP_VPORT_CMD_NEW);
        if (IS_ERR(reply)) {
                /* Report the error to listeners, but don't return with RTNL
                 * still held. */
                netlink_set_err(INIT_NET_GENL_SOCK, 0,
                                dp_vport_multicast_group.id, PTR_ERR(reply));
                goto exit_unlock;
        }

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);

exit_unlock:
        rtnl_unlock();
exit:
        return err;
}

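/* Handler for ODP_VPORT_CMD_DEL: detaches a vport from its datapath.  The
 * local port (ODPP_LOCAL) cannot be deleted. */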
static int odp_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct sk_buff *reply;
        struct vport *vport;
        int err;

        err = odp_vport_cmd_validate(a);
        if (err)
                goto exit;

        rtnl_lock();
        vport = lookup_vport(info->userhdr, a);
        err = PTR_ERR(vport);
        if (IS_ERR(vport))
                goto exit_unlock;

        if (vport->port_no == ODPP_LOCAL) {
                err = -EINVAL;
                goto exit_unlock;
        }

        reply = odp_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
                                         ODP_VPORT_CMD_DEL);
        err = PTR_ERR(reply);
        if (IS_ERR(reply))
                goto exit_unlock;

        err = dp_detach_port(vport);

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);

exit_unlock:
        rtnl_unlock();
exit:
        return err;
}

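/* Handler for ODP_VPORT_CMD_GET: replies to the sender with a description of
 * the requested vport. */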
static int odp_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct odp_header *odp_header = info->userhdr;
        struct sk_buff *reply;
        struct vport *vport;
        int err;

        err = odp_vport_cmd_validate(a);
        if (err)
                goto exit;

        rcu_read_lock();
        vport = lookup_vport(odp_header, a);
        err = PTR_ERR(vport);
        if (IS_ERR(vport))
                goto exit_unlock;

        reply = odp_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
                                         ODP_VPORT_CMD_NEW);
        err = PTR_ERR(reply);
        if (IS_ERR(reply))
                goto exit_unlock;

        err = genlmsg_reply(reply, info);

exit_unlock:
        rcu_read_unlock();
exit:
        return err;
}

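/* Dump callback for ODP_VPORT_CMD_GET: emits one message per vport in the
 * given datapath, resuming from the port number saved in cb->args[0]. */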
static int odp_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct odp_header *odp_header = genlmsg_data(nlmsg_data(cb->nlh));
        struct datapath *dp;
        u32 port_no;
        int retval;

        dp = get_dp(odp_header->dp_ifindex);
        if (!dp)
                return -ENODEV;

        rcu_read_lock();
        for (port_no = cb->args[0]; port_no < DP_MAX_PORTS; port_no++) {
                struct vport *vport;

                vport = get_vport_protected(dp, port_no);
                if (!vport)
                        continue;

                if (odp_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).pid,
                                            cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                            ODP_VPORT_CMD_NEW) < 0)
                        break;
        }
        rcu_read_unlock();

        cb->args[0] = port_no;
        retval = skb->len;

        return retval;
}

static struct genl_ops dp_vport_genl_ops[] = {
        { .cmd = ODP_VPORT_CMD_NEW,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = vport_policy,
          .doit = odp_vport_cmd_new,
        },
        { .cmd = ODP_VPORT_CMD_DEL,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = vport_policy,
          .doit = odp_vport_cmd_del,
        },
        { .cmd = ODP_VPORT_CMD_GET,
          .flags = 0,               /* OK for unprivileged users. */
          .policy = vport_policy,
          .doit = odp_vport_cmd_get,
          .dumpit = odp_vport_cmd_dump,
        },
        { .cmd = ODP_VPORT_CMD_SET,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = vport_policy,
          .doit = odp_vport_cmd_set,
        },
};

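/* Bundles a Generic Netlink family with its operations and optional
 * multicast group so that registration can be driven by a single table. */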
struct genl_family_and_ops {
        struct genl_family *family;
        struct genl_ops *ops;
        int n_ops;
        struct genl_multicast_group *group;
};

static const struct genl_family_and_ops dp_genl_families[] = {
        { &dp_datapath_genl_family,
          dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
          &dp_datapath_multicast_group },
        { &dp_vport_genl_family,
          dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
          &dp_vport_multicast_group },
        { &dp_flow_genl_family,
          dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
          &dp_flow_multicast_group },
        { &dp_packet_genl_family,
          dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
          NULL },
};

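/* Unregisters the first n_families entries of dp_genl_families. */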
static void dp_unregister_genl(int n_families)
{
        int i;

        for (i = 0; i < n_families; i++)
                genl_unregister_family(dp_genl_families[i].family);
}

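/* Registers each Generic Netlink family and its multicast group, unwinding
 * any partial registration on failure. */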
static int dp_register_genl(void)
{
        int n_registered;
        int err;
        int i;

        n_registered = 0;
        for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
                const struct genl_family_and_ops *f = &dp_genl_families[i];

                err = genl_register_family_with_ops(f->family, f->ops,
                                                    f->n_ops);
                if (err)
                        goto error;
                n_registered++;

                if (f->group) {
                        err = genl_register_mc_group(f->family, f->group);
                        if (err)
                                goto error;
                }
        }

        err = packet_register_mc_groups();
        if (err)
                goto error;
        return 0;

error:
        dp_unregister_genl(n_registered);
        return err;
}

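/* Module init: brings up the flow and vport subsystems, the netdevice
 * notifier, and the Generic Netlink families, unwinding on failure. */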
static int __init dp_init(void)
{
        struct sk_buff *dummy_skb;
        int err;

        BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb));

        pr_info("Open vSwitch %s, built "__DATE__" "__TIME__"\n", VERSION BUILDNR);

        err = flow_init();
        if (err)
                goto error;

        err = vport_init();
        if (err)
                goto error_flow_exit;

        err = register_netdevice_notifier(&dp_device_notifier);
        if (err)
                goto error_vport_exit;

        err = dp_register_genl();
        if (err < 0)
                goto error_unreg_notifier;

        return 0;

error_unreg_notifier:
        unregister_netdevice_notifier(&dp_device_notifier);
error_vport_exit:
        vport_exit();
error_flow_exit:
        flow_exit();
error:
        return err;
}

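/* Module exit: waits for outstanding RCU callbacks to finish, then tears
 * everything down in the reverse order of dp_init(). */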
static void dp_cleanup(void)
{
        rcu_barrier();
        dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
        unregister_netdevice_notifier(&dp_device_notifier);
        vport_exit();
        flow_exit();
}

module_init(dp_init);
module_exit(dp_cleanup);

MODULE_DESCRIPTION("Open vSwitch switching datapath");
MODULE_LICENSE("GPL");