/*
 * Copyright (c) 2007, 2008, 2009, 2010, 2011 Nicira Networks.
 * Distributed under the terms of the GNU GPL version 2.
 *
 * Significant portions of this file may be copied from parts of the Linux
 * kernel, by Linus Torvalds and others.
 */

/* Functions for managing the dp interface/device. */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/version.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/system.h>
#include <asm/div64.h>
#include <asm/bug.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/inet_ecn.h>
#include <net/genetlink.h>

#include "openvswitch/datapath-protocol.h"
#include "checksum.h"
#include "datapath.h"
#include "actions.h"
#include "flow.h"
#include "loop_counter.h"
#include "table.h"
#include "vlan.h"
#include "vport-internal_dev.h"

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) || \
    LINUX_VERSION_CODE > KERNEL_VERSION(2,6,38)
#error Kernels before 2.6.18 or after 2.6.38 are not supported by this version of Open vSwitch.
#endif

int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
EXPORT_SYMBOL(dp_ioctl_hook);

/**
 * DOC: Locking:
 *
 * Writes to device state (add/remove datapath, port, set operations on vports,
 * etc.) are protected by RTNL.
 *
 * Writes to other state (flow table modifications, set miscellaneous datapath
 * parameters such as drop frags, etc.) are protected by genl_mutex.  The RTNL
 * lock nests inside genl_mutex.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact
 * with each other.
 */

/* Global list of datapaths to enable dumping them all out.
 * Protected by genl_mutex.
 */
static LIST_HEAD(dps);

static struct vport *new_vport(const struct vport_parms *);
static int queue_control_packets(struct datapath *, struct sk_buff *,
                                 const struct dp_upcall_info *);

/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
struct datapath *get_dp(int dp_ifindex)
{
        struct datapath *dp = NULL;
        struct net_device *dev;

        rcu_read_lock();
        dev = dev_get_by_index_rcu(&init_net, dp_ifindex);
        if (dev) {
                struct vport *vport = internal_dev_get_vport(dev);
                if (vport)
                        dp = vport->dp;
        }
        rcu_read_unlock();

        return dp;
}
EXPORT_SYMBOL_GPL(get_dp);

/* Must be called with genl_mutex. */
static struct tbl *get_table_protected(struct datapath *dp)
{
        return rcu_dereference_protected(dp->table, lockdep_genl_is_held());
}

/* Must be called with rcu_read_lock or RTNL lock. */
static struct vport *get_vport_protected(struct datapath *dp, u16 port_no)
{
        return rcu_dereference_rtnl(dp->ports[port_no]);
}

/* Must be called with rcu_read_lock or RTNL lock. */
const char *dp_name(const struct datapath *dp)
{
        return vport_get_name(rcu_dereference_rtnl(dp->ports[ODPP_LOCAL]));
}

static inline size_t br_nlmsg_size(void)
{
        return NLMSG_ALIGN(sizeof(struct ifinfomsg))
               + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
               + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
               + nla_total_size(4) /* IFLA_MASTER */
               + nla_total_size(4) /* IFLA_MTU */
               + nla_total_size(4) /* IFLA_LINK */
               + nla_total_size(1); /* IFLA_OPERSTATE */
}

/* Caller must hold RTNL lock. */
static int dp_fill_ifinfo(struct sk_buff *skb,
                          const struct vport *port,
                          int event, unsigned int flags)
{
        struct datapath *dp = port->dp;
        int ifindex = vport_get_ifindex(port);
        int iflink = vport_get_iflink(port);
        struct ifinfomsg *hdr;
        struct nlmsghdr *nlh;

        if (ifindex < 0)
                return ifindex;

        if (iflink < 0)
                return iflink;

        nlh = nlmsg_put(skb, 0, 0, event, sizeof(*hdr), flags);
        if (nlh == NULL)
                return -EMSGSIZE;

        hdr = nlmsg_data(nlh);
        hdr->ifi_family = AF_BRIDGE;
        hdr->__ifi_pad = 0;
        hdr->ifi_type = ARPHRD_ETHER;
        hdr->ifi_index = ifindex;
        hdr->ifi_flags = vport_get_flags(port);
        hdr->ifi_change = 0;

        NLA_PUT_STRING(skb, IFLA_IFNAME, vport_get_name(port));
        NLA_PUT_U32(skb, IFLA_MASTER,
                vport_get_ifindex(get_vport_protected(dp, ODPP_LOCAL)));
        NLA_PUT_U32(skb, IFLA_MTU, vport_get_mtu(port));
#ifdef IFLA_OPERSTATE
        NLA_PUT_U8(skb, IFLA_OPERSTATE,
                   vport_is_running(port)
                        ? vport_get_operstate(port)
                        : IF_OPER_DOWN);
#endif

        NLA_PUT(skb, IFLA_ADDRESS, ETH_ALEN, vport_get_addr(port));

        if (ifindex != iflink)
                NLA_PUT_U32(skb, IFLA_LINK, iflink);

        return nlmsg_end(skb, nlh);

nla_put_failure:
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}

/* Caller must hold RTNL lock. */
static void dp_ifinfo_notify(int event, struct vport *port)
{
        struct sk_buff *skb;
        int err = -ENOBUFS;

        skb = nlmsg_new(br_nlmsg_size(), GFP_KERNEL);
        if (skb == NULL)
                goto errout;

        err = dp_fill_ifinfo(skb, port, event, 0);
        if (err < 0) {
                /* -EMSGSIZE implies BUG in br_nlmsg_size() */
                WARN_ON(err == -EMSGSIZE);
                kfree_skb(skb);
                goto errout;
        }
        rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
        return;
errout:
        if (err < 0)
                rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err);
}

static void release_dp(struct kobject *kobj)
{
        struct datapath *dp = container_of(kobj, struct datapath, ifobj);
        kfree(dp);
}

static struct kobj_type dp_ktype = {
        .release = release_dp
};

static void destroy_dp_rcu(struct rcu_head *rcu)
{
        struct datapath *dp = container_of(rcu, struct datapath, rcu);

        tbl_destroy((struct tbl __force *)dp->table, flow_free_tbl);
        free_percpu(dp->stats_percpu);
        kobject_put(&dp->ifobj);
}

/* Called with RTNL lock and genl_lock. */
static struct vport *new_vport(const struct vport_parms *parms)
{
        struct vport *vport;

        vport = vport_add(parms);
        if (!IS_ERR(vport)) {
                struct datapath *dp = parms->dp;

                rcu_assign_pointer(dp->ports[parms->port_no], vport);
                list_add(&vport->node, &dp->port_list);

                dp_ifinfo_notify(RTM_NEWLINK, vport);
        }

        return vport;
}

/* Called with RTNL lock. */
int dp_detach_port(struct vport *p)
{
        ASSERT_RTNL();

        if (p->port_no != ODPP_LOCAL)
                dp_sysfs_del_if(p);
        dp_ifinfo_notify(RTM_DELLINK, p);

        /* First drop references to device. */
        list_del(&p->node);
        rcu_assign_pointer(p->dp->ports[p->port_no], NULL);

        /* Then destroy it. */
        return vport_del(p);
}

/* Must be called with rcu_read_lock. */
void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
{
        struct datapath *dp = p->dp;
        struct dp_stats_percpu *stats;
        int stats_counter_off;
        struct sw_flow_actions *acts;
        struct loop_counter *loop;
        int error;

        OVS_CB(skb)->vport = p;

        if (!OVS_CB(skb)->flow) {
                struct sw_flow_key key;
                struct tbl_node *flow_node;
                bool is_frag;

                /* Extract flow from 'skb' into 'key'. */
                error = flow_extract(skb, p->port_no, &key, &is_frag);
                if (unlikely(error)) {
                        kfree_skb(skb);
                        return;
                }

                if (is_frag && dp->drop_frags) {
                        kfree_skb(skb);
                        stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
                        goto out;
                }

                /* Look up flow. */
                flow_node = tbl_lookup(rcu_dereference(dp->table), &key,
                                        flow_hash(&key), flow_cmp);
                if (unlikely(!flow_node)) {
                        struct dp_upcall_info upcall;

                        upcall.cmd = ODP_PACKET_CMD_MISS;
                        upcall.key = &key;
                        upcall.userdata = 0;
                        upcall.sample_pool = 0;
                        upcall.actions = NULL;
                        upcall.actions_len = 0;
                        dp_upcall(dp, skb, &upcall);
                        stats_counter_off = offsetof(struct dp_stats_percpu, n_missed);
                        goto out;
                }

                OVS_CB(skb)->flow = flow_cast(flow_node);
        }

        stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
        flow_used(OVS_CB(skb)->flow, skb);

        acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);

        /* Check whether we've looped too much. */
        loop = loop_get_counter();
        if (unlikely(++loop->count > MAX_LOOPS))
                loop->looping = true;
        if (unlikely(loop->looping)) {
                loop_suppress(dp, acts);
                kfree_skb(skb);
                goto out_loop;
        }

        /* Execute actions. */
        execute_actions(dp, skb, &OVS_CB(skb)->flow->key, acts->actions,
                        acts->actions_len);

        /* Check whether sub-actions looped too much. */
        if (unlikely(loop->looping))
                loop_suppress(dp, acts);

out_loop:
        /* Decrement loop counter. */
        if (!--loop->count)
                loop->looping = false;
        loop_put_counter();

out:
        /* Update datapath statistics. */
        local_bh_disable();
        stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());

        write_seqcount_begin(&stats->seqlock);
        (*(u64 *)((u8 *)stats + stats_counter_off))++;
        write_seqcount_end(&stats->seqlock);

        local_bh_enable();
}

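/* Copies 'skb' into the buffer 'to', computing the packet's pending checksum
 * and storing it at the appropriate offset along the way.  Used when copying
 * CHECKSUM_PARTIAL packets to userspace, since for those packets the
 * checksum has not yet been filled in. */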
static void copy_and_csum_skb(struct sk_buff *skb, void *to)
{
        u16 csum_start, csum_offset;
        __wsum csum;

        get_skb_csum_pointers(skb, &csum_start, &csum_offset);
        csum_start -= skb_headroom(skb);

        skb_copy_bits(skb, 0, to, csum_start);

        csum = skb_copy_and_csum_bits(skb, csum_start, to + csum_start,
                                      skb->len - csum_start, 0);
        *(__sum16 *)(to + csum_start + csum_offset) = csum_fold(csum);
}

static struct genl_family dp_packet_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct odp_header),
        .name = ODP_PACKET_FAMILY,
        .version = 1,
        .maxattr = ODP_PACKET_ATTR_MAX
};

/* Generic Netlink multicast groups for upcalls.
 *
 * We really want three unique multicast groups per datapath, but we can't even
 * get one, because genl_register_mc_group() takes genl_lock, which is also
 * held during Generic Netlink message processing, so trying to acquire
 * multicast groups during ODP_DP_NEW processing deadlocks.  Instead, we
 * preallocate a few groups and use them round-robin for datapaths.  Collision
 * isn't fatal--multicast listeners should check that the family is the one
 * that they want and discard others--but it wastes time and memory to receive
 * unwanted messages.
 */
#define PACKET_N_MC_GROUPS 16
static struct genl_multicast_group packet_mc_groups[PACKET_N_MC_GROUPS];

static u32 packet_mc_group(struct datapath *dp, u8 cmd)
{
        u32 idx;
        BUILD_BUG_ON_NOT_POWER_OF_2(PACKET_N_MC_GROUPS);

        idx = jhash_2words(dp->dp_ifindex, cmd, 0) & (PACKET_N_MC_GROUPS - 1);
        return packet_mc_groups[idx].id;
}

static int packet_register_mc_groups(void)
{
        int i;

        for (i = 0; i < PACKET_N_MC_GROUPS; i++) {
                struct genl_multicast_group *group = &packet_mc_groups[i];
                int error;

                sprintf(group->name, "packet%d", i);
                error = genl_register_mc_group(&dp_packet_genl_family, group);
                if (error)
                        return error;
        }
        return 0;
}

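/* Sends 'skb' up to userspace on the multicast group selected by
 * 'upcall_info', first segmenting GSO packets so that each piece fits in an
 * ordinary-sized Netlink message.  Takes ownership of 'skb'.  On failure,
 * bumps the datapath's n_lost counter and returns a negative errno value. */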
int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info)
{
        struct dp_stats_percpu *stats;
        int err;

        WARN_ON_ONCE(skb_shared(skb));

        forward_ip_summed(skb);

        err = vswitch_skb_checksum_setup(skb);
        if (err)
                goto err_kfree_skb;

        /* Break apart GSO packets into their component pieces.  Otherwise
         * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
        if (skb_is_gso(skb)) {
                struct sk_buff *nskb = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);

                kfree_skb(skb);
                skb = nskb;
                if (IS_ERR(skb)) {
                        err = PTR_ERR(skb);
                        goto err;
                }
        }

        err = queue_control_packets(dp, skb, upcall_info);
        if (err)
                goto err;

        return 0;

err_kfree_skb:
        kfree_skb(skb);
err:
        local_bh_disable();
        stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());

        write_seqcount_begin(&stats->seqlock);
        stats->n_lost++;
        write_seqcount_end(&stats->seqlock);

        local_bh_enable();

        return err;
}

/* Send each packet in the 'skb' list to userspace for 'dp' as directed by
 * 'upcall_info'.  There will be only one packet unless we broke up a GSO
 * packet.
 */
static int queue_control_packets(struct datapath *dp, struct sk_buff *skb,
                                 const struct dp_upcall_info *upcall_info)
{
        u32 group = packet_mc_group(dp, upcall_info->cmd);
        struct sk_buff *nskb;
        int port_no;
        int err;

        if (OVS_CB(skb)->vport)
                port_no = OVS_CB(skb)->vport->port_no;
        else
                port_no = ODPP_LOCAL;

        do {
                struct odp_header *upcall;
                struct sk_buff *user_skb; /* to be queued to userspace */
                struct nlattr *nla;
                unsigned int len;

                nskb = skb->next;
                skb->next = NULL;

                err = vlan_deaccel_tag(skb);
                if (unlikely(err))
                        goto err_kfree_skbs;

                if (nla_attr_size(skb->len) > USHRT_MAX) {
                        err = -EFBIG;
                        goto err_kfree_skbs;
                }

                len = sizeof(struct odp_header);
                len += nla_total_size(skb->len);
                len += nla_total_size(FLOW_BUFSIZE);
                if (upcall_info->userdata)
                        len += nla_total_size(8);
                if (upcall_info->sample_pool)
                        len += nla_total_size(4);
                if (upcall_info->actions_len)
                        len += nla_total_size(upcall_info->actions_len);

                user_skb = genlmsg_new(len, GFP_ATOMIC);
                if (!user_skb) {
                        err = -ENOBUFS;
                        netlink_set_err(INIT_NET_GENL_SOCK, 0, group, -ENOBUFS);
                        goto err_kfree_skbs;
                }

                upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 0, upcall_info->cmd);
                upcall->dp_ifindex = dp->dp_ifindex;

                nla = nla_nest_start(user_skb, ODP_PACKET_ATTR_KEY);
                flow_to_nlattrs(upcall_info->key, user_skb);
                nla_nest_end(user_skb, nla);

                if (upcall_info->userdata)
                        nla_put_u64(user_skb, ODP_PACKET_ATTR_USERDATA, upcall_info->userdata);
                if (upcall_info->sample_pool)
                        nla_put_u32(user_skb, ODP_PACKET_ATTR_SAMPLE_POOL, upcall_info->sample_pool);
                if (upcall_info->actions_len) {
                        const struct nlattr *actions = upcall_info->actions;
                        u32 actions_len = upcall_info->actions_len;

                        nla = nla_nest_start(user_skb, ODP_PACKET_ATTR_ACTIONS);
                        memcpy(__skb_put(user_skb, actions_len), actions, actions_len);
                        nla_nest_end(user_skb, nla);
                }

                nla = __nla_reserve(user_skb, ODP_PACKET_ATTR_PACKET, skb->len);
                if (skb->ip_summed == CHECKSUM_PARTIAL)
                        copy_and_csum_skb(skb, nla_data(nla));
                else
                        skb_copy_bits(skb, 0, nla_data(nla), skb->len);

                err = genlmsg_multicast(user_skb, 0, group, GFP_ATOMIC);
                if (err)
                        goto err_kfree_skbs;

                kfree_skb(skb);
                skb = nskb;
        } while (skb);
        return 0;

err_kfree_skbs:
        kfree_skb(skb);
        while ((skb = nskb) != NULL) {
                nskb = skb->next;
                kfree_skb(skb);
        }
        return err;
}

/* Called with genl_mutex. */
static int flush_flows(int dp_ifindex)
{
        struct tbl *old_table;
        struct tbl *new_table;
        struct datapath *dp;

        dp = get_dp(dp_ifindex);
        if (!dp)
                return -ENODEV;

        old_table = get_table_protected(dp);
        new_table = tbl_create(TBL_MIN_BUCKETS);
        if (!new_table)
                return -ENOMEM;

        rcu_assign_pointer(dp->table, new_table);

        tbl_deferred_destroy(old_table, flow_free_tbl);

        return 0;
}

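/* Checks that each action in the nested attribute 'attr' has a known type
 * and a payload of the proper length, and that type-specific constraints
 * hold (valid output port, VLAN CFI bit clear, ECN bits clear).  Returns 0
 * if the actions are valid, otherwise a negative errno value. */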
static int validate_actions(const struct nlattr *attr)
{
        const struct nlattr *a;
        int rem;

        nla_for_each_nested(a, attr, rem) {
                static const u32 action_lens[ODP_ACTION_ATTR_MAX + 1] = {
                        [ODP_ACTION_ATTR_OUTPUT] = 4,
                        [ODP_ACTION_ATTR_CONTROLLER] = 8,
                        [ODP_ACTION_ATTR_SET_DL_TCI] = 2,
                        [ODP_ACTION_ATTR_STRIP_VLAN] = 0,
                        [ODP_ACTION_ATTR_SET_DL_SRC] = ETH_ALEN,
                        [ODP_ACTION_ATTR_SET_DL_DST] = ETH_ALEN,
                        [ODP_ACTION_ATTR_SET_NW_SRC] = 4,
                        [ODP_ACTION_ATTR_SET_NW_DST] = 4,
                        [ODP_ACTION_ATTR_SET_NW_TOS] = 1,
                        [ODP_ACTION_ATTR_SET_TP_SRC] = 2,
                        [ODP_ACTION_ATTR_SET_TP_DST] = 2,
                        [ODP_ACTION_ATTR_SET_TUNNEL] = 8,
                        [ODP_ACTION_ATTR_SET_PRIORITY] = 4,
                        [ODP_ACTION_ATTR_POP_PRIORITY] = 0,
                        [ODP_ACTION_ATTR_DROP_SPOOFED_ARP] = 0,
                };
                int type = nla_type(a);

                if (type > ODP_ACTION_ATTR_MAX || nla_len(a) != action_lens[type])
                        return -EINVAL;

                switch (type) {
                case ODP_ACTION_ATTR_UNSPEC:
                        return -EINVAL;

                case ODP_ACTION_ATTR_CONTROLLER:
                case ODP_ACTION_ATTR_STRIP_VLAN:
                case ODP_ACTION_ATTR_SET_DL_SRC:
                case ODP_ACTION_ATTR_SET_DL_DST:
                case ODP_ACTION_ATTR_SET_NW_SRC:
                case ODP_ACTION_ATTR_SET_NW_DST:
                case ODP_ACTION_ATTR_SET_TP_SRC:
                case ODP_ACTION_ATTR_SET_TP_DST:
                case ODP_ACTION_ATTR_SET_TUNNEL:
                case ODP_ACTION_ATTR_SET_PRIORITY:
                case ODP_ACTION_ATTR_POP_PRIORITY:
                case ODP_ACTION_ATTR_DROP_SPOOFED_ARP:
                        /* No validation needed. */
                        break;

                case ODP_ACTION_ATTR_OUTPUT:
                        if (nla_get_u32(a) >= DP_MAX_PORTS)
                                return -EINVAL;
                        break;

                case ODP_ACTION_ATTR_SET_DL_TCI:
                        if (nla_get_be16(a) & htons(VLAN_CFI_MASK))
                                return -EINVAL;
                        break;

                case ODP_ACTION_ATTR_SET_NW_TOS:
                        if (nla_get_u8(a) & INET_ECN_MASK)
                                return -EINVAL;
                        break;

                default:
                        return -EOPNOTSUPP;
                }
        }

        if (rem > 0)
                return -EINVAL;

        return 0;
}

static void clear_stats(struct sw_flow *flow)
{
        flow->used = 0;
        flow->tcp_flags = 0;
        flow->packet_count = 0;
        flow->byte_count = 0;
}

/* Called with genl_mutex. */
static int expand_table(struct datapath *dp)
{
        struct tbl *old_table = get_table_protected(dp);
        struct tbl *new_table;

        new_table = tbl_expand(old_table);
        if (IS_ERR(new_table))
                return PTR_ERR(new_table);

        rcu_assign_pointer(dp->table, new_table);
        tbl_deferred_destroy(old_table, NULL);

        return 0;
}

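/* Handler for ODP_PACKET_CMD_EXECUTE: injects the Ethernet frame supplied in
 * ODP_PACKET_ATTR_PACKET into 'dp', executing the actions given in
 * ODP_PACKET_ATTR_ACTIONS on it directly instead of consulting the flow
 * table. */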
static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
        struct odp_header *odp_header = info->userhdr;
        struct nlattr **a = info->attrs;
        struct sk_buff *packet;
        struct sw_flow_key key;
        struct datapath *dp;
        struct ethhdr *eth;
        bool is_frag;
        int len;
        int err;

        err = -EINVAL;
        if (!a[ODP_PACKET_ATTR_PACKET] || !a[ODP_PACKET_ATTR_ACTIONS] ||
            nla_len(a[ODP_PACKET_ATTR_PACKET]) < ETH_HLEN)
                goto err;

        err = validate_actions(a[ODP_PACKET_ATTR_ACTIONS]);
        if (err)
                goto err;

        len = nla_len(a[ODP_PACKET_ATTR_PACKET]);
        packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
        err = -ENOMEM;
        if (!packet)
                goto err;
        skb_reserve(packet, NET_IP_ALIGN);

        memcpy(__skb_put(packet, len), nla_data(a[ODP_PACKET_ATTR_PACKET]), len);

        skb_reset_mac_header(packet);
        eth = eth_hdr(packet);

        /* Normally, setting the skb 'protocol' field would be handled by a
         * call to eth_type_trans(), but it assumes there's a sending
         * device, which we may not have. */
        if (ntohs(eth->h_proto) >= 1536)
                packet->protocol = eth->h_proto;
        else
                packet->protocol = htons(ETH_P_802_2);

        /* Initialize OVS_CB (it came from Netlink so might not be zeroed). */
        memset(OVS_CB(packet), 0, sizeof(struct ovs_skb_cb));

        err = flow_extract(packet, -1, &key, &is_frag);
        if (err)
                goto err_kfree_skb;

        rcu_read_lock();
        dp = get_dp(odp_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto err_unlock;
        err = execute_actions(dp, packet, &key,
                              nla_data(a[ODP_PACKET_ATTR_ACTIONS]),
                              nla_len(a[ODP_PACKET_ATTR_ACTIONS]));
        rcu_read_unlock();
        return err;

err_unlock:
        rcu_read_unlock();
err_kfree_skb:
        kfree_skb(packet);
err:
        return err;
}

static const struct nla_policy packet_policy[ODP_PACKET_ATTR_MAX + 1] = {
        [ODP_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
        [ODP_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
};

static struct genl_ops dp_packet_genl_ops[] = {
        { .cmd = ODP_PACKET_CMD_EXECUTE,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = packet_policy,
          .doit = odp_packet_cmd_execute
        }
};

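/* Sums the per-CPU counters for 'dp' into 'stats'.  Each CPU's counters are
 * read under its seqcount so that a consistent snapshot is obtained even if
 * that CPU is concurrently updating them. */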
static void get_dp_stats(struct datapath *dp, struct odp_stats *stats)
{
        int i;

        stats->n_frags = stats->n_hit = stats->n_missed = stats->n_lost = 0;
        for_each_possible_cpu(i) {
                const struct dp_stats_percpu *percpu_stats;
                struct dp_stats_percpu local_stats;
                unsigned seqcount;

                percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

                do {
                        seqcount = read_seqcount_begin(&percpu_stats->seqlock);
                        local_stats = *percpu_stats;
                } while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));

                stats->n_frags += local_stats.n_frags;
                stats->n_hit += local_stats.n_hit;
                stats->n_missed += local_stats.n_missed;
                stats->n_lost += local_stats.n_lost;
        }
}

/* MTU of the dp pseudo-device: ETH_DATA_LEN or the minimum of the ports.
 * Called with RTNL lock.
 */
int dp_min_mtu(const struct datapath *dp)
{
        struct vport *p;
        int mtu = 0;

        ASSERT_RTNL();

        list_for_each_entry (p, &dp->port_list, node) {
                int dev_mtu;

                /* Skip any internal ports, since that's what we're trying to
                 * set. */
                if (is_internal_vport(p))
                        continue;

                dev_mtu = vport_get_mtu(p);
                if (!dev_mtu)
                        continue;
                if (!mtu || dev_mtu < mtu)
                        mtu = dev_mtu;
        }

        return mtu ? mtu : ETH_DATA_LEN;
}

/* Sets the MTU of all of the datapath's internal devices to the minimum of
 * its ports.  Called with RTNL lock.
 */
void set_internal_devs_mtu(const struct datapath *dp)
{
        struct vport *p;
        int mtu;

        ASSERT_RTNL();

        mtu = dp_min_mtu(dp);

        list_for_each_entry (p, &dp->port_list, node) {
                if (is_internal_vport(p))
                        vport_set_mtu(p, mtu);
        }
}

static const struct nla_policy flow_policy[ODP_FLOW_ATTR_MAX + 1] = {
        [ODP_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
        [ODP_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [ODP_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
};

static struct genl_family dp_flow_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct odp_header),
        .name = ODP_FLOW_FAMILY,
        .version = 1,
        .maxattr = ODP_FLOW_ATTR_MAX
};

static struct genl_multicast_group dp_flow_multicast_group = {
        .name = ODP_FLOW_MCGROUP
};

/* Called with genl_lock. */
static int odp_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
                                  struct sk_buff *skb, u32 pid, u32 seq, u32 flags, u8 cmd)
{
        const int skb_orig_len = skb->len;
        const struct sw_flow_actions *sf_acts;
        struct odp_flow_stats stats;
        struct odp_header *odp_header;
        struct nlattr *nla;
        unsigned long used;
        u8 tcp_flags;
        int err;

        sf_acts = rcu_dereference_protected(flow->sf_acts,
                                            lockdep_genl_is_held());

        odp_header = genlmsg_put(skb, pid, seq, &dp_flow_genl_family, flags, cmd);
        if (!odp_header)
                return -EMSGSIZE;

        odp_header->dp_ifindex = dp->dp_ifindex;

        nla = nla_nest_start(skb, ODP_FLOW_ATTR_KEY);
        if (!nla)
                goto nla_put_failure;
        err = flow_to_nlattrs(&flow->key, skb);
        if (err)
                goto error;
        nla_nest_end(skb, nla);

        spin_lock_bh(&flow->lock);
        used = flow->used;
        stats.n_packets = flow->packet_count;
        stats.n_bytes = flow->byte_count;
        tcp_flags = flow->tcp_flags;
        spin_unlock_bh(&flow->lock);

        if (used)
                NLA_PUT_U64(skb, ODP_FLOW_ATTR_USED, flow_used_time(used));

        if (stats.n_packets)
                NLA_PUT(skb, ODP_FLOW_ATTR_STATS, sizeof(struct odp_flow_stats), &stats);

        if (tcp_flags)
                NLA_PUT_U8(skb, ODP_FLOW_ATTR_TCP_FLAGS, tcp_flags);

        /* If ODP_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
         * this is the first flow to be dumped into 'skb'.  This is unusual for
         * Netlink but individual action lists can be longer than
         * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
         * The userspace caller can always fetch the actions separately if it
         * really wants them.  (Most userspace callers in fact don't care.)
         *
         * This can only fail for dump operations because the skb is always
         * properly sized for single flows.
         */
        err = nla_put(skb, ODP_FLOW_ATTR_ACTIONS, sf_acts->actions_len,
                      sf_acts->actions);
        if (err < 0 && skb_orig_len)
                goto error;

        return genlmsg_end(skb, odp_header);

nla_put_failure:
        err = -EMSGSIZE;
error:
        genlmsg_cancel(skb, odp_header);
        return err;
}

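/* Called with genl_lock.  Allocates and returns a reply skb sized to hold a
 * flow message for 'flow', including its key, actions, statistics, TCP
 * flags, and last-used time, or a null pointer if allocation fails. */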
static struct sk_buff *odp_flow_cmd_alloc_info(struct sw_flow *flow)
{
        const struct sw_flow_actions *sf_acts;
        int len;

        sf_acts = rcu_dereference_protected(flow->sf_acts,
                                            lockdep_genl_is_held());

        len = nla_total_size(FLOW_BUFSIZE); /* ODP_FLOW_ATTR_KEY */
        len += nla_total_size(sf_acts->actions_len); /* ODP_FLOW_ATTR_ACTIONS */
        len += nla_total_size(sizeof(struct odp_flow_stats)); /* ODP_FLOW_ATTR_STATS */
        len += nla_total_size(1); /* ODP_FLOW_ATTR_TCP_FLAGS */
        len += nla_total_size(8); /* ODP_FLOW_ATTR_USED */
        return genlmsg_new(NLMSG_ALIGN(sizeof(struct odp_header)) + len, GFP_KERNEL);
}

static struct sk_buff *odp_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp,
                                               u32 pid, u32 seq, u8 cmd)
{
        struct sk_buff *skb;
        int retval;

        skb = odp_flow_cmd_alloc_info(flow);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        retval = odp_flow_cmd_fill_info(flow, dp, skb, pid, seq, 0, cmd);
        BUG_ON(retval < 0);
        return skb;
}

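/* Handler for both ODP_FLOW_CMD_NEW and ODP_FLOW_CMD_SET: creates a new flow
 * or updates the actions (and optionally clears the statistics) of an
 * existing one, then notifies the flow multicast group. */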
static int odp_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct odp_header *odp_header = info->userhdr;
        struct tbl_node *flow_node;
        struct sw_flow_key key;
        struct sw_flow *flow;
        struct sk_buff *reply;
        struct datapath *dp;
        struct tbl *table;
        u32 hash;
        int error;

        /* Extract key. */
        error = -EINVAL;
        if (!a[ODP_FLOW_ATTR_KEY])
                goto error;
        error = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
        if (error)
                goto error;

        /* Validate actions. */
        if (a[ODP_FLOW_ATTR_ACTIONS]) {
                error = validate_actions(a[ODP_FLOW_ATTR_ACTIONS]);
                if (error)
                        goto error;
        } else if (info->genlhdr->cmd == ODP_FLOW_CMD_NEW) {
                error = -EINVAL;
                goto error;
        }

        dp = get_dp(odp_header->dp_ifindex);
        error = -ENODEV;
        if (!dp)
                goto error;

        hash = flow_hash(&key);
        table = get_table_protected(dp);
        flow_node = tbl_lookup(table, &key, hash, flow_cmp);
        if (!flow_node) {
                struct sw_flow_actions *acts;

                /* Bail out if we're not allowed to create a new flow. */
                error = -ENOENT;
                if (info->genlhdr->cmd == ODP_FLOW_CMD_SET)
                        goto error;

                /* Expand table, if necessary, to make room. */
                if (tbl_count(table) >= tbl_n_buckets(table)) {
                        error = expand_table(dp);
                        if (error)
                                goto error;
                        table = get_table_protected(dp);
                }

                /* Allocate flow. */
                flow = flow_alloc();
                if (IS_ERR(flow)) {
                        error = PTR_ERR(flow);
                        goto error;
                }
                flow->key = key;
                clear_stats(flow);

                /* Obtain actions. */
                acts = flow_actions_alloc(a[ODP_FLOW_ATTR_ACTIONS]);
                error = PTR_ERR(acts);
                if (IS_ERR(acts))
                        goto error_free_flow;
                rcu_assign_pointer(flow->sf_acts, acts);

                /* Put flow in bucket. */
                error = tbl_insert(table, &flow->tbl_node, hash);
                if (error)
                        goto error_free_flow;

                reply = odp_flow_cmd_build_info(flow, dp, info->snd_pid,
                                                info->snd_seq, ODP_FLOW_CMD_NEW);
        } else {
                /* We found a matching flow. */
                struct sw_flow_actions *old_acts;

                /* Bail out if we're not allowed to modify an existing flow.
                 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
                 * because Generic Netlink treats the latter as a dump
                 * request.  We also accept NLM_F_EXCL in case that bug ever
                 * gets fixed.
                 */
                error = -EEXIST;
                if (info->genlhdr->cmd == ODP_FLOW_CMD_NEW &&
                    info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
                        goto error;

                /* Update actions. */
                flow = flow_cast(flow_node);
                old_acts = rcu_dereference_protected(flow->sf_acts,
                                                     lockdep_genl_is_held());
                if (a[ODP_FLOW_ATTR_ACTIONS] &&
                    (old_acts->actions_len != nla_len(a[ODP_FLOW_ATTR_ACTIONS]) ||
                     memcmp(old_acts->actions, nla_data(a[ODP_FLOW_ATTR_ACTIONS]),
                            old_acts->actions_len))) {
                        struct sw_flow_actions *new_acts;

                        new_acts = flow_actions_alloc(a[ODP_FLOW_ATTR_ACTIONS]);
                        error = PTR_ERR(new_acts);
                        if (IS_ERR(new_acts))
                                goto error;

                        rcu_assign_pointer(flow->sf_acts, new_acts);
                        flow_deferred_free_acts(old_acts);
                }

                reply = odp_flow_cmd_build_info(flow, dp, info->snd_pid,
                                                info->snd_seq, ODP_FLOW_CMD_NEW);

                /* Clear stats. */
                if (a[ODP_FLOW_ATTR_CLEAR]) {
                        spin_lock_bh(&flow->lock);
                        clear_stats(flow);
                        spin_unlock_bh(&flow->lock);
                }
        }

        if (!IS_ERR(reply))
                genl_notify(reply, genl_info_net(info), info->snd_pid,
                            dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
        else
                netlink_set_err(INIT_NET_GENL_SOCK, 0,
                                dp_flow_multicast_group.id, PTR_ERR(reply));
        return 0;

error_free_flow:
        flow_put(flow);
error:
        return error;
}

static int odp_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct odp_header *odp_header = info->userhdr;
        struct sw_flow_key key;
        struct tbl_node *flow_node;
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
        struct tbl *table;
        int err;

        if (!a[ODP_FLOW_ATTR_KEY])
                return -EINVAL;
        err = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
        if (err)
                return err;

        dp = get_dp(odp_header->dp_ifindex);
        if (!dp)
                return -ENODEV;

        table = get_table_protected(dp);
        flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
        if (!flow_node)
                return -ENOENT;

        flow = flow_cast(flow_node);
        reply = odp_flow_cmd_build_info(flow, dp, info->snd_pid, info->snd_seq, ODP_FLOW_CMD_NEW);
        if (IS_ERR(reply))
                return PTR_ERR(reply);

        return genlmsg_reply(reply, info);
}

static int odp_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct odp_header *odp_header = info->userhdr;
        struct sw_flow_key key;
        struct tbl_node *flow_node;
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
        struct tbl *table;
        int err;

        if (!a[ODP_FLOW_ATTR_KEY])
                return flush_flows(odp_header->dp_ifindex);
        err = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
        if (err)
                return err;

        dp = get_dp(odp_header->dp_ifindex);
        if (!dp)
                return -ENODEV;

        table = get_table_protected(dp);
        flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
        if (!flow_node)
                return -ENOENT;
        flow = flow_cast(flow_node);

        reply = odp_flow_cmd_alloc_info(flow);
        if (!reply)
                return -ENOMEM;

        err = tbl_remove(table, flow_node);
        if (err) {
                kfree_skb(reply);
                return err;
        }

        err = odp_flow_cmd_fill_info(flow, dp, reply, info->snd_pid,
                                     info->snd_seq, 0, ODP_FLOW_CMD_DEL);
        BUG_ON(err < 0);

        flow_deferred_free(flow);

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
        return 0;
}

static int odp_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct odp_header *odp_header = genlmsg_data(nlmsg_data(cb->nlh));
        struct datapath *dp;

        dp = get_dp(odp_header->dp_ifindex);
        if (!dp)
                return -ENODEV;

        for (;;) {
                struct tbl_node *flow_node;
                struct sw_flow *flow;
                u32 bucket, obj;

                bucket = cb->args[0];
                obj = cb->args[1];
                flow_node = tbl_next(get_table_protected(dp), &bucket, &obj);
                if (!flow_node)
                        break;

                flow = flow_cast(flow_node);
                if (odp_flow_cmd_fill_info(flow, dp, skb, NETLINK_CB(cb->skb).pid,
                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                           ODP_FLOW_CMD_NEW) < 0)
                        break;

                cb->args[0] = bucket;
                cb->args[1] = obj;
        }
        return skb->len;
}

static struct genl_ops dp_flow_genl_ops[] = {
        { .cmd = ODP_FLOW_CMD_NEW,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = odp_flow_cmd_new_or_set
        },
        { .cmd = ODP_FLOW_CMD_DEL,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = odp_flow_cmd_del
        },
        { .cmd = ODP_FLOW_CMD_GET,
          .flags = 0,               /* OK for unprivileged users. */
          .policy = flow_policy,
          .doit = odp_flow_cmd_get,
          .dumpit = odp_flow_cmd_dump
        },
        { .cmd = ODP_FLOW_CMD_SET,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = odp_flow_cmd_new_or_set,
        },
};

static const struct nla_policy datapath_policy[ODP_DP_ATTR_MAX + 1] = {
#ifdef HAVE_NLA_NUL_STRING
        [ODP_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
#endif
        [ODP_DP_ATTR_IPV4_FRAGS] = { .type = NLA_U32 },
        [ODP_DP_ATTR_SAMPLING] = { .type = NLA_U32 },
};

static struct genl_family dp_datapath_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct odp_header),
        .name = ODP_DATAPATH_FAMILY,
        .version = 1,
        .maxattr = ODP_DP_ATTR_MAX
};

static struct genl_multicast_group dp_datapath_multicast_group = {
        .name = ODP_DATAPATH_MCGROUP
};

static int odp_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
                                u32 pid, u32 seq, u32 flags, u8 cmd)
{
        struct odp_header *odp_header;
        struct nlattr *nla;
        int err;

        odp_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family,
                                   flags, cmd);
        if (!odp_header)
                goto error;

        odp_header->dp_ifindex = dp->dp_ifindex;

        rcu_read_lock();
        err = nla_put_string(skb, ODP_DP_ATTR_NAME, dp_name(dp));
        rcu_read_unlock();
        if (err)
                goto nla_put_failure;

        nla = nla_reserve(skb, ODP_DP_ATTR_STATS, sizeof(struct odp_stats));
        if (!nla)
                goto nla_put_failure;
        get_dp_stats(dp, nla_data(nla));

        NLA_PUT_U32(skb, ODP_DP_ATTR_IPV4_FRAGS,
                    dp->drop_frags ? ODP_DP_FRAG_DROP : ODP_DP_FRAG_ZERO);

        if (dp->sflow_probability)
                NLA_PUT_U32(skb, ODP_DP_ATTR_SAMPLING, dp->sflow_probability);

        nla = nla_nest_start(skb, ODP_DP_ATTR_MCGROUPS);
        if (!nla)
                goto nla_put_failure;
        NLA_PUT_U32(skb, ODP_PACKET_CMD_MISS, packet_mc_group(dp, ODP_PACKET_CMD_MISS));
        NLA_PUT_U32(skb, ODP_PACKET_CMD_ACTION, packet_mc_group(dp, ODP_PACKET_CMD_ACTION));
        NLA_PUT_U32(skb, ODP_PACKET_CMD_SAMPLE, packet_mc_group(dp, ODP_PACKET_CMD_SAMPLE));
        nla_nest_end(skb, nla);

        return genlmsg_end(skb, odp_header);

nla_put_failure:
        genlmsg_cancel(skb, odp_header);
error:
        return -EMSGSIZE;
}

static struct sk_buff *odp_dp_cmd_build_info(struct datapath *dp, u32 pid,
                                             u32 seq, u8 cmd)
{
        struct sk_buff *skb;
        int retval;

        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        retval = odp_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd);
        if (retval < 0) {
                kfree_skb(skb);
                return ERR_PTR(retval);
        }
        return skb;
}

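/* Validates the datapath attributes in 'a' that the Netlink policy alone
 * cannot check: the IPv4 fragment handling mode, and (via the
 * CHECK_NUL_STRING compatibility macro) the length and NUL termination of
 * the datapath name. */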
static int odp_dp_cmd_validate(struct nlattr *a[ODP_DP_ATTR_MAX + 1])
{
        if (a[ODP_DP_ATTR_IPV4_FRAGS]) {
                u32 frags = nla_get_u32(a[ODP_DP_ATTR_IPV4_FRAGS]);

                if (frags != ODP_DP_FRAG_ZERO && frags != ODP_DP_FRAG_DROP)
                        return -EINVAL;
        }

        return CHECK_NUL_STRING(a[ODP_DP_ATTR_NAME], IFNAMSIZ - 1);
}

/* Called with genl_mutex and optionally with RTNL lock also. */
static struct datapath *lookup_datapath(struct odp_header *odp_header, struct nlattr *a[ODP_DP_ATTR_MAX + 1])
{
        struct datapath *dp;

        if (!a[ODP_DP_ATTR_NAME])
                dp = get_dp(odp_header->dp_ifindex);
        else {
                struct vport *vport;

                rcu_read_lock();
                vport = vport_locate(nla_data(a[ODP_DP_ATTR_NAME]));
                dp = vport && vport->port_no == ODPP_LOCAL ? vport->dp : NULL;
                rcu_read_unlock();
        }
        return dp ? dp : ERR_PTR(-ENODEV);
}

/* Called with genl_mutex. */
static void change_datapath(struct datapath *dp, struct nlattr *a[ODP_DP_ATTR_MAX + 1])
{
        if (a[ODP_DP_ATTR_IPV4_FRAGS])
                dp->drop_frags = nla_get_u32(a[ODP_DP_ATTR_IPV4_FRAGS]) == ODP_DP_FRAG_DROP;
        if (a[ODP_DP_ATTR_SAMPLING])
                dp->sflow_probability = nla_get_u32(a[ODP_DP_ATTR_SAMPLING]);
}

static int odp_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct vport_parms parms;
        struct sk_buff *reply;
        struct datapath *dp;
        struct vport *vport;
        int err;

        err = -EINVAL;
        if (!a[ODP_DP_ATTR_NAME])
                goto err;

        err = odp_dp_cmd_validate(a);
        if (err)
                goto err;

        rtnl_lock();
        err = -ENODEV;
        if (!try_module_get(THIS_MODULE))
                goto err_unlock_rtnl;

        err = -ENOMEM;
        dp = kzalloc(sizeof(*dp), GFP_KERNEL);
        if (dp == NULL)
                goto err_put_module;
        INIT_LIST_HEAD(&dp->port_list);

        /* Initialize kobject for bridge.  This will be added as
         * /sys/class/net/<devname>/brif later, if sysfs is enabled. */
        dp->ifobj.kset = NULL;
        kobject_init(&dp->ifobj, &dp_ktype);

        /* Allocate table. */
        err = -ENOMEM;
        rcu_assign_pointer(dp->table, tbl_create(TBL_MIN_BUCKETS));
        if (!dp->table)
                goto err_free_dp;

        /* Set up our datapath device. */
        parms.name = nla_data(a[ODP_DP_ATTR_NAME]);
        parms.type = ODP_VPORT_TYPE_INTERNAL;
        parms.options = NULL;
        parms.dp = dp;
        parms.port_no = ODPP_LOCAL;
        vport = new_vport(&parms);
        if (IS_ERR(vport)) {
                err = PTR_ERR(vport);
                if (err == -EBUSY)
                        err = -EEXIST;

                goto err_destroy_table;
        }
        dp->dp_ifindex = vport_get_ifindex(vport);

        dp->drop_frags = 0;
        dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
        if (!dp->stats_percpu) {
                err = -ENOMEM;
                goto err_destroy_local_port;
        }

        change_datapath(dp, a);

        reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
        err = PTR_ERR(reply);
        if (IS_ERR(reply))
                goto err_destroy_local_port;

        list_add_tail(&dp->list_node, &dps);
        dp_sysfs_add_dp(dp);

        rtnl_unlock();

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
        return 0;

err_destroy_local_port:
        dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));
err_destroy_table:
        tbl_destroy(get_table_protected(dp), NULL);
err_free_dp:
        kfree(dp);
err_put_module:
        module_put(THIS_MODULE);
err_unlock_rtnl:
        rtnl_unlock();
err:
        return err;
}

static int odp_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct vport *vport, *next_vport;
        struct sk_buff *reply;
        struct datapath *dp;
        int err;

        err = odp_dp_cmd_validate(info->attrs);
        if (err)
                goto exit;

        rtnl_lock();
        dp = lookup_datapath(info->userhdr, info->attrs);
        err = PTR_ERR(dp);
        if (IS_ERR(dp))
                goto exit_unlock;

        reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_DEL);
        err = PTR_ERR(reply);
        if (IS_ERR(reply))
                goto exit_unlock;

        list_for_each_entry_safe (vport, next_vport, &dp->port_list, node)
                if (vport->port_no != ODPP_LOCAL)
                        dp_detach_port(vport);

        dp_sysfs_del_dp(dp);
        list_del(&dp->list_node);
        dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));

        /* rtnl_unlock() will wait until all the references to devices that
         * are pending unregistration have been dropped.  We do it here to
         * ensure that any internal devices (which contain DP pointers) are
         * fully destroyed before freeing the datapath.
         */
        rtnl_unlock();

        call_rcu(&dp->rcu, destroy_dp_rcu);
        module_put(THIS_MODULE);

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);

        return 0;

exit_unlock:
        rtnl_unlock();
exit:
        return err;
}

static int odp_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
        struct sk_buff *reply;
        struct datapath *dp;
        int err;

        err = odp_dp_cmd_validate(info->attrs);
        if (err)
                return err;

        dp = lookup_datapath(info->userhdr, info->attrs);
        if (IS_ERR(dp))
                return PTR_ERR(dp);

        change_datapath(dp, info->attrs);

        reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
        if (IS_ERR(reply)) {
                err = PTR_ERR(reply);
                netlink_set_err(INIT_NET_GENL_SOCK, 0,
                                dp_datapath_multicast_group.id, err);
                return 0;
        }

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
        return 0;
}

static int odp_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct sk_buff *reply;
        struct datapath *dp;
        int err;

        err = odp_dp_cmd_validate(info->attrs);
        if (err)
                return err;

        dp = lookup_datapath(info->userhdr, info->attrs);
        if (IS_ERR(dp))
                return PTR_ERR(dp);

        reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
        if (IS_ERR(reply))
                return PTR_ERR(reply);

        return genlmsg_reply(reply, info);
}

static int odp_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct datapath *dp;
        int skip = cb->args[0];
        int i = 0;

        list_for_each_entry (dp, &dps, list_node) {
                if (i >= skip &&
                    odp_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid,
                                         cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                         ODP_DP_CMD_NEW) < 0)
                        break;
                i++;
        }
1559
1560         cb->args[0] = i;
1561
1562         return skb->len;
1563 }
1564
static struct genl_ops dp_datapath_genl_ops[] = {
        { .cmd = ODP_DP_CMD_NEW,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = datapath_policy,
          .doit = odp_dp_cmd_new
        },
        { .cmd = ODP_DP_CMD_DEL,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = datapath_policy,
          .doit = odp_dp_cmd_del
        },
        { .cmd = ODP_DP_CMD_GET,
          .flags = 0,               /* OK for unprivileged users. */
          .policy = datapath_policy,
          .doit = odp_dp_cmd_get,
          .dumpit = odp_dp_cmd_dump
        },
        { .cmd = ODP_DP_CMD_SET,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = datapath_policy,
          .doit = odp_dp_cmd_set,
        },
};

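/* Netlink attribute policy for vport commands.  On kernels without
 * NLA_NUL_STRING the policy can only enforce minimum lengths, so string
 * termination is checked separately in odp_vport_cmd_validate(). */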
static const struct nla_policy vport_policy[ODP_VPORT_ATTR_MAX + 1] = {
#ifdef HAVE_NLA_NUL_STRING
        [ODP_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
        [ODP_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
        [ODP_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
        [ODP_VPORT_ATTR_STATS] = { .len = sizeof(struct rtnl_link_stats64) },
        [ODP_VPORT_ATTR_ADDRESS] = { .len = ETH_ALEN },
#else
        [ODP_VPORT_ATTR_STATS] = { .minlen = sizeof(struct rtnl_link_stats64) },
        [ODP_VPORT_ATTR_ADDRESS] = { .minlen = ETH_ALEN },
#endif
        [ODP_VPORT_ATTR_MTU] = { .type = NLA_U32 },
        [ODP_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
};

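/* Generic netlink family for vport operations; the kernel assigns the
 * family id at registration time (GENL_ID_GENERATE). */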
static struct genl_family dp_vport_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct odp_header),
        .name = ODP_VPORT_FAMILY,
        .version = 1,
        .maxattr = ODP_VPORT_ATTR_MAX
};

static struct genl_multicast_group dp_vport_multicast_group = {
        .name = ODP_VPORT_MCGROUP
};

/* Called with RTNL lock or RCU read lock. */
static int odp_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
                                   u32 pid, u32 seq, u32 flags, u8 cmd)
{
        struct odp_header *odp_header;
        struct nlattr *nla;
        int ifindex, iflink;
        int mtu;
        int err;

        odp_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family,
                                 flags, cmd);
        if (!odp_header)
                return -EMSGSIZE;

        odp_header->dp_ifindex = vport->dp->dp_ifindex;

        NLA_PUT_U32(skb, ODP_VPORT_ATTR_PORT_NO, vport->port_no);
        NLA_PUT_U32(skb, ODP_VPORT_ATTR_TYPE, vport_get_type(vport));
        NLA_PUT_STRING(skb, ODP_VPORT_ATTR_NAME, vport_get_name(vport));

        nla = nla_reserve(skb, ODP_VPORT_ATTR_STATS,
                          sizeof(struct rtnl_link_stats64));
        if (!nla)
                goto nla_put_failure;
        if (vport_get_stats(vport, nla_data(nla)))
                __skb_trim(skb, skb->len - nla->nla_len);

        NLA_PUT(skb, ODP_VPORT_ATTR_ADDRESS, ETH_ALEN, vport_get_addr(vport));

        mtu = vport_get_mtu(vport);
        if (mtu)
                NLA_PUT_U32(skb, ODP_VPORT_ATTR_MTU, mtu);

        err = vport_get_options(vport, skb);
        if (err == -EMSGSIZE)
                goto error;

        ifindex = vport_get_ifindex(vport);
        if (ifindex > 0)
                NLA_PUT_U32(skb, ODP_VPORT_ATTR_IFINDEX, ifindex);

        iflink = vport_get_iflink(vport);
        if (iflink > 0)
                NLA_PUT_U32(skb, ODP_VPORT_ATTR_IFLINK, iflink);

        return genlmsg_end(skb, odp_header);

nla_put_failure:
        err = -EMSGSIZE;
error:
        genlmsg_cancel(skb, odp_header);
        return err;
}

/* Called with RTNL lock or RCU read lock. */
static struct sk_buff *odp_vport_cmd_build_info(struct vport *vport, u32 pid,
                                                u32 seq, u8 cmd)
{
        struct sk_buff *skb;
        int retval;

        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        retval = odp_vport_cmd_fill_info(vport, skb, pid, seq, 0, cmd);
        if (retval < 0) {
                kfree_skb(skb);
                return ERR_PTR(retval);
        }
        return skb;
}

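/* Checks that ODP_VPORT_ATTR_NAME, if present, is a null-terminated string
 * on kernels whose netlink policy cannot enforce NLA_NUL_STRING itself. */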
static int odp_vport_cmd_validate(struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
{
        return CHECK_NUL_STRING(a[ODP_VPORT_ATTR_NAME], IFNAMSIZ - 1);
}

/* Called with RTNL lock or RCU read lock. */
static struct vport *lookup_vport(struct odp_header *odp_header,
                                  struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
{
        struct datapath *dp;
        struct vport *vport;

        if (a[ODP_VPORT_ATTR_NAME]) {
                vport = vport_locate(nla_data(a[ODP_VPORT_ATTR_NAME]));
                if (!vport)
                        return ERR_PTR(-ENODEV);
                return vport;
        } else if (a[ODP_VPORT_ATTR_PORT_NO]) {
                u32 port_no = nla_get_u32(a[ODP_VPORT_ATTR_PORT_NO]);

                if (port_no >= DP_MAX_PORTS)
                        return ERR_PTR(-EFBIG);

                dp = get_dp(odp_header->dp_ifindex);
                if (!dp)
                        return ERR_PTR(-ENODEV);

                vport = get_vport_protected(dp, port_no);
                if (!vport)
                        return ERR_PTR(-ENOENT);
                return vport;
        } else
                return ERR_PTR(-EINVAL);
}

/* Called with RTNL lock. */
static int change_vport(struct vport *vport,
                        struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
{
        int err = 0;

        if (a[ODP_VPORT_ATTR_STATS])
                err = vport_set_stats(vport, nla_data(a[ODP_VPORT_ATTR_STATS]));
        if (!err && a[ODP_VPORT_ATTR_ADDRESS])
                err = vport_set_addr(vport, nla_data(a[ODP_VPORT_ATTR_ADDRESS]));
        if (!err && a[ODP_VPORT_ATTR_MTU])
                err = vport_set_mtu(vport, nla_get_u32(a[ODP_VPORT_ATTR_MTU]));
        return err;
}

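/* Handler for ODP_VPORT_CMD_NEW: attaches a new vport to a datapath.  If
 * the request does not name a port number, the lowest free port number
 * (starting from 1) is chosen. */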
static int odp_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct odp_header *odp_header = info->userhdr;
        struct vport_parms parms;
        struct sk_buff *reply;
        struct vport *vport;
        struct datapath *dp;
        u32 port_no;
        int err;

        err = -EINVAL;
        if (!a[ODP_VPORT_ATTR_NAME] || !a[ODP_VPORT_ATTR_TYPE])
                goto exit;

        err = odp_vport_cmd_validate(a);
        if (err)
                goto exit;

        rtnl_lock();
        dp = get_dp(odp_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto exit_unlock;

        if (a[ODP_VPORT_ATTR_PORT_NO]) {
                port_no = nla_get_u32(a[ODP_VPORT_ATTR_PORT_NO]);

                err = -EFBIG;
                if (port_no >= DP_MAX_PORTS)
                        goto exit_unlock;

                vport = get_vport_protected(dp, port_no);
                err = -EBUSY;
                if (vport)
                        goto exit_unlock;
        } else {
                for (port_no = 1; ; port_no++) {
                        if (port_no >= DP_MAX_PORTS) {
                                err = -EFBIG;
                                goto exit_unlock;
                        }
                        vport = get_vport_protected(dp, port_no);
                        if (!vport)
                                break;
                }
        }

        parms.name = nla_data(a[ODP_VPORT_ATTR_NAME]);
        parms.type = nla_get_u32(a[ODP_VPORT_ATTR_TYPE]);
        parms.options = a[ODP_VPORT_ATTR_OPTIONS];
        parms.dp = dp;
        parms.port_no = port_no;

        vport = new_vport(&parms);
        err = PTR_ERR(vport);
        if (IS_ERR(vport))
                goto exit_unlock;

        set_internal_devs_mtu(dp);
        dp_sysfs_add_if(vport);

        err = change_vport(vport, a);
        if (!err) {
                reply = odp_vport_cmd_build_info(vport, info->snd_pid,
                                                 info->snd_seq, ODP_VPORT_CMD_NEW);
                if (IS_ERR(reply))
                        err = PTR_ERR(reply);
        }
        if (err) {
                dp_detach_port(vport);
                goto exit_unlock;
        }
        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);

exit_unlock:
        rtnl_unlock();
exit:
        return err;
}

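/* Handler for ODP_VPORT_CMD_SET: updates the options, stats, address, or
 * MTU of an existing vport under RTNL and notifies the vport multicast
 * group. */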
static int odp_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct sk_buff *reply;
        struct vport *vport;
        int err;

        err = odp_vport_cmd_validate(a);
        if (err)
                goto exit;

        rtnl_lock();
        vport = lookup_vport(info->userhdr, a);
        err = PTR_ERR(vport);
        if (IS_ERR(vport))
                goto exit_unlock;

        err = 0;
        if (a[ODP_VPORT_ATTR_OPTIONS])
                err = vport_set_options(vport, a[ODP_VPORT_ATTR_OPTIONS]);
        if (!err)
                err = change_vport(vport, a);

        reply = odp_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
                                         ODP_VPORT_CMD_NEW);
        if (IS_ERR(reply)) {
                err = PTR_ERR(reply);
                netlink_set_err(INIT_NET_GENL_SOCK, 0,
                                dp_vport_multicast_group.id, err);
                /* The error has been broadcast to listeners; RTNL must
                 * still be released before returning. */
                err = 0;
                goto exit_unlock;
        }

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);

exit_unlock:
        rtnl_unlock();
exit:
        return err;
}

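/* Handler for ODP_VPORT_CMD_DEL: detaches a vport from its datapath.  The
 * local port (ODPP_LOCAL) cannot be removed this way; it is only torn down
 * together with the datapath itself. */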
static int odp_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct sk_buff *reply;
        struct vport *vport;
        int err;

        err = odp_vport_cmd_validate(a);
        if (err)
                goto exit;

        rtnl_lock();
        vport = lookup_vport(info->userhdr, a);
        err = PTR_ERR(vport);
        if (IS_ERR(vport))
                goto exit_unlock;

        if (vport->port_no == ODPP_LOCAL) {
                err = -EINVAL;
                goto exit_unlock;
        }

        reply = odp_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
                                         ODP_VPORT_CMD_DEL);
        err = PTR_ERR(reply);
        if (IS_ERR(reply))
                goto exit_unlock;

        err = dp_detach_port(vport);

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);

exit_unlock:
        rtnl_unlock();
exit:
        return err;
}

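/* Handler for ODP_VPORT_CMD_GET: replies with the state of a single vport.
 * Takes only the RCU read lock, since nothing is modified. */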
static int odp_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct odp_header *odp_header = info->userhdr;
        struct sk_buff *reply;
        struct vport *vport;
        int err;

        err = odp_vport_cmd_validate(a);
        if (err)
                goto exit;

        rcu_read_lock();
        vport = lookup_vport(odp_header, a);
        err = PTR_ERR(vport);
        if (IS_ERR(vport))
                goto exit_unlock;

        reply = odp_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
                                         ODP_VPORT_CMD_NEW);
        err = PTR_ERR(reply);
        if (IS_ERR(reply))
                goto exit_unlock;

        err = genlmsg_reply(reply, info);

exit_unlock:
        rcu_read_unlock();
exit:
        return err;
}

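/* Dump callback for ODP_VPORT_CMD_GET: emits one message per vport on the
 * datapath, resuming from the port number saved in cb->args[0]. */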
static int odp_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct odp_header *odp_header = genlmsg_data(nlmsg_data(cb->nlh));
        struct datapath *dp;
        u32 port_no;
        int retval;

        dp = get_dp(odp_header->dp_ifindex);
        if (!dp)
                return -ENODEV;

        rcu_read_lock();
        for (port_no = cb->args[0]; port_no < DP_MAX_PORTS; port_no++) {
                struct vport *vport;

                vport = get_vport_protected(dp, port_no);
                if (!vport)
                        continue;

                if (odp_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).pid,
                                            cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                            ODP_VPORT_CMD_NEW) < 0)
                        break;
        }
        rcu_read_unlock();

        cb->args[0] = port_no;
        retval = skb->len;

        return retval;
}

static struct genl_ops dp_vport_genl_ops[] = {
        { .cmd = ODP_VPORT_CMD_NEW,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = vport_policy,
          .doit = odp_vport_cmd_new
        },
        { .cmd = ODP_VPORT_CMD_DEL,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = vport_policy,
          .doit = odp_vport_cmd_del
        },
        { .cmd = ODP_VPORT_CMD_GET,
          .flags = 0,               /* OK for unprivileged users. */
          .policy = vport_policy,
          .doit = odp_vport_cmd_get,
          .dumpit = odp_vport_cmd_dump
        },
        { .cmd = ODP_VPORT_CMD_SET,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = vport_policy,
          .doit = odp_vport_cmd_set,
        },
};

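/* Bundles a generic netlink family with its operations and optional
 * multicast group so that registration and unregistration can be driven
 * by a single table. */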
struct genl_family_and_ops {
        struct genl_family *family;
        struct genl_ops *ops;
        int n_ops;
        struct genl_multicast_group *group;
};

static const struct genl_family_and_ops dp_genl_families[] = {
        { &dp_datapath_genl_family,
          dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
          &dp_datapath_multicast_group },
        { &dp_vport_genl_family,
          dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
          &dp_vport_multicast_group },
        { &dp_flow_genl_family,
          dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
          &dp_flow_multicast_group },
        { &dp_packet_genl_family,
          dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
          NULL },
};

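/* Unregisters the first 'n_families' entries in dp_genl_families.
 * Unregistering a family also unregisters its multicast groups. */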
static void dp_unregister_genl(int n_families)
{
        int i;

        for (i = 0; i < n_families; i++)
                genl_unregister_family(dp_genl_families[i].family);
}

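/* Registers every family in dp_genl_families, along with its multicast
 * group if it has one.  On failure, any families registered so far are
 * unregistered again. */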
static int dp_register_genl(void)
{
        int n_registered;
        int err;
        int i;

        n_registered = 0;
        for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
                const struct genl_family_and_ops *f = &dp_genl_families[i];

                err = genl_register_family_with_ops(f->family, f->ops,
                                                    f->n_ops);
                if (err)
                        goto error;
                n_registered++;

                if (f->group) {
                        err = genl_register_mc_group(f->family, f->group);
                        if (err)
                                goto error;
                }
        }

        err = packet_register_mc_groups();
        if (err)
                goto error;
        return 0;

error:
        dp_unregister_genl(n_registered);
        return err;
}

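/* Module initialization: brings up the flow and vport subsystems, hooks
 * the netdevice notifier, and finally publishes the generic netlink
 * families.  Each step is unwound in reverse order on failure. */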
static int __init dp_init(void)
{
        struct sk_buff *dummy_skb;
        int err;

        BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb));

        printk(KERN_INFO "Open vSwitch %s, built "__DATE__" "__TIME__"\n",
               VERSION BUILDNR);

        err = flow_init();
        if (err)
                goto error;

        err = vport_init();
        if (err)
                goto error_flow_exit;

        err = register_netdevice_notifier(&dp_device_notifier);
        if (err)
                goto error_vport_exit;

        err = dp_register_genl();
        if (err < 0)
                goto error_unreg_notifier;

        return 0;

error_unreg_notifier:
        unregister_netdevice_notifier(&dp_device_notifier);
error_vport_exit:
        vport_exit();
error_flow_exit:
        flow_exit();
error:
        return err;
}

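/* Module cleanup.  rcu_barrier() waits for outstanding call_rcu()
 * callbacks (e.g. destroy_dp_rcu()) to complete before the module text
 * they reference goes away. */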
static void dp_cleanup(void)
{
        rcu_barrier();
        dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
        unregister_netdevice_notifier(&dp_device_notifier);
        vport_exit();
        flow_exit();
}

module_init(dp_init);
module_exit(dp_cleanup);

MODULE_DESCRIPTION("Open vSwitch switching datapath");
MODULE_LICENSE("GPL");