2 * Copyright (c) 2007, 2008, 2009, 2010, 2011 Nicira Networks.
3 * Distributed under the terms of the GNU GPL version 2.
5 * Significant portions of this file may be copied from parts of the Linux
6 * kernel, by Linus Torvalds and others.
9 /* Functions for managing the dp interface/device. */
11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13 #include <linux/init.h>
14 #include <linux/module.h>
15 #include <linux/if_arp.h>
16 #include <linux/if_vlan.h>
19 #include <linux/jhash.h>
20 #include <linux/delay.h>
21 #include <linux/time.h>
22 #include <linux/etherdevice.h>
23 #include <linux/genetlink.h>
24 #include <linux/kernel.h>
25 #include <linux/kthread.h>
26 #include <linux/mutex.h>
27 #include <linux/percpu.h>
28 #include <linux/rcupdate.h>
29 #include <linux/tcp.h>
30 #include <linux/udp.h>
31 #include <linux/version.h>
32 #include <linux/ethtool.h>
33 #include <linux/wait.h>
34 #include <asm/system.h>
35 #include <asm/div64.h>
36 #include <linux/highmem.h>
37 #include <linux/netfilter_bridge.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/inetdevice.h>
40 #include <linux/list.h>
41 #include <linux/openvswitch.h>
42 #include <linux/rculist.h>
43 #include <linux/dmi.h>
44 #include <net/inet_ecn.h>
45 #include <net/genetlink.h>
53 #include "vport-internal_dev.h"
55 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) || \
56 LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0)
57 #error Kernels before 2.6.18 or after 3.1 are not supported by this version of Open vSwitch.
/* Exported ioctl hook for datapath net_devices; presumably installed by
 * the bridge-compat module when loaded — TODO confirm against callers. */
60 int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
61 EXPORT_SYMBOL(dp_ioctl_hook);
66 * Writes to device state (add/remove datapath, port, set operations on vports,
67 * etc.) are protected by RTNL.
69 * Writes to other state (flow table modifications, set miscellaneous datapath
70 * parameters, etc.) are protected by genl_mutex. The RTNL lock nests inside
73 * Reads are protected by RCU.
75 * There are a few special cases (mostly stats) that have their own
76 * synchronization but they nest under all of above and don't interact with
80 /* Global list of datapaths to enable dumping them all out.
81 * Protected by genl_mutex.
83 static LIST_HEAD(dps);
85 static struct vport *new_vport(const struct vport_parms *);
86 static int queue_gso_packets(int dp_ifindex, struct sk_buff *,
87 const struct dp_upcall_info *);
88 static int queue_userspace_packet(int dp_ifindex, struct sk_buff *,
89 const struct dp_upcall_info *);
91 /* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock.
 *
 * Looks up a datapath by the ifindex of its local internal device;
 * returns NULL when 'dp_ifindex' does not name an OVS internal port.
 * NOTE(review): interior lines of this listing are elided. */
92 struct datapath *get_dp(int dp_ifindex)
94 struct datapath *dp = NULL;
95 struct net_device *dev;
98 dev = dev_get_by_index_rcu(&init_net, dp_ifindex);
100 struct vport *vport = internal_dev_get_vport(dev);
108 EXPORT_SYMBOL_GPL(get_dp);
110 /* Must be called with genl_mutex. */
/* Dereferences dp->table with the RCU annotation tied to genl_mutex. */
111 static struct flow_table *get_table_protected(struct datapath *dp)
113 return rcu_dereference_protected(dp->table, lockdep_genl_is_held());
116 /* Must be called with rcu_read_lock or RTNL lock. */
/* Returns the vport in slot 'port_no'; no bounds check, so the caller
 * must ensure port_no < DP_MAX_PORTS. */
117 static struct vport *get_vport_protected(struct datapath *dp, u16 port_no)
119 return rcu_dereference_rtnl(dp->ports[port_no]);
122 /* Must be called with rcu_read_lock or RTNL lock. */
/* A datapath's name is the name of its local (OVSP_LOCAL) port. */
123 const char *dp_name(const struct datapath *dp)
125 return vport_get_name(rcu_dereference_rtnl(dp->ports[OVSP_LOCAL]));
/* ifindex of the net_device backing the datapath's local port. */
128 static int get_dpifindex(struct datapath *dp)
135 local = get_vport_protected(dp, OVSP_LOCAL);
137 ifindex = vport_get_ifindex(local);
/* Upper bound on the size of a link message built by dp_fill_ifinfo();
 * must account for every attribute added there. */
146 static size_t br_nlmsg_size(void)
148 return NLMSG_ALIGN(sizeof(struct ifinfomsg))
149 + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
150 + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
151 + nla_total_size(4) /* IFLA_MASTER */
152 + nla_total_size(4) /* IFLA_MTU */
153 + nla_total_size(1); /* IFLA_OPERSTATE */
156 /* Caller must hold RTNL lock. */
156 /* Caller must hold RTNL lock. */
/* Fills 'skb' with an AF_BRIDGE ifinfomsg describing 'port' (name,
 * master ifindex, MTU, operstate, hardware address) for an rtnetlink
 * 'event' notification (RTM_NEWLINK / RTM_DELLINK). */
157 static int dp_fill_ifinfo(struct sk_buff *skb,
158 const struct vport *port,
159 int event, unsigned int flags)
161 struct datapath *dp = port->dp;
162 int ifindex = vport_get_ifindex(port);
163 struct ifinfomsg *hdr;
164 struct nlmsghdr *nlh;
169 nlh = nlmsg_put(skb, 0, 0, event, sizeof(*hdr), flags);
173 hdr = nlmsg_data(nlh);
174 hdr->ifi_family = AF_BRIDGE;
176 hdr->ifi_type = ARPHRD_ETHER;
177 hdr->ifi_index = ifindex;
178 hdr->ifi_flags = vport_get_flags(port);
181 NLA_PUT_STRING(skb, IFLA_IFNAME, vport_get_name(port));
182 NLA_PUT_U32(skb, IFLA_MASTER, get_dpifindex(dp));
183 NLA_PUT_U32(skb, IFLA_MTU, vport_get_mtu(port));
/* IFLA_OPERSTATE is not defined on every supported kernel version. */
184 #ifdef IFLA_OPERSTATE
185 NLA_PUT_U8(skb, IFLA_OPERSTATE,
186 vport_is_running(port)
187 ? vport_get_operstate(port)
191 NLA_PUT(skb, IFLA_ADDRESS, ETH_ALEN, vport_get_addr(port));
193 return nlmsg_end(skb, nlh);
/* NLA_PUT* macros jump here when the skb runs out of tailroom. */
196 nlmsg_cancel(skb, nlh);
200 /* Caller must hold RTNL lock. */
/* Broadcasts an RTNLGRP_LINK notification about 'port'; on failure the
 * error is recorded on the rtnetlink socket instead of being returned. */
201 static void dp_ifinfo_notify(int event, struct vport *port)
206 skb = nlmsg_new(br_nlmsg_size(), GFP_KERNEL);
210 err = dp_fill_ifinfo(skb, port, event, 0);
212 /* -EMSGSIZE implies BUG in br_nlmsg_size() */
213 WARN_ON(err == -EMSGSIZE);
217 rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
221 rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err);
/* kobject release callback: frees the datapath when the last sysfs
 * reference on dp->ifobj is dropped. */
224 static void release_dp(struct kobject *kobj)
226 struct datapath *dp = container_of(kobj, struct datapath, ifobj);
230 static struct kobj_type dp_ktype = {
231 .release = release_dp
/* RCU callback: tears down the flow table and per-CPU stats, then drops
 * the final kobject reference (which frees 'dp' via release_dp()). */
234 static void destroy_dp_rcu(struct rcu_head *rcu)
236 struct datapath *dp = container_of(rcu, struct datapath, rcu);
238 flow_tbl_destroy(dp->table);
239 free_percpu(dp->stats_percpu);
240 kobject_put(&dp->ifobj);
243 /* Called with RTNL lock and genl_lock. */
/* Creates a vport from 'parms'; on success publishes it in the
 * datapath's port array and port list and sends RTM_NEWLINK. */
244 static struct vport *new_vport(const struct vport_parms *parms)
248 vport = vport_add(parms);
249 if (!IS_ERR(vport)) {
250 struct datapath *dp = parms->dp;
252 rcu_assign_pointer(dp->ports[parms->port_no], vport);
253 list_add(&vport->node, &dp->port_list);
255 dp_ifinfo_notify(RTM_NEWLINK, vport);
261 /* Called with RTNL lock. */
262 void dp_detach_port(struct vport *p)
266 if (p->port_no != OVSP_LOCAL)
268 dp_ifinfo_notify(RTM_DELLINK, p);
270 /* First drop references to device. */
272 rcu_assign_pointer(p->dp->ports[p->port_no], NULL);
274 /* Then destroy it. */
278 /* Must be called with rcu_read_lock. */
279 void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
281 struct datapath *dp = p->dp;
282 struct sw_flow *flow;
283 struct dp_stats_percpu *stats;
287 stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
289 if (!OVS_CB(skb)->flow) {
290 struct sw_flow_key key;
293 /* Extract flow from 'skb' into 'key'. */
294 error = flow_extract(skb, p->port_no, &key, &key_len);
295 if (unlikely(error)) {
301 flow = flow_tbl_lookup(rcu_dereference(dp->table),
303 if (unlikely(!flow)) {
304 struct dp_upcall_info upcall;
306 upcall.cmd = OVS_PACKET_CMD_MISS;
308 upcall.userdata = NULL;
309 upcall.pid = p->upcall_pid;
310 dp_upcall(dp, skb, &upcall);
312 stats_counter = &stats->n_missed;
316 OVS_CB(skb)->flow = flow;
319 stats_counter = &stats->n_hit;
320 flow_used(OVS_CB(skb)->flow, skb);
321 execute_actions(dp, skb);
324 /* Update datapath statistics. */
326 write_seqcount_begin(&stats->seqlock);
328 write_seqcount_end(&stats->seqlock);
331 static void copy_and_csum_skb(struct sk_buff *skb, void *to)
333 u16 csum_start, csum_offset;
336 get_skb_csum_pointers(skb, &csum_start, &csum_offset);
337 csum_start -= skb_headroom(skb);
339 skb_copy_bits(skb, 0, to, csum_start);
341 csum = skb_copy_and_csum_bits(skb, csum_start, to + csum_start,
342 skb->len - csum_start, 0);
343 *(__sum16 *)(to + csum_start + csum_offset) = csum_fold(csum);
/* Generic netlink family for OVS_PACKET (upcall/execute) messages. */
346 static struct genl_family dp_packet_genl_family = {
347 .id = GENL_ID_GENERATE,
348 .hdrsize = sizeof(struct ovs_header),
349 .name = OVS_PACKET_FAMILY,
350 .version = OVS_PACKET_VERSION,
351 .maxattr = OVS_PACKET_ATTR_MAX
/* Sends 'skb' to userspace as described by 'upcall_info', segmenting
 * GSO packets first; a pid of 0 means no listener.  On failure the
 * per-CPU stats are updated under the seqcount write section
 * (presumably n_lost — the increment itself is elided here). */
354 int dp_upcall(struct datapath *dp, struct sk_buff *skb,
355 const struct dp_upcall_info *upcall_info)
357 struct dp_stats_percpu *stats;
361 if (upcall_info->pid == 0) {
366 dp_ifindex = get_dpifindex(dp);
372 forward_ip_summed(skb, true);
374 if (!skb_is_gso(skb))
375 err = queue_userspace_packet(dp_ifindex, skb, upcall_info);
377 err = queue_gso_packets(dp_ifindex, skb, upcall_info);
384 stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
386 write_seqcount_begin(&stats->seqlock);
388 write_seqcount_end(&stats->seqlock);
/* Software-segments a GSO skb and queues each segment as its own
 * upcall, then frees all segments before returning. */
393 static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb,
394 const struct dp_upcall_info *upcall_info)
396 struct dp_upcall_info later_info;
397 struct sw_flow_key later_key;
398 struct sk_buff *segs, *nskb;
401 segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
405 /* Queue all of the segments. */
408 err = queue_userspace_packet(dp_ifindex, skb, upcall_info);
412 if (skb == segs && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) {
413 /* The initial flow key extracted by flow_extract() in
414 * this case is for a first fragment, so we need to
415 * properly mark later fragments.
417 later_key = *upcall_info->key;
418 later_key.ip.tos_frag &= ~OVS_FRAG_TYPE_MASK;
419 later_key.ip.tos_frag |= OVS_FRAG_TYPE_LATER;
421 later_info = *upcall_info;
422 later_info.key = &later_key;
423 upcall_info = &later_info;
425 } while ((skb = skb->next));
427 /* Free all of the segments. */
435 } while ((skb = nskb));
/* Builds one OVS_PACKET genl message — flow key, optional userdata,
 * packet payload (with any partial checksum completed) — and unicasts
 * it to the netlink pid chosen by userspace. */
439 static int queue_userspace_packet(int dp_ifindex, struct sk_buff *skb,
440 const struct dp_upcall_info *upcall_info)
442 struct ovs_header *upcall;
443 struct sk_buff *user_skb; /* to be queued to userspace */
448 err = vlan_deaccel_tag(skb);
/* A netlink attribute length field is 16 bits, so huge packets
 * cannot be represented. */
452 if (nla_attr_size(skb->len) > USHRT_MAX)
455 len = sizeof(struct ovs_header);
456 len += nla_total_size(skb->len);
457 len += nla_total_size(FLOW_BUFSIZE);
458 if (upcall_info->cmd == OVS_PACKET_CMD_ACTION)
459 len += nla_total_size(8);
461 user_skb = genlmsg_new(len, GFP_ATOMIC);
465 upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
466 0, upcall_info->cmd);
467 upcall->dp_ifindex = dp_ifindex;
469 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
470 flow_to_nlattrs(upcall_info->key, user_skb);
471 nla_nest_end(user_skb, nla);
473 if (upcall_info->userdata)
474 nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA,
475 nla_get_u64(upcall_info->userdata));
477 nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);
478 if (skb->ip_summed == CHECKSUM_PARTIAL)
479 copy_and_csum_skb(skb, nla_data(nla));
481 skb_copy_bits(skb, 0, nla_data(nla), skb->len);
483 return genlmsg_unicast(&init_net, user_skb, upcall_info->pid);
486 /* Called with genl_mutex. */
487 static int flush_flows(int dp_ifindex)
489 struct flow_table *old_table;
490 struct flow_table *new_table;
493 dp = get_dp(dp_ifindex);
497 old_table = get_table_protected(dp);
498 new_table = flow_tbl_alloc(TBL_MIN_BUCKETS);
502 rcu_assign_pointer(dp->table, new_table);
504 flow_tbl_deferred_destroy(old_table);
508 static int validate_actions(const struct nlattr *attr,
509 const struct sw_flow_key *key, int depth);
511 static int validate_sample(const struct nlattr *attr,
512 const struct sw_flow_key *key, int depth)
514 const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
515 const struct nlattr *probability, *actions;
516 const struct nlattr *a;
519 memset(attrs, 0, sizeof(attrs));
520 nla_for_each_nested(a, attr, rem) {
521 int type = nla_type(a);
522 if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
529 probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
530 if (!probability || nla_len(probability) != sizeof(u32))
533 actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
534 if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
536 return validate_actions(actions, key, depth + 1);
/* Validates the key carried by a SET/PUSH action against the packet's
 * flow key: the payload must be exactly one correctly-sized key
 * attribute, and protocol-dependent sets (IPv4, TCP, UDP) must match
 * the traffic described by 'flow_key'. */
539 static int validate_action_key(const struct nlattr *a,
540 const struct sw_flow_key *flow_key)
542 int act_type = nla_type(a);
543 const struct nlattr *ovs_key = nla_data(a);
544 int key_type = nla_type(ovs_key);
546 /* There can be only one key in a action */
547 if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
550 if (key_type > OVS_KEY_ATTR_MAX ||
551 nla_len(ovs_key) != ovs_key_lens[key_type])
/* Pack the (action type, key type) pair into one switch value. */
554 #define ACTION(act, key) (((act) << 8) | (key))
556 switch (ACTION(act_type, key_type)) {
557 const struct ovs_key_ipv4 *ipv4_key;
558 const struct ovs_key_8021q *q_key;
560 case ACTION(OVS_ACTION_ATTR_SET, OVS_KEY_ATTR_PRIORITY):
561 case ACTION(OVS_ACTION_ATTR_SET, OVS_KEY_ATTR_TUN_ID):
562 case ACTION(OVS_ACTION_ATTR_SET, OVS_KEY_ATTR_ETHERNET):
565 case ACTION(OVS_ACTION_ATTR_PUSH, OVS_KEY_ATTR_8021Q):
566 q_key = nla_data(ovs_key);
567 if (q_key->q_tpid != htons(ETH_P_8021Q))
570 if (q_key->q_tci & htons(VLAN_TAG_PRESENT))
574 case ACTION(OVS_ACTION_ATTR_SET, OVS_KEY_ATTR_IPV4):
575 if (flow_key->eth.type != htons(ETH_P_IP))
578 if (!flow_key->ipv4.addr.src || !flow_key->ipv4.addr.dst)
581 ipv4_key = nla_data(ovs_key);
/* An IPv4 set may not change the protocol, ECN bits or frag type. */
582 if (ipv4_key->ipv4_proto != flow_key->ip.proto)
585 if (ipv4_key->ipv4_tos & INET_ECN_MASK)
588 if (ipv4_key->ipv4_frag !=
589 (flow_key->ip.tos_frag & OVS_FRAG_TYPE_MASK))
594 case ACTION(OVS_ACTION_ATTR_SET, OVS_KEY_ATTR_TCP):
595 if (flow_key->ip.proto != IPPROTO_TCP)
598 if (!flow_key->ipv4.tp.src || !flow_key->ipv4.tp.dst)
603 case ACTION(OVS_ACTION_ATTR_SET, OVS_KEY_ATTR_UDP):
604 if (flow_key->ip.proto != IPPROTO_UDP)
607 if (!flow_key->ipv4.tp.src || !flow_key->ipv4.tp.dst)
/* Validates an OVS_ACTION_ATTR_USERSPACE action: it must carry a
 * nonzero netlink PID to deliver the upcall to. */
618 static int validate_userspace(const struct nlattr *attr)
620 static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
621 [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
622 [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_U64 },
624 struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
627 error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
628 attr, userspace_policy);
632 if (!a[OVS_USERSPACE_ATTR_PID] ||
633 !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
/* Validates a nested list of datapath actions against 'key': checks
 * per-action attribute lengths, then dispatches to the per-action
 * validators above.  'depth' bounds recursion via sample actions. */
639 static int validate_actions(const struct nlattr *attr,
640 const struct sw_flow_key *key, int depth)
642 const struct nlattr *a;
645 if (depth >= SAMPLE_ACTION_DEPTH)
648 nla_for_each_nested(a, attr, rem) {
649 /* Expected argument lengths, (u32)-1 for variable length. */
650 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
651 [OVS_ACTION_ATTR_OUTPUT] = 4,
652 [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
653 [OVS_ACTION_ATTR_PUSH] = (u32)-1,
654 [OVS_ACTION_ATTR_POP] = 2,
655 [OVS_ACTION_ATTR_SET] = (u32)-1,
656 [OVS_ACTION_ATTR_SAMPLE] = (u32)-1
658 int type = nla_type(a);
660 if (type > OVS_ACTION_ATTR_MAX ||
661 (action_lens[type] != nla_len(a) &&
662 action_lens[type] != (u32)-1))
666 case OVS_ACTION_ATTR_UNSPEC:
669 case OVS_ACTION_ATTR_USERSPACE:
670 err = validate_userspace(a);
675 case OVS_ACTION_ATTR_OUTPUT:
676 if (nla_get_u32(a) >= DP_MAX_PORTS)
681 case OVS_ACTION_ATTR_POP:
682 if (nla_get_u16(a) != OVS_KEY_ATTR_8021Q)
686 case OVS_ACTION_ATTR_SET:
687 case OVS_ACTION_ATTR_PUSH:
688 err = validate_action_key(a, key);
693 case OVS_ACTION_ATTR_SAMPLE:
694 err = validate_sample(a, key, depth);
/* Resets a flow's usage statistics (packet and byte counts). */
710 static void clear_stats(struct sw_flow *flow)
714 flow->packet_count = 0;
715 flow->byte_count = 0;
/* OVS_PACKET_CMD_EXECUTE handler: rebuilds the packet supplied by
 * userspace into a fresh skb, derives a temporary sw_flow from the
 * packet plus the supplied key metadata, validates and attaches the
 * supplied actions, and executes them on the named datapath. */
718 static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
720 struct ovs_header *ovs_header = info->userhdr;
721 struct nlattr **a = info->attrs;
722 struct sw_flow_actions *acts;
723 struct sk_buff *packet;
724 struct sw_flow *flow;
732 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
733 !a[OVS_PACKET_ATTR_ACTIONS] ||
734 nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN)
737 len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
738 packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
742 skb_reserve(packet, NET_IP_ALIGN);
744 memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len);
746 skb_reset_mac_header(packet);
747 eth = eth_hdr(packet);
749 /* Normally, setting the skb 'protocol' field would be handled by a
750 * call to eth_type_trans(), but it assumes there's a sending
751 * device, which we may not have. */
/* 1536 (0x600): values below this are 802.3 lengths, not EtherTypes. */
752 if (ntohs(eth->h_proto) >= 1536)
753 packet->protocol = eth->h_proto;
755 packet->protocol = htons(ETH_P_802_2);
757 /* Build an sw_flow for sending this packet. */
763 err = flow_extract(packet, -1, &flow->key, &key_len);
767 err = flow_metadata_from_nlattrs(&flow->key.phy.priority,
768 &flow->key.phy.in_port,
769 &flow->key.phy.tun_id,
770 a[OVS_PACKET_ATTR_KEY]);
774 err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0);
778 flow->hash = flow_hash(&flow->key, key_len);
780 acts = flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]);
784 rcu_assign_pointer(flow->sf_acts, acts);
786 OVS_CB(packet)->flow = flow;
787 packet->priority = flow->key.phy.priority;
790 dp = get_dp(ovs_header->dp_ifindex);
796 err = execute_actions(dp, packet);
/* Attribute policy and genl operations for the OVS_PACKET family. */
813 static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
814 [OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
815 [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
816 [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
819 static struct genl_ops dp_packet_genl_ops[] = {
820 { .cmd = OVS_PACKET_CMD_EXECUTE,
821 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
822 .policy = packet_policy,
823 .doit = ovs_packet_cmd_execute
/* Aggregates per-CPU hit/miss/lost counters into 'stats', retrying any
 * CPU whose counters changed mid-copy (detected via the seqcount), and
 * records the current flow count. */
827 static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
830 struct flow_table *table = get_table_protected(dp);
832 stats->n_flows = flow_tbl_count(table);
834 stats->n_hit = stats->n_missed = stats->n_lost = 0;
835 for_each_possible_cpu(i) {
836 const struct dp_stats_percpu *percpu_stats;
837 struct dp_stats_percpu local_stats;
840 percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
843 seqcount = read_seqcount_begin(&percpu_stats->seqlock);
844 local_stats = *percpu_stats;
845 } while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));
847 stats->n_hit += local_stats.n_hit;
848 stats->n_missed += local_stats.n_missed;
849 stats->n_lost += local_stats.n_lost;
/* Attribute policy, genl family and multicast group for flows. */
853 static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
854 [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
855 [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
856 [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
859 static struct genl_family dp_flow_genl_family = {
860 .id = GENL_ID_GENERATE,
861 .hdrsize = sizeof(struct ovs_header),
862 .name = OVS_FLOW_FAMILY,
863 .version = OVS_FLOW_VERSION,
864 .maxattr = OVS_FLOW_ATTR_MAX
867 static struct genl_multicast_group dp_flow_multicast_group = {
868 .name = OVS_FLOW_MCGROUP
871 /* Called with genl_lock. */
/* Serializes one flow (key, stats, TCP flags, last-used time, actions)
 * into 'skb' as an OVS_FLOW genl message; the stats snapshot is taken
 * under the flow's spinlock. */
872 static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
873 struct sk_buff *skb, u32 pid,
874 u32 seq, u32 flags, u8 cmd)
876 const int skb_orig_len = skb->len;
877 const struct sw_flow_actions *sf_acts;
878 struct ovs_flow_stats stats;
879 struct ovs_header *ovs_header;
885 sf_acts = rcu_dereference_protected(flow->sf_acts,
886 lockdep_genl_is_held());
888 ovs_header = genlmsg_put(skb, pid, seq, &dp_flow_genl_family, flags, cmd);
892 ovs_header->dp_ifindex = get_dpifindex(dp);
894 nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
896 goto nla_put_failure;
897 err = flow_to_nlattrs(&flow->key, skb);
900 nla_nest_end(skb, nla);
902 spin_lock_bh(&flow->lock);
904 stats.n_packets = flow->packet_count;
905 stats.n_bytes = flow->byte_count;
906 tcp_flags = flow->tcp_flags;
907 spin_unlock_bh(&flow->lock);
910 NLA_PUT_U64(skb, OVS_FLOW_ATTR_USED, flow_used_time(used));
913 NLA_PUT(skb, OVS_FLOW_ATTR_STATS,
914 sizeof(struct ovs_flow_stats), &stats);
917 NLA_PUT_U8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags);
919 /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
920 * this is the first flow to be dumped into 'skb'. This is unusual for
921 * Netlink but individual action lists can be longer than
922 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
923 * The userspace caller can always fetch the actions separately if it
924 * really wants them. (Most userspace callers in fact don't care.)
926 * This can only fail for dump operations because the skb is always
927 * properly sized for single flows.
929 err = nla_put(skb, OVS_FLOW_ATTR_ACTIONS, sf_acts->actions_len,
931 if (err < 0 && skb_orig_len)
934 return genlmsg_end(skb, ovs_header);
939 genlmsg_cancel(skb, ovs_header);
/* Allocates a genl skb sized to hold every attribute that
 * ovs_flow_cmd_fill_info() may emit for 'flow'. */
943 static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
945 const struct sw_flow_actions *sf_acts;
948 sf_acts = rcu_dereference_protected(flow->sf_acts,
949 lockdep_genl_is_held());
951 /* OVS_FLOW_ATTR_KEY */
952 len = nla_total_size(FLOW_BUFSIZE);
953 /* OVS_FLOW_ATTR_ACTIONS */
954 len += nla_total_size(sf_acts->actions_len);
955 /* OVS_FLOW_ATTR_STATS */
956 len += nla_total_size(sizeof(struct ovs_flow_stats));
957 /* OVS_FLOW_ATTR_TCP_FLAGS */
958 len += nla_total_size(1);
959 /* OVS_FLOW_ATTR_USED */
960 len += nla_total_size(8);
962 len += NLMSG_ALIGN(sizeof(struct ovs_header));
964 return genlmsg_new(len, GFP_KERNEL);
/* Allocates and fills a reply message describing 'flow'; returns an
 * ERR_PTR on allocation or fill failure. */
967 static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
969 u32 pid, u32 seq, u8 cmd)
974 skb = ovs_flow_cmd_alloc_info(flow);
976 return ERR_PTR(-ENOMEM);
978 retval = ovs_flow_cmd_fill_info(flow, dp, skb, pid, seq, 0, cmd);
/* Shared handler for OVS_FLOW_CMD_NEW and OVS_FLOW_CMD_SET: parses and
 * validates the flow key and actions, then either inserts a new flow
 * (expanding the table if needed) or updates/clears an existing one,
 * and multicasts the resulting flow description to listeners. */
983 static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
985 struct nlattr **a = info->attrs;
986 struct ovs_header *ovs_header = info->userhdr;
987 struct sw_flow_key key;
988 struct sw_flow *flow;
989 struct sk_buff *reply;
991 struct flow_table *table;
997 if (!a[OVS_FLOW_ATTR_KEY])
999 error = flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
1003 /* Validate actions. */
1004 if (a[OVS_FLOW_ATTR_ACTIONS]) {
1005 error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0);
/* A NEW command must always supply actions; SET need not. */
1008 } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
1013 dp = get_dp(ovs_header->dp_ifindex);
1018 table = get_table_protected(dp);
1019 flow = flow_tbl_lookup(table, &key, key_len);
1021 struct sw_flow_actions *acts;
1023 /* Bail out if we're not allowed to create a new flow. */
1025 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
1028 /* Expand table, if necessary, to make room. */
1029 if (flow_tbl_need_to_expand(table)) {
1030 struct flow_table *new_table;
1032 new_table = flow_tbl_expand(table);
/* Expansion failure is non-fatal: insert into the old table. */
1033 if (!IS_ERR(new_table)) {
1034 rcu_assign_pointer(dp->table, new_table);
1035 flow_tbl_deferred_destroy(table);
1036 table = get_table_protected(dp);
1040 /* Allocate flow. */
1041 flow = flow_alloc();
1043 error = PTR_ERR(flow);
1049 /* Obtain actions. */
1050 acts = flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]);
1051 error = PTR_ERR(acts);
1053 goto error_free_flow;
1054 rcu_assign_pointer(flow->sf_acts, acts);
1056 /* Put flow in bucket. */
1057 flow->hash = flow_hash(&key, key_len);
1058 flow_tbl_insert(table, flow);
1060 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
1064 /* We found a matching flow. */
1065 struct sw_flow_actions *old_acts;
1066 struct nlattr *acts_attrs;
1068 /* Bail out if we're not allowed to modify an existing flow.
1069 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
1070 * because Generic Netlink treats the latter as a dump
1071 * request. We also accept NLM_F_EXCL in case that bug ever
1075 if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
1076 info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
1079 /* Update actions. */
1080 old_acts = rcu_dereference_protected(flow->sf_acts,
1081 lockdep_genl_is_held());
1082 acts_attrs = a[OVS_FLOW_ATTR_ACTIONS];
/* Only swap in new actions when they differ from the current ones;
 * the old set is freed after an RCU grace period. */
1084 (old_acts->actions_len != nla_len(acts_attrs) ||
1085 memcmp(old_acts->actions, nla_data(acts_attrs),
1086 old_acts->actions_len))) {
1087 struct sw_flow_actions *new_acts;
1089 new_acts = flow_actions_alloc(acts_attrs);
1090 error = PTR_ERR(new_acts);
1091 if (IS_ERR(new_acts))
1094 rcu_assign_pointer(flow->sf_acts, new_acts);
1095 flow_deferred_free_acts(old_acts);
1098 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
1099 info->snd_seq, OVS_FLOW_CMD_NEW);
/* OVS_FLOW_ATTR_CLEAR resets the flow's statistics under its lock. */
1102 if (a[OVS_FLOW_ATTR_CLEAR]) {
1103 spin_lock_bh(&flow->lock);
1105 spin_unlock_bh(&flow->lock);
1110 genl_notify(reply, genl_info_net(info), info->snd_pid,
1111 dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
/* Could not build the reply: report the error to the multicast
 * group rather than failing the operation itself. */
1113 netlink_set_err(INIT_NET_GENL_SOCK, 0,
1114 dp_flow_multicast_group.id, PTR_ERR(reply));
/* OVS_FLOW_CMD_GET handler: looks up the flow matching the supplied
 * key and replies with its full description. */
1123 static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1125 struct nlattr **a = info->attrs;
1126 struct ovs_header *ovs_header = info->userhdr;
1127 struct sw_flow_key key;
1128 struct sk_buff *reply;
1129 struct sw_flow *flow;
1130 struct datapath *dp;
1131 struct flow_table *table;
1135 if (!a[OVS_FLOW_ATTR_KEY])
1137 err = flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
1141 dp = get_dp(ovs_header->dp_ifindex);
1145 table = get_table_protected(dp);
1146 flow = flow_tbl_lookup(table, &key, key_len);
1150 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
1151 info->snd_seq, OVS_FLOW_CMD_NEW);
1153 return PTR_ERR(reply);
1155 return genlmsg_reply(reply, info);
/* OVS_FLOW_CMD_DEL handler: with no key attribute, flushes all flows;
 * otherwise removes the matching flow, notifies listeners with its
 * final state, and frees it after an RCU grace period. */
1158 static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1160 struct nlattr **a = info->attrs;
1161 struct ovs_header *ovs_header = info->userhdr;
1162 struct sw_flow_key key;
1163 struct sk_buff *reply;
1164 struct sw_flow *flow;
1165 struct datapath *dp;
1166 struct flow_table *table;
1170 if (!a[OVS_FLOW_ATTR_KEY])
1171 return flush_flows(ovs_header->dp_ifindex);
1172 err = flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
1176 dp = get_dp(ovs_header->dp_ifindex);
1180 table = get_table_protected(dp);
1181 flow = flow_tbl_lookup(table, &key, key_len);
/* Allocate the reply before removing the flow, so that the removal
 * cannot fail after the fact. */
1185 reply = ovs_flow_cmd_alloc_info(flow);
1189 flow_tbl_remove(table, flow);
1191 err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_pid,
1192 info->snd_seq, 0, OVS_FLOW_CMD_DEL);
1195 flow_deferred_free(flow);
1197 genl_notify(reply, genl_info_net(info), info->snd_pid,
1198 dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
/* Flow dump callback: iterates the flow table, resuming from the
 * bucket/object cursor saved in cb->args[] between dump calls. */
1202 static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1204 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1205 struct datapath *dp;
1207 dp = get_dp(ovs_header->dp_ifindex);
1212 struct sw_flow *flow;
1215 bucket = cb->args[0];
1217 flow = flow_tbl_next(get_table_protected(dp), &bucket, &obj);
1221 if (ovs_flow_cmd_fill_info(flow, dp, skb,
1222 NETLINK_CB(cb->skb).pid,
1223 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1224 OVS_FLOW_CMD_NEW) < 0)
1227 cb->args[0] = bucket;
/* Genl operations for the OVS_FLOW family; GET is the only command
 * open to unprivileged users. */
1233 static struct genl_ops dp_flow_genl_ops[] = {
1234 { .cmd = OVS_FLOW_CMD_NEW,
1235 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1236 .policy = flow_policy,
1237 .doit = ovs_flow_cmd_new_or_set
1239 { .cmd = OVS_FLOW_CMD_DEL,
1240 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1241 .policy = flow_policy,
1242 .doit = ovs_flow_cmd_del
1244 { .cmd = OVS_FLOW_CMD_GET,
1245 .flags = 0, /* OK for unprivileged users. */
1246 .policy = flow_policy,
1247 .doit = ovs_flow_cmd_get,
1248 .dumpit = ovs_flow_cmd_dump
1250 { .cmd = OVS_FLOW_CMD_SET,
1251 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1252 .policy = flow_policy,
1253 .doit = ovs_flow_cmd_new_or_set,
/* Datapath attribute policy; NLA_NUL_STRING only exists on newer
 * kernels, hence the compile-time guard. */
1257 static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
1258 #ifdef HAVE_NLA_NUL_STRING
1259 [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1261 [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
/* Genl family and multicast group for datapath commands. */
1264 static struct genl_family dp_datapath_genl_family = {
1265 .id = GENL_ID_GENERATE,
1266 .hdrsize = sizeof(struct ovs_header),
1267 .name = OVS_DATAPATH_FAMILY,
1268 .version = OVS_DATAPATH_VERSION,
1269 .maxattr = OVS_DP_ATTR_MAX
1272 static struct genl_multicast_group dp_datapath_multicast_group = {
1273 .name = OVS_DATAPATH_MCGROUP
/* Serializes one datapath (name plus aggregate stats) into 'skb' as an
 * OVS_DP genl message. */
1276 static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1277 u32 pid, u32 seq, u32 flags, u8 cmd)
1279 struct ovs_header *ovs_header;
1283 ovs_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family,
1288 ovs_header->dp_ifindex = get_dpifindex(dp);
1291 err = nla_put_string(skb, OVS_DP_ATTR_NAME, dp_name(dp));
1294 goto nla_put_failure;
/* Reserve the stats attribute, then fill it in place. */
1296 nla = nla_reserve(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats));
1298 goto nla_put_failure;
1299 get_dp_stats(dp, nla_data(nla));
1301 return genlmsg_end(skb, ovs_header);
1304 genlmsg_cancel(skb, ovs_header);
/* Allocates and fills a datapath reply skb; ERR_PTR on failure. */
1309 static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid,
1312 struct sk_buff *skb;
1315 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1317 return ERR_PTR(-ENOMEM);
1319 retval = ovs_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd);
1322 return ERR_PTR(retval);
/* Fallback NUL-termination check for kernels lacking NLA_NUL_STRING. */
1327 static int ovs_dp_cmd_validate(struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1329 return CHECK_NUL_STRING(a[OVS_DP_ATTR_NAME], IFNAMSIZ - 1);
1332 /* Called with genl_mutex and optionally with RTNL lock also. */
/* Finds a datapath either by dp_ifindex from the genl header or by the
 * OVS_DP_ATTR_NAME of its local port; ERR_PTR(-ENODEV) when absent. */
1333 static struct datapath *lookup_datapath(struct ovs_header *ovs_header,
1334 struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1336 struct datapath *dp;
1338 if (!a[OVS_DP_ATTR_NAME])
1339 dp = get_dp(ovs_header->dp_ifindex);
1341 struct vport *vport;
1344 vport = vport_locate(nla_data(a[OVS_DP_ATTR_NAME]));
/* Only a local (internal) port names a datapath. */
1345 dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
1348 return dp ? dp : ERR_PTR(-ENODEV);
/* OVS_DP_CMD_NEW handler: allocates a datapath, its flow table and
 * per-CPU stats, creates the local internal port, registers the
 * datapath on the global list and in sysfs, and multicasts the new
 * datapath's description.  Error labels unwind in reverse order. */
1351 static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1353 struct nlattr **a = info->attrs;
1354 struct vport_parms parms;
1355 struct sk_buff *reply;
1356 struct datapath *dp;
1357 struct vport *vport;
1361 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1364 err = ovs_dp_cmd_validate(a);
/* Hold a module reference for the lifetime of the datapath;
 * released by ovs_dp_cmd_del() or on the error path. */
1370 if (!try_module_get(THIS_MODULE))
1371 goto err_unlock_rtnl;
1374 dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1376 goto err_put_module;
1377 INIT_LIST_HEAD(&dp->port_list);
1379 /* Initialize kobject for bridge. This will be added as
1380 * /sys/class/net/<devname>/brif later, if sysfs is enabled. */
1381 dp->ifobj.kset = NULL;
1382 kobject_init(&dp->ifobj, &dp_ktype);
1384 /* Allocate table. */
1386 rcu_assign_pointer(dp->table, flow_tbl_alloc(TBL_MIN_BUCKETS));
1390 dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
1391 if (!dp->stats_percpu) {
1393 goto err_destroy_table;
1396 /* Set up our datapath device. */
1397 parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1398 parms.type = OVS_VPORT_TYPE_INTERNAL;
1399 parms.options = NULL;
1401 parms.port_no = OVSP_LOCAL;
1402 parms.upcall_pid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
1404 vport = new_vport(&parms);
1405 if (IS_ERR(vport)) {
1406 err = PTR_ERR(vport);
1410 goto err_destroy_percpu;
1413 reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
1414 info->snd_seq, OVS_DP_CMD_NEW);
1415 err = PTR_ERR(reply);
1417 goto err_destroy_local_port;
1419 list_add_tail(&dp->list_node, &dps);
1420 dp_sysfs_add_dp(dp);
1424 genl_notify(reply, genl_info_net(info), info->snd_pid,
1425 dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
1428 err_destroy_local_port:
1429 dp_detach_port(get_vport_protected(dp, OVSP_LOCAL));
1431 free_percpu(dp->stats_percpu);
1433 flow_tbl_destroy(get_table_protected(dp));
1437 module_put(THIS_MODULE);
1444 static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1446 struct vport *vport, *next_vport;
1447 struct sk_buff *reply;
1448 struct datapath *dp;
1451 err = ovs_dp_cmd_validate(info->attrs);
1456 dp = lookup_datapath(info->userhdr, info->attrs);
1461 reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
1462 info->snd_seq, OVS_DP_CMD_DEL);
1463 err = PTR_ERR(reply);
1467 list_for_each_entry_safe(vport, next_vport, &dp->port_list, node)
1468 if (vport->port_no != OVSP_LOCAL)
1469 dp_detach_port(vport);
1471 dp_sysfs_del_dp(dp);
1472 list_del(&dp->list_node);
1473 dp_detach_port(get_vport_protected(dp, OVSP_LOCAL));
1475 /* rtnl_unlock() will wait until all the references to devices that
1476 * are pending unregistration have been dropped. We do it here to
1477 * ensure that any internal devices (which contain DP pointers) are
1478 * fully destroyed before freeing the datapath.
1482 call_rcu(&dp->rcu, destroy_dp_rcu);
1483 module_put(THIS_MODULE);
1485 genl_notify(reply, genl_info_net(info), info->snd_pid,
1486 dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
/* OVS_DP_CMD_SET handler: apply attribute changes to an existing datapath
 * and notify listeners.
 * NOTE(review): this extract elides original lines (validation error paths,
 * the code that actually applies the changes, returns), so only the reply/
 * notify skeleton is visible here. */
1496 static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1498 struct sk_buff *reply;
1499 struct datapath *dp;
1502 err = ovs_dp_cmd_validate(info->attrs);
1506 dp = lookup_datapath(info->userhdr, info->attrs);
1510 reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
1511 info->snd_seq, OVS_DP_CMD_NEW);
1512 if (IS_ERR(reply)) {
1513 err = PTR_ERR(reply);
/* The change already happened but we cannot build a notification:
 * report the error to the multicast group instead of to one caller. */
1514 netlink_set_err(INIT_NET_GENL_SOCK, 0,
1515 dp_datapath_multicast_group.id, err);
1519 genl_notify(reply, genl_info_net(info), info->snd_pid,
1520 dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
/* OVS_DP_CMD_GET handler: build a unicast reply describing one datapath.
 * Read-only; replies with an OVS_DP_CMD_NEW-formatted message.
 * NOTE(review): error-check lines between the calls are elided in this
 * extract. */
1524 static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1526 struct sk_buff *reply;
1527 struct datapath *dp;
1530 err = ovs_dp_cmd_validate(info->attrs);
1534 dp = lookup_datapath(info->userhdr, info->attrs);
1538 reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
1539 info->snd_seq, OVS_DP_CMD_NEW);
1541 return PTR_ERR(reply);
1543 return genlmsg_reply(reply, info);
/* Netlink dump callback: emit one NLM_F_MULTI message per datapath,
 * resuming after cb->args[0] already-dumped entries.
 * NOTE(review): the function's tail (skip/args bookkeeping, unlock,
 * return) is elided in this extract. */
1546 static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1548 struct datapath *dp;
1549 int skip = cb->args[0];
1552 list_for_each_entry(dp, &dps, list_node) {
/* Stop when the skb is full; the kernel re-invokes us to continue. */
1555 if (ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid,
1556 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1557 OVS_DP_CMD_NEW) < 0)
/* Generic netlink operations for the datapath family: mutating commands
 * (NEW/DEL/SET) require CAP_NET_ADMIN; GET is open to all and also
 * supports dumps. */
1567 static struct genl_ops dp_datapath_genl_ops[] = {
1568 { .cmd = OVS_DP_CMD_NEW,
1569 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1570 .policy = datapath_policy,
1571 .doit = ovs_dp_cmd_new
1573 { .cmd = OVS_DP_CMD_DEL,
1574 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1575 .policy = datapath_policy,
1576 .doit = ovs_dp_cmd_del
1578 { .cmd = OVS_DP_CMD_GET,
1579 .flags = 0, /* OK for unprivileged users. */
1580 .policy = datapath_policy,
1581 .doit = ovs_dp_cmd_get,
1582 .dumpit = ovs_dp_cmd_dump
1584 { .cmd = OVS_DP_CMD_SET,
1585 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1586 .policy = datapath_policy,
1587 .doit = ovs_dp_cmd_set,
/* Netlink attribute validation policy for vport commands. On kernels
 * without NLA_NUL_STRING support (the #else branch), only minimum lengths
 * are enforced for STATS/ADDRESS and the name attribute is unchecked here
 * (see ovs_vport_cmd_validate()/CHECK_NUL_STRING). */
1591 static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
1592 #ifdef HAVE_NLA_NUL_STRING
1593 [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1594 [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
1595 [OVS_VPORT_ATTR_ADDRESS] = { .len = ETH_ALEN },
1597 [OVS_VPORT_ATTR_STATS] = { .minlen = sizeof(struct ovs_vport_stats) },
1598 [OVS_VPORT_ATTR_ADDRESS] = { .minlen = ETH_ALEN },
1600 [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
1601 [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
1602 [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
1603 [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
/* Generic netlink family for vport commands; id is assigned dynamically
 * by the kernel (GENL_ID_GENERATE). Every message carries a struct
 * ovs_header after the genl header. */
1606 static struct genl_family dp_vport_genl_family = {
1607 .id = GENL_ID_GENERATE,
1608 .hdrsize = sizeof(struct ovs_header),
1609 .name = OVS_VPORT_FAMILY,
1610 .version = OVS_VPORT_VERSION,
1611 .maxattr = OVS_VPORT_ATTR_MAX
/* Multicast group used to broadcast vport add/del/change notifications;
 * non-static so other compilation units can reference it. */
1614 struct genl_multicast_group dp_vport_multicast_group = {
1615 .name = OVS_VPORT_MCGROUP
1618 /* Called with RTNL lock or RCU read lock. */
/* Serialize one vport into @skb as a genetlink message: port number,
 * type, name, upcall pid, stats, Ethernet address and (if any) vport
 * options. Returns the genlmsg_end() result on success, or -EMSGSIZE-style
 * failure via the (elided) nla_put_failure path.
 * NOTE(review): several original lines (genlmsg_put NULL check, EMSGSIZE
 * handling, the nla_put_failure label) are elided in this extract. */
1619 static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1620 u32 pid, u32 seq, u32 flags, u8 cmd)
1622 struct ovs_header *ovs_header;
1626 ovs_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family,
1631 ovs_header->dp_ifindex = get_dpifindex(vport->dp);
1633 NLA_PUT_U32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no);
1634 NLA_PUT_U32(skb, OVS_VPORT_ATTR_TYPE, vport_get_type(vport));
1635 NLA_PUT_STRING(skb, OVS_VPORT_ATTR_NAME, vport_get_name(vport));
1636 NLA_PUT_U32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_pid);
/* Reserve space first, then let vport_get_stats() write directly into
 * the attribute payload. */
1638 nla = nla_reserve(skb, OVS_VPORT_ATTR_STATS,
1639 sizeof(struct ovs_vport_stats));
1641 goto nla_put_failure;
1643 vport_get_stats(vport, nla_data(nla));
1645 NLA_PUT(skb, OVS_VPORT_ATTR_ADDRESS, ETH_ALEN, vport_get_addr(vport));
/* Vport-type-specific options are appended by the vport implementation. */
1647 err = vport_get_options(vport, skb);
1648 if (err == -EMSGSIZE)
1651 return genlmsg_end(skb, ovs_header);
1656 genlmsg_cancel(skb, ovs_header);
1660 /* Called with RTNL lock or RCU read lock. */
/* Allocate a fresh skb and fill it with one vport's description.
 * Returns the skb, or an ERR_PTR on allocation/fill failure. GFP_ATOMIC
 * because callers may hold RCU read lock.
 * NOTE(review): the cmd/seq parameter list and the free-on-error line are
 * partially elided in this extract. */
1661 struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid,
1664 struct sk_buff *skb;
1667 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
1669 return ERR_PTR(-ENOMEM);
1671 retval = ovs_vport_cmd_fill_info(vport, skb, pid, seq, 0, cmd);
1674 return ERR_PTR(retval);
/* Extra validation beyond vport_policy: on kernels whose nla policy
 * cannot enforce NUL-terminated strings, verify OVS_VPORT_ATTR_NAME here. */
1679 static int ovs_vport_cmd_validate(struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1681 return CHECK_NUL_STRING(a[OVS_VPORT_ATTR_NAME], IFNAMSIZ - 1);
1684 /* Called with RTNL lock or RCU read lock. */
/* Resolve the vport a request refers to, either globally by name
 * (OVS_VPORT_ATTR_NAME) or by (datapath ifindex, port number).
 * Returns the vport or an ERR_PTR: -ENODEV if name/datapath not found,
 * -EFBIG for an out-of-range port number, -ENOENT for an empty slot,
 * -EINVAL if neither identifying attribute was supplied.
 * NOTE(review): some NULL-check lines are elided in this extract. */
1685 static struct vport *lookup_vport(struct ovs_header *ovs_header,
1686 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1688 struct datapath *dp;
1689 struct vport *vport;
1691 if (a[OVS_VPORT_ATTR_NAME]) {
1692 vport = vport_locate(nla_data(a[OVS_VPORT_ATTR_NAME]));
1694 return ERR_PTR(-ENODEV);
1696 } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1697 u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1699 if (port_no >= DP_MAX_PORTS)
1700 return ERR_PTR(-EFBIG);
1702 dp = get_dp(ovs_header->dp_ifindex);
1704 return ERR_PTR(-ENODEV);
1706 vport = get_vport_protected(dp, port_no);
1708 return ERR_PTR(-ENOENT);
1711 return ERR_PTR(-EINVAL);
1714 /* Called with RTNL lock. */
/* Apply the mutable per-vport attributes (stats, Ethernet address) that
 * are shared between the NEW and SET command paths. Returns 0 or the
 * vport_set_addr() error. */
1715 static int change_vport(struct vport *vport,
1716 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1720 if (a[OVS_VPORT_ATTR_STATS])
1721 vport_set_stats(vport, nla_data(a[OVS_VPORT_ATTR_STATS]));
1723 if (a[OVS_VPORT_ATTR_ADDRESS])
1724 err = vport_set_addr(vport, nla_data(a[OVS_VPORT_ATTR_ADDRESS]));
/* OVS_VPORT_CMD_NEW handler: create a vport in an existing datapath.
 * NAME, TYPE and UPCALL_PID are mandatory; PORT_NO is optional — when
 * absent, the first free port number starting from 1 is picked
 * (0 == OVSP_LOCAL is reserved for the local port).
 * NOTE(review): this extract elides many original lines (error gotos,
 * rtnl lock/unlock, busy-slot checks, labels), so the visible flow is
 * incomplete. */
1729 static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1731 struct nlattr **a = info->attrs;
1732 struct ovs_header *ovs_header = info->userhdr;
1733 struct vport_parms parms;
1734 struct sk_buff *reply;
1735 struct vport *vport;
1736 struct datapath *dp;
1741 if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
1742 !a[OVS_VPORT_ATTR_UPCALL_PID])
1745 err = ovs_vport_cmd_validate(a);
1750 dp = get_dp(ovs_header->dp_ifindex);
1755 if (a[OVS_VPORT_ATTR_PORT_NO]) {
/* Caller-chosen port number: range-check it, then (in elided code,
 * presumably) verify the slot is free — TODO confirm. */
1756 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1759 if (port_no >= DP_MAX_PORTS)
1762 vport = get_vport_protected(dp, port_no);
1767 for (port_no = 1; ; port_no++) {
1768 if (port_no >= DP_MAX_PORTS) {
1772 vport = get_vport_protected(dp, port_no);
1778 parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
1779 parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
1780 parms.options = a[OVS_VPORT_ATTR_OPTIONS];
1782 parms.port_no = port_no;
1783 parms.upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
1785 vport = new_vport(&parms);
1786 err = PTR_ERR(vport);
1790 dp_sysfs_add_if(vport);
/* Apply optional stats/address attributes shared with the SET path. */
1792 err = change_vport(vport, a);
1794 reply = ovs_vport_cmd_build_info(vport, info->snd_pid,
1798 err = PTR_ERR(reply);
/* On failure after creation, undo the attach before returning. */
1801 dp_detach_port(vport);
1804 genl_notify(reply, genl_info_net(info), info->snd_pid,
1805 dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
/* OVS_VPORT_CMD_SET handler: modify an existing vport's options, stats,
 * address and/or upcall pid. The vport's type may not be changed.
 * NOTE(review): error gotos, lock/unlock and return lines are elided in
 * this extract. */
1814 static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1816 struct nlattr **a = info->attrs;
1817 struct sk_buff *reply;
1818 struct vport *vport;
1821 err = ovs_vport_cmd_validate(a);
1826 vport = lookup_vport(info->userhdr, a);
1827 err = PTR_ERR(vport);
/* Reject attempts to change the vport's type after creation. */
1832 if (a[OVS_VPORT_ATTR_TYPE] &&
1833 nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport_get_type(vport))
1836 if (!err && a[OVS_VPORT_ATTR_OPTIONS])
1837 err = vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
1839 err = change_vport(vport, a);
1840 if (!err && a[OVS_VPORT_ATTR_UPCALL_PID])
1841 vport->upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
1843 reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1845 if (IS_ERR(reply)) {
1846 err = PTR_ERR(reply);
/* Change already applied but no notification possible: report the
 * error to the whole multicast group. */
1847 netlink_set_err(INIT_NET_GENL_SOCK, 0,
1848 dp_vport_multicast_group.id, err);
1852 genl_notify(reply, genl_info_net(info), info->snd_pid,
1853 dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
/* OVS_VPORT_CMD_DEL handler: detach a vport from its datapath. The local
 * port (OVSP_LOCAL) cannot be deleted this way — it goes away only with
 * the datapath itself.
 * NOTE(review): error gotos, lock/unlock and return lines are elided in
 * this extract. */
1861 static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
1863 struct nlattr **a = info->attrs;
1864 struct sk_buff *reply;
1865 struct vport *vport;
1868 err = ovs_vport_cmd_validate(a);
1873 vport = lookup_vport(info->userhdr, a);
1874 err = PTR_ERR(vport);
1878 if (vport->port_no == OVSP_LOCAL) {
/* Build the DEL notification before detaching, while vport state is
 * still readable. */
1883 reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1885 err = PTR_ERR(reply);
1889 dp_detach_port(vport);
1891 genl_notify(reply, genl_info_net(info), info->snd_pid,
1892 dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
/* OVS_VPORT_CMD_GET handler: unicast-reply with one vport's description.
 * Read-only.
 * NOTE(review): rcu lock/unlock and error-branch lines are elided in this
 * extract. */
1900 static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
1902 struct nlattr **a = info->attrs;
1903 struct ovs_header *ovs_header = info->userhdr;
1904 struct sk_buff *reply;
1905 struct vport *vport;
1908 err = ovs_vport_cmd_validate(a);
1913 vport = lookup_vport(ovs_header, a);
1914 err = PTR_ERR(vport);
1918 reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1920 err = PTR_ERR(reply);
1926 return genlmsg_reply(reply, info);
/* Netlink dump callback: walk one datapath's port slots starting at
 * cb->args[0] and emit one NLM_F_MULTI message per occupied slot; the
 * resume position is saved back into cb->args[0].
 * NOTE(review): lock/unlock, empty-slot continue and return lines are
 * elided in this extract. */
1934 static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1936 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1937 struct datapath *dp;
1941 dp = get_dp(ovs_header->dp_ifindex);
1946 for (port_no = cb->args[0]; port_no < DP_MAX_PORTS; port_no++) {
1947 struct vport *vport;
1949 vport = get_vport_protected(dp, port_no);
/* Stop when the skb is full; the kernel re-invokes us to continue
 * from the saved port_no. */
1953 if (ovs_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).pid,
1954 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1955 OVS_VPORT_CMD_NEW) < 0)
1960 cb->args[0] = port_no;
/* Generic netlink operations for the vport family: mutating commands
 * (NEW/DEL/SET) require CAP_NET_ADMIN; GET is open to all and also
 * supports dumps. */
1966 static struct genl_ops dp_vport_genl_ops[] = {
1967 { .cmd = OVS_VPORT_CMD_NEW,
1968 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1969 .policy = vport_policy,
1970 .doit = ovs_vport_cmd_new
1972 { .cmd = OVS_VPORT_CMD_DEL,
1973 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1974 .policy = vport_policy,
1975 .doit = ovs_vport_cmd_del
1977 { .cmd = OVS_VPORT_CMD_GET,
1978 .flags = 0, /* OK for unprivileged users. */
1979 .policy = vport_policy,
1980 .doit = ovs_vport_cmd_get,
1981 .dumpit = ovs_vport_cmd_dump
1983 { .cmd = OVS_VPORT_CMD_SET,
1984 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1985 .policy = vport_policy,
1986 .doit = ovs_vport_cmd_set,
/* Bundles one genetlink family with its ops table and multicast group so
 * registration/unregistration can be table-driven (see dp_genl_families). */
1990 struct genl_family_and_ops {
1991 struct genl_family *family;
1992 struct genl_ops *ops;
1994 struct genl_multicast_group *group;
/* All genetlink families this module registers: datapath, vport, flow and
 * packet. Registered in order by dp_register_genl(); unregistered by
 * dp_unregister_genl(). */
1997 static const struct genl_family_and_ops dp_genl_families[] = {
1998 { &dp_datapath_genl_family,
1999 dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
2000 &dp_datapath_multicast_group },
2001 { &dp_vport_genl_family,
2002 dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
2003 &dp_vport_multicast_group },
2004 { &dp_flow_genl_family,
2005 dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
2006 &dp_flow_multicast_group },
2007 { &dp_packet_genl_family,
2008 dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
/* Unregister the first @n_families entries of dp_genl_families. Takes a
 * count so dp_register_genl() can unwind exactly the families it managed
 * to register before a failure. */
2012 static void dp_unregister_genl(int n_families)
2016 for (i = 0; i < n_families; i++)
2017 genl_unregister_family(dp_genl_families[i].family);
/* Register every genetlink family in dp_genl_families, along with each
 * family's ops and multicast group. On failure, unwinds the families
 * already registered via dp_unregister_genl(n_registered).
 * NOTE(review): the n_registered bookkeeping and error-branch lines are
 * elided in this extract. */
2020 static int dp_register_genl(void)
2027 for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2028 const struct genl_family_and_ops *f = &dp_genl_families[i];
2030 err = genl_register_family_with_ops(f->family, f->ops,
2037 err = genl_register_mc_group(f->family, f->group);
2046 dp_unregister_genl(n_registered);
/* Module init: initialize the flow/vport subsystems (calls elided in this
 * extract), register the netdevice notifier, then register all genetlink
 * families. Each step's failure unwinds the previous ones via the error
 * labels.
 * NOTE(review): the subsystem-init calls and the remaining error labels
 * after error_unreg_notifier are elided in this extract. */
2050 static int __init dp_init(void)
2052 struct sk_buff *dummy_skb;
/* Our per-skb control block must fit in the sk_buff's cb[] area. */
2055 BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb));
2057 pr_info("Open vSwitch %s, built "__DATE__" "__TIME__"\n",
2066 goto error_tnl_exit;
2070 goto error_flow_exit;
2072 err = register_netdevice_notifier(&dp_device_notifier);
2074 goto error_vport_exit;
2076 err = dp_register_genl();
2078 goto error_unreg_notifier;
2082 error_unreg_notifier:
2083 unregister_netdevice_notifier(&dp_device_notifier);
/* Module exit: tear down in reverse order of dp_init() — genetlink
 * families first, then the netdevice notifier.
 * NOTE(review): the remaining cleanup calls (subsystem exits visible in
 * dp_init's error path) are elided in this extract. */
2094 static void dp_cleanup(void)
2097 dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2098 unregister_netdevice_notifier(&dp_device_notifier);
/* Module entry/exit points and metadata. License must be GPL-compatible
 * for the EXPORT_SYMBOL/genetlink interfaces used above. */
2104 module_init(dp_init);
2105 module_exit(dp_cleanup);
2107 MODULE_DESCRIPTION("Open vSwitch switching datapath");
2108 MODULE_LICENSE("GPL");