X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=datapath%2Fdatapath.c;h=a6bf70ca9b99e86ede19ebe59dfb2c6aa8a3ff22;hb=14002a5984e97f0f2482acdbb445c45266e2c6a1;hp=cf901d2584b5bc7814757aa161a594fd6fdd8d7b;hpb=d5de5b0d7ec2c3bf0eb1243145bd31efbf3078fd;p=sliver-openvswitch.git diff --git a/datapath/datapath.c b/datapath/datapath.c index cf901d258..a6bf70ca9 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2012 Nicira, Inc. + * Copyright (c) 2007-2013 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -63,8 +63,8 @@ #include "vport-netdev.h" #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) || \ - LINUX_VERSION_CODE >= KERNEL_VERSION(3,9,0) -#error Kernels before 2.6.18 or after 3.8 are not supported by this version of Open vSwitch. + LINUX_VERSION_CODE >= KERNEL_VERSION(3,11,0) +#error Kernels before 2.6.18 or after 3.9 are not supported by this version of Open vSwitch. #endif #define REHASH_FLOW_INTERVAL (10 * 60 * HZ) @@ -173,7 +173,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu) { struct datapath *dp = container_of(rcu, struct datapath, rcu); - ovs_flow_tbl_destroy((__force struct flow_table *)dp->table); + ovs_flow_tbl_destroy((__force struct flow_table *)dp->table, false); free_percpu(dp->stats_percpu); release_net(ovs_dp_get_net(dp)); kfree(dp->ports); @@ -234,19 +234,18 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) struct sw_flow_key key; u64 *stats_counter; int error; - int key_len; stats = this_cpu_ptr(dp->stats_percpu); /* Extract flow from 'skb' into 'key'. */ - error = ovs_flow_extract(skb, p->port_no, &key, &key_len); + error = ovs_flow_extract(skb, p->port_no, &key); if (unlikely(error)) { kfree_skb(skb); return; } /* Look up flow. */ - flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len); + flow = ovs_flow_lookup(rcu_dereference(dp->table), &key); if (unlikely(!flow)) { struct dp_upcall_info upcall; @@ -261,6 +260,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) } OVS_CB(skb)->flow = flow; + OVS_CB(skb)->pkt_key = &key; stats_counter = &stats->n_hit; ovs_flow_used(OVS_CB(skb)->flow, skb); @@ -280,6 +280,7 @@ static struct genl_family dp_packet_genl_family = { .version = OVS_PACKET_VERSION, .maxattr = OVS_PACKET_ATTR_MAX, SET_NETNSOK + SET_PARALLEL_OPS }; int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, @@ -443,7 +444,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, upcall->dp_ifindex = dp_ifindex; nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY); - ovs_flow_to_nlattrs(upcall_info->key, user_skb); + ovs_flow_to_nlattrs(upcall_info->key, upcall_info->key, user_skb); nla_nest_end(user_skb, nla); if (upcall_info->userdata) @@ -476,7 +477,7 @@ static int flush_flows(struct datapath *dp) rcu_assign_pointer(dp->table, new_table); - ovs_flow_tbl_deferred_destroy(old_table); + ovs_flow_tbl_destroy(old_table, true); return 0; } @@ -619,10 +620,12 @@ static int validate_tp_port(const struct sw_flow_key *flow_key) static int validate_and_copy_set_tun(const struct nlattr *attr, struct sw_flow_actions **sfa) { - struct ovs_key_ipv4_tunnel tun_key; + struct sw_flow_match match; + struct sw_flow_key key; int err, start; - err = ipv4_tun_from_nlattr(nla_data(attr), &tun_key); + ovs_match_init(&match, &key, NULL); + err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &match, false); if (err) return err; @@ -630,7 +633,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, if (start < 0) return start; - err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &tun_key, sizeof(tun_key)); + err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key, + sizeof(match.key->tun_key)); add_nested_action_end(*sfa, start); return err; @@ -871,7 +875,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) struct ethhdr *eth; int len; int err; - int key_len; err = -EINVAL; if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] || @@ -904,11 +907,11 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(flow)) goto err_kfree_skb; - err = ovs_flow_extract(packet, -1, &flow->key, &key_len); + err = ovs_flow_extract(packet, -1, &flow->key); if (err) goto err_flow_free; - err = ovs_flow_metadata_from_nlattrs(flow, key_len, a[OVS_PACKET_ATTR_KEY]); + err = ovs_flow_metadata_from_nlattrs(flow, a[OVS_PACKET_ATTR_KEY]); if (err) goto err_flow_free; acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS])); @@ -922,6 +925,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) goto err_flow_free; OVS_CB(packet)->flow = flow; + OVS_CB(packet)->pkt_key = &flow->key; packet->priority = flow->key.phy.priority; skb_set_mark(packet, flow->key.phy.skb_mark); @@ -936,13 +940,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) local_bh_enable(); rcu_read_unlock(); - ovs_flow_free(flow); + ovs_flow_free(flow, false); return err; err_unlock: rcu_read_unlock(); err_flow_free: - ovs_flow_free(flow); + ovs_flow_free(flow, false); err_kfree_skb: kfree_skb(packet); err: @@ -969,9 +973,10 @@ static struct genl_ops dp_packet_genl_ops[] = { static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) { + struct flow_table *table; int i; - struct flow_table *table = ovsl_dereference(dp->table); + table = rcu_dereference_check(dp->table, lockdep_ovsl_is_held()); stats->n_flows = ovs_flow_tbl_count(table); stats->n_hit = stats->n_missed = stats->n_lost = 0; @@ -1006,6 +1011,7 @@ static struct genl_family dp_flow_genl_family = { .version = OVS_FLOW_VERSION, .maxattr = OVS_FLOW_ATTR_MAX, SET_NETNSOK + SET_PARALLEL_OPS }; static struct genl_multicast_group ovs_dp_flow_multicast_group = { @@ -1061,7 +1067,8 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) if (!start) return -EMSGSIZE; - err = ipv4_tun_to_nlattr(skb, nla_data(ovs_key)); + err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key), + nla_data(ovs_key)); if (err) return err; nla_nest_end(skb, start); @@ -1109,6 +1116,7 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) { return NLMSG_ALIGN(sizeof(struct ovs_header)) + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */ + + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_MASK */ + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ + nla_total_size(8) /* OVS_FLOW_ATTR_USED */ @@ -1121,7 +1129,6 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, u32 seq, u32 flags, u8 cmd) { const int skb_orig_len = skb->len; - const struct sw_flow_actions *sf_acts; struct nlattr *start; struct ovs_flow_stats stats; struct ovs_header *ovs_header; @@ -1130,20 +1137,31 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, u8 tcp_flags; int err; - sf_acts = ovsl_dereference(flow->sf_acts); - ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd); if (!ovs_header) return -EMSGSIZE; ovs_header->dp_ifindex = get_dpifindex(dp); + /* Fill flow key. */ nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY); if (!nla) goto nla_put_failure; - err = ovs_flow_to_nlattrs(&flow->key, skb); + + err = ovs_flow_to_nlattrs(&flow->unmasked_key, + &flow->unmasked_key, skb); + if (err) + goto error; + nla_nest_end(skb, nla); + + nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK); + if (!nla) + goto nla_put_failure; + + err = ovs_flow_to_nlattrs(&flow->key, &flow->mask->key, skb); if (err) goto error; + nla_nest_end(skb, nla); spin_lock_bh(&flow->lock); @@ -1178,6 +1196,11 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, */ start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS); if (start) { + const struct sw_flow_actions *sf_acts; + + sf_acts = rcu_dereference_check(flow->sf_acts, + lockdep_ovsl_is_held()); + err = actions_to_attr(sf_acts->actions, sf_acts->actions_len, skb); if (!err) nla_nest_end(skb, start); @@ -1228,20 +1251,24 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct ovs_header *ovs_header = info->userhdr; - struct sw_flow_key key; - struct sw_flow *flow; + struct sw_flow_key key, masked_key; + struct sw_flow *flow = NULL; + struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; struct flow_table *table; struct sw_flow_actions *acts = NULL; + struct sw_flow_match match; int error; - int key_len; /* Extract key. */ error = -EINVAL; if (!a[OVS_FLOW_ATTR_KEY]) goto error; - error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); + + ovs_match_init(&match, &key, &mask); + error = ovs_match_from_nlattrs(&match, + a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); if (error) goto error; @@ -1252,9 +1279,13 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(acts)) goto error; - error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0, &acts); - if (error) + ovs_flow_key_mask(&masked_key, &key, &mask); + error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], + &masked_key, 0, &acts); + if (error) { + OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); goto err_kfree; + } } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) { error = -EINVAL; goto error; @@ -1267,8 +1298,11 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto err_unlock_ovs; table = ovsl_dereference(dp->table); - flow = ovs_flow_tbl_lookup(table, &key, key_len); + + /* Check if this is a duplicate flow */ + flow = ovs_flow_lookup(table, &key); if (!flow) { + struct sw_flow_mask *mask_p; /* Bail out if we're not allowed to create a new flow. */ error = -ENOENT; if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) @@ -1281,7 +1315,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) new_table = ovs_flow_tbl_expand(table); if (!IS_ERR(new_table)) { rcu_assign_pointer(dp->table, new_table); - ovs_flow_tbl_deferred_destroy(table); + ovs_flow_tbl_destroy(table, true); table = ovsl_dereference(dp->table); } } @@ -1294,14 +1328,30 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) } clear_stats(flow); + flow->key = masked_key; + flow->unmasked_key = key; + + /* Make sure mask is unique in the system */ + mask_p = ovs_sw_flow_mask_find(table, &mask); + if (!mask_p) { + /* Allocate a new mask if none exsits. */ + mask_p = ovs_sw_flow_mask_alloc(); + if (!mask_p) + goto err_flow_free; + mask_p->key = mask.key; + mask_p->range = mask.range; + ovs_sw_flow_mask_insert(table, mask_p); + } + + ovs_sw_flow_mask_add_ref(mask_p); + flow->mask = mask_p; rcu_assign_pointer(flow->sf_acts, acts); /* Put flow in bucket. */ - ovs_flow_tbl_insert(table, flow, &key, key_len); + ovs_flow_insert(table, flow); reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, - info->snd_seq, - OVS_FLOW_CMD_NEW); + info->snd_seq, OVS_FLOW_CMD_NEW); } else { /* We found a matching flow. */ struct sw_flow_actions *old_acts; @@ -1317,6 +1367,13 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) goto err_unlock_ovs; + /* The unmasked key has to be the same for flow updates. */ + error = -EINVAL; + if (!ovs_flow_cmp_unmasked_key(flow, &key, match.range.end)) { + OVS_NLERR("Flow modification message rejected, unmasked key does not match.\n"); + goto err_unlock_ovs; + } + /* Update actions. */ old_acts = ovsl_dereference(flow->sf_acts); rcu_assign_pointer(flow->sf_acts, acts); @@ -1341,6 +1398,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) ovs_dp_flow_multicast_group.id, PTR_ERR(reply)); return 0; +err_flow_free: + ovs_flow_free(flow, false); err_unlock_ovs: ovs_unlock(); err_kfree: @@ -1358,12 +1417,16 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) struct sw_flow *flow; struct datapath *dp; struct flow_table *table; + struct sw_flow_match match; int err; - int key_len; - if (!a[OVS_FLOW_ATTR_KEY]) + if (!a[OVS_FLOW_ATTR_KEY]) { + OVS_NLERR("Flow get message rejected, Key attribute missing.\n"); return -EINVAL; - err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); + } + + ovs_match_init(&match, &key, NULL); + err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL); if (err) return err; @@ -1375,7 +1438,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) } table = ovsl_dereference(dp->table); - flow = ovs_flow_tbl_lookup(table, &key, key_len); + flow = ovs_flow_lookup_unmasked_key(table, &match); if (!flow) { err = -ENOENT; goto unlock; @@ -1404,8 +1467,8 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) struct sw_flow *flow; struct datapath *dp; struct flow_table *table; + struct sw_flow_match match; int err; - int key_len; ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); @@ -1418,12 +1481,14 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) err = flush_flows(dp); goto unlock; } - err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); + + ovs_match_init(&match, &key, NULL); + err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL); if (err) goto unlock; table = ovsl_dereference(dp->table); - flow = ovs_flow_tbl_lookup(table, &key, key_len); + flow = ovs_flow_lookup_unmasked_key(table, &match); if (!flow) { err = -ENOENT; goto unlock; @@ -1435,13 +1500,13 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) goto unlock; } - ovs_flow_tbl_remove(table, flow); + ovs_flow_remove(table, flow); err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, info->snd_seq, 0, OVS_FLOW_CMD_DEL); BUG_ON(err < 0); - ovs_flow_deferred_free(flow); + ovs_flow_free(flow, true); ovs_unlock(); ovs_notify(reply, info, &ovs_dp_flow_multicast_group); @@ -1457,22 +1522,21 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) struct datapath *dp; struct flow_table *table; - ovs_lock(); + rcu_read_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); if (!dp) { - ovs_unlock(); + rcu_read_unlock(); return -ENODEV; } - table = ovsl_dereference(dp->table); - + table = rcu_dereference(dp->table); for (;;) { struct sw_flow *flow; u32 bucket, obj; bucket = cb->args[0]; obj = cb->args[1]; - flow = ovs_flow_tbl_next(table, &bucket, &obj); + flow = ovs_flow_dump_next(table, &bucket, &obj); if (!flow) break; @@ -1485,7 +1549,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->args[0] = bucket; cb->args[1] = obj; } - ovs_unlock(); + rcu_read_unlock(); return skb->len; } @@ -1527,6 +1591,7 @@ static struct genl_family dp_datapath_genl_family = { .version = OVS_DATAPATH_VERSION, .maxattr = OVS_DP_ATTR_MAX, SET_NETNSOK + SET_PARALLEL_OPS }; static struct genl_multicast_group ovs_dp_datapath_multicast_group = { @@ -1691,7 +1756,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_destroy_local_port; ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); - list_add_tail(&dp->list_node, &ovs_net->dps); + list_add_tail_rcu(&dp->list_node, &ovs_net->dps); ovs_unlock(); @@ -1705,7 +1770,7 @@ err_destroy_ports_array: err_destroy_percpu: free_percpu(dp->stats_percpu); err_destroy_table: - ovs_flow_tbl_destroy(ovsl_dereference(dp->table)); + ovs_flow_tbl_destroy(ovsl_dereference(dp->table), false); err_free_dp: release_net(ovs_dp_get_net(dp)); kfree(dp); @@ -1729,7 +1794,7 @@ static void __dp_destroy(struct datapath *dp) ovs_dp_detach_port(vport); } - list_del(&dp->list_node); + list_del_rcu(&dp->list_node); /* OVSP_LOCAL is datapath internal port. We need to make sure that * all port in datapath are destroyed first before freeing datapath. @@ -1846,8 +1911,8 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) int skip = cb->args[0]; int i = 0; - ovs_lock(); - list_for_each_entry(dp, &ovs_net->dps, list_node) { + rcu_read_lock(); + list_for_each_entry_rcu(dp, &ovs_net->dps, list_node) { if (i >= skip && ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, @@ -1855,7 +1920,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) break; i++; } - ovs_unlock(); + rcu_read_unlock(); cb->args[0] = i; @@ -1906,6 +1971,7 @@ static struct genl_family dp_vport_genl_family = { .version = OVS_VPORT_VERSION, .maxattr = OVS_VPORT_ATTR_MAX, SET_NETNSOK + SET_PARALLEL_OPS }; struct genl_multicast_group ovs_dp_vport_multicast_group = { @@ -2105,10 +2171,11 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(vport)) goto exit_unlock; - err = 0; if (a[OVS_VPORT_ATTR_TYPE] && - nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) + nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) { err = -EINVAL; + goto exit_unlock; + } reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!reply) { @@ -2116,10 +2183,11 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) goto exit_unlock; } - if (!err && a[OVS_VPORT_ATTR_OPTIONS]) + if (a[OVS_VPORT_ATTR_OPTIONS]) { err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]); - if (err) - goto exit_free; + if (err) + goto exit_free; + } if (a[OVS_VPORT_ATTR_STATS]) ovs_vport_set_stats(vport, nla_data(a[OVS_VPORT_ATTR_STATS])); @@ -2355,7 +2423,7 @@ static void rehash_flow_table(struct work_struct *work) new_table = ovs_flow_tbl_rehash(old_table); if (!IS_ERR(new_table)) { rcu_assign_pointer(dp->table, new_table); - ovs_flow_tbl_deferred_destroy(old_table); + ovs_flow_tbl_destroy(old_table, true); } } } @@ -2393,6 +2461,8 @@ static struct pernet_operations ovs_net_ops = { .size = sizeof(struct ovs_net), }; +DEFINE_COMPAT_PNET_REG_FUNC(device); + static int __init dp_init(void) { int err;