From: Giuseppe Lettieri Date: Mon, 7 Oct 2013 10:34:23 +0000 (+0200) Subject: Merge branch 'mainstream' X-Git-Tag: sliver-openvswitch-2.0.90-1~11 X-Git-Url: http://git.onelab.eu/?a=commitdiff_plain;h=b2f2acd543f159ba984a00059892917933612a10;hp=86c1d8dcc38489c9b04ec242a14f6ec64c81fb24;p=sliver-openvswitch.git Merge branch 'mainstream' --- diff --git a/AUTHORS b/AUTHORS index 63c1ef872..78923284b 100644 --- a/AUTHORS +++ b/AUTHORS @@ -33,6 +33,7 @@ Duffie Cooley dcooley@nicira.com Ed Maste emaste at freebsd.org Edward Tomasz Napierała trasz@freebsd.org Ethan Jackson ethan@nicira.com +Flavio Leitner fbl@redhat.com FUJITA Tomonori fujita.tomonori@lab.ntt.co.jp Gaetano Catalli gaetano.catalli@gmail.com Giuseppe Lettieri g.lettieri@iet.unipi.it diff --git a/FAQ b/FAQ index 5744d5abf..d36495c8a 100644 --- a/FAQ +++ b/FAQ @@ -1299,6 +1299,39 @@ A: Yes, OpenFlow requires a switch to ignore attempts to send a packet 2,3,4,5,6,\ pop:NXM_OF_IN_PORT[] +Q: My bridge br0 has host 192.168.0.1 on port 1 and host 192.168.0.2 + on port 2. I set up flows to forward only traffic destined to the + other host and drop other traffic, like this: + + priority=5,in_port=1,ip,nw_dst=192.168.0.2,actions=2 + priority=5,in_port=2,ip,nw_dst=192.168.0.1,actions=1 + priority=0,actions=drop + + But it doesn't work--I don't get any connectivity when I do this. + Why? + +A: These flows drop the ARP packets that IP hosts use to establish IP + connectivity over Ethernet. To solve the problem, add flows to + allow ARP to pass between the hosts: + + priority=5,in_port=1,arp,actions=2 + priority=5,in_port=2,arp,actions=1 + + This issue can manifest other ways, too. 
The following flows that + match on Ethernet addresses instead of IP addresses will also drop + ARP packets, because ARP requests are broadcast instead of being + directed to a specific host: + + priority=5,in_port=1,dl_dst=54:00:00:00:00:02,actions=2 + priority=5,in_port=2,dl_dst=54:00:00:00:00:01,actions=1 + priority=0,actions=drop + + The solution already described above will also work in this case. + It may be better to add flows to allow all multicast and broadcast + traffic: + + priority=5,in_port=1,dl_dst=01:00:00:00:00:00/01:00:00:00:00:00,actions=2 + priority=5,in_port=2,dl_dst=01:00:00:00:00:00/01:00:00:00:00:00,actions=1 Contact ------- diff --git a/NEWS b/NEWS index eae1146dc..94e0da9c1 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,9 @@ Post-v2.0.0 --------------------- + - The default OpenFlow and OVSDB ports will change to + IANA-assigned numbers in a future release. Consider updating + your installations to specify port numbers instead of using the + defaults. v2.0.0 - xx xxx xxxx diff --git a/datapath/Modules.mk b/datapath/Modules.mk index 7ddf79c1f..b652411a4 100644 --- a/datapath/Modules.mk +++ b/datapath/Modules.mk @@ -11,6 +11,8 @@ openvswitch_sources = \ datapath.c \ dp_notify.c \ flow.c \ + flow_netlink.c \ + flow_table.c \ vport.c \ vport-gre.c \ vport-internal_dev.c \ @@ -22,6 +24,8 @@ openvswitch_headers = \ compat.h \ datapath.h \ flow.h \ + flow_netlink.h \ + flow_table.h \ vlan.h \ vport.h \ vport-internal_dev.h \ diff --git a/datapath/datapath.c b/datapath/datapath.c index 4defcdb7d..9e6df12bc 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -56,12 +56,11 @@ #include "datapath.h" #include "flow.h" +#include "flow_netlink.h" #include "vlan.h" #include "vport-internal_dev.h" #include "vport-netdev.h" -#define REHASH_FLOW_INTERVAL (10 * 60 * HZ) - int ovs_net_id __read_mostly; static void ovs_notify(struct sk_buff *skb, struct genl_info *info, @@ -164,7 +163,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu) { struct datapath *dp 
= container_of(rcu, struct datapath, rcu); - ovs_flow_tbl_destroy((__force struct flow_table *)dp->table, false); + ovs_flow_tbl_destroy(&dp->table); free_percpu(dp->stats_percpu); release_net(ovs_dp_get_net(dp)); kfree(dp->ports); @@ -236,7 +235,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) } /* Look up flow. */ - flow = ovs_flow_lookup(rcu_dereference(dp->table), &key); + flow = ovs_flow_tbl_lookup(&dp->table, &key); if (unlikely(!flow)) { struct dp_upcall_info upcall; @@ -435,7 +434,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, upcall->dp_ifindex = dp_ifindex; nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY); - ovs_flow_to_nlattrs(upcall_info->key, upcall_info->key, user_skb); + ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb); nla_nest_end(user_skb, nla); if (upcall_info->userdata) @@ -455,398 +454,6 @@ out: return err; } -/* Called with ovs_mutex. */ -static int flush_flows(struct datapath *dp) -{ - struct flow_table *old_table; - struct flow_table *new_table; - - old_table = ovsl_dereference(dp->table); - new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS); - if (!new_table) - return -ENOMEM; - - rcu_assign_pointer(dp->table, new_table); - - ovs_flow_tbl_destroy(old_table, true); - return 0; -} - -static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, int attr_len) -{ - - struct sw_flow_actions *acts; - int new_acts_size; - int req_size = NLA_ALIGN(attr_len); - int next_offset = offsetof(struct sw_flow_actions, actions) + - (*sfa)->actions_len; - - if (req_size <= (ksize(*sfa) - next_offset)) - goto out; - - new_acts_size = ksize(*sfa) * 2; - - if (new_acts_size > MAX_ACTIONS_BUFSIZE) { - if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) - return ERR_PTR(-EMSGSIZE); - new_acts_size = MAX_ACTIONS_BUFSIZE; - } - - acts = ovs_flow_actions_alloc(new_acts_size); - if (IS_ERR(acts)) - return (void *)acts; - - memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len); - 
acts->actions_len = (*sfa)->actions_len; - kfree(*sfa); - *sfa = acts; - -out: - (*sfa)->actions_len += req_size; - return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset); -} - -static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len) -{ - struct nlattr *a; - - a = reserve_sfa_size(sfa, nla_attr_size(len)); - if (IS_ERR(a)) - return PTR_ERR(a); - - a->nla_type = attrtype; - a->nla_len = nla_attr_size(len); - - if (data) - memcpy(nla_data(a), data, len); - memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len)); - - return 0; -} - -static inline int add_nested_action_start(struct sw_flow_actions **sfa, int attrtype) -{ - int used = (*sfa)->actions_len; - int err; - - err = add_action(sfa, attrtype, NULL, 0); - if (err) - return err; - - return used; -} - -static inline void add_nested_action_end(struct sw_flow_actions *sfa, int st_offset) -{ - struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + st_offset); - - a->nla_len = sfa->actions_len - st_offset; -} - -static int validate_and_copy_actions(const struct nlattr *attr, - const struct sw_flow_key *key, int depth, - struct sw_flow_actions **sfa); - -static int validate_and_copy_sample(const struct nlattr *attr, - const struct sw_flow_key *key, int depth, - struct sw_flow_actions **sfa) -{ - const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; - const struct nlattr *probability, *actions; - const struct nlattr *a; - int rem, start, err, st_acts; - - memset(attrs, 0, sizeof(attrs)); - nla_for_each_nested(a, attr, rem) { - int type = nla_type(a); - if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type]) - return -EINVAL; - attrs[type] = a; - } - if (rem) - return -EINVAL; - - probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY]; - if (!probability || nla_len(probability) != sizeof(u32)) - return -EINVAL; - - actions = attrs[OVS_SAMPLE_ATTR_ACTIONS]; - if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) - return -EINVAL; - - /* validation done, 
copy sample action. */ - start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE); - if (start < 0) - return start; - err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, nla_data(probability), sizeof(u32)); - if (err) - return err; - st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS); - if (st_acts < 0) - return st_acts; - - err = validate_and_copy_actions(actions, key, depth + 1, sfa); - if (err) - return err; - - add_nested_action_end(*sfa, st_acts); - add_nested_action_end(*sfa, start); - - return 0; -} - -static int validate_tp_port(const struct sw_flow_key *flow_key) -{ - if (flow_key->eth.type == htons(ETH_P_IP)) { - if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst) - return 0; - } else if (flow_key->eth.type == htons(ETH_P_IPV6)) { - if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst) - return 0; - } - - return -EINVAL; -} - -static int validate_and_copy_set_tun(const struct nlattr *attr, - struct sw_flow_actions **sfa) -{ - struct sw_flow_match match; - struct sw_flow_key key; - int err, start; - - ovs_match_init(&match, &key, NULL); - err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &match, false); - if (err) - return err; - - start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET); - if (start < 0) - return start; - - err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key, - sizeof(match.key->tun_key)); - add_nested_action_end(*sfa, start); - - return err; -} - -static int validate_set(const struct nlattr *a, - const struct sw_flow_key *flow_key, - struct sw_flow_actions **sfa, - bool *set_tun) -{ - const struct nlattr *ovs_key = nla_data(a); - int key_type = nla_type(ovs_key); - - /* There can be only one key in a action */ - if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) - return -EINVAL; - - if (key_type > OVS_KEY_ATTR_MAX || - (ovs_key_lens[key_type] != nla_len(ovs_key) && - ovs_key_lens[key_type] != -1)) - return -EINVAL; - - switch (key_type) { - const struct ovs_key_ipv4 *ipv4_key; - const struct ovs_key_ipv6 
*ipv6_key; - int err; - - case OVS_KEY_ATTR_PRIORITY: - case OVS_KEY_ATTR_SKB_MARK: - case OVS_KEY_ATTR_ETHERNET: - break; - - case OVS_KEY_ATTR_TUNNEL: - *set_tun = true; - err = validate_and_copy_set_tun(a, sfa); - if (err) - return err; - break; - - case OVS_KEY_ATTR_IPV4: - if (flow_key->eth.type != htons(ETH_P_IP)) - return -EINVAL; - - if (!flow_key->ip.proto) - return -EINVAL; - - ipv4_key = nla_data(ovs_key); - if (ipv4_key->ipv4_proto != flow_key->ip.proto) - return -EINVAL; - - if (ipv4_key->ipv4_frag != flow_key->ip.frag) - return -EINVAL; - - break; - - case OVS_KEY_ATTR_IPV6: - if (flow_key->eth.type != htons(ETH_P_IPV6)) - return -EINVAL; - - if (!flow_key->ip.proto) - return -EINVAL; - - ipv6_key = nla_data(ovs_key); - if (ipv6_key->ipv6_proto != flow_key->ip.proto) - return -EINVAL; - - if (ipv6_key->ipv6_frag != flow_key->ip.frag) - return -EINVAL; - - if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) - return -EINVAL; - - break; - - case OVS_KEY_ATTR_TCP: - if (flow_key->ip.proto != IPPROTO_TCP) - return -EINVAL; - - return validate_tp_port(flow_key); - - case OVS_KEY_ATTR_UDP: - if (flow_key->ip.proto != IPPROTO_UDP) - return -EINVAL; - - return validate_tp_port(flow_key); - - case OVS_KEY_ATTR_SCTP: - if (flow_key->ip.proto != IPPROTO_SCTP) - return -EINVAL; - - return validate_tp_port(flow_key); - - default: - return -EINVAL; - } - - return 0; -} - -static int validate_userspace(const struct nlattr *attr) -{ - static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { - [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, - [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC }, - }; - struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; - int error; - - error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, - attr, userspace_policy); - if (error) - return error; - - if (!a[OVS_USERSPACE_ATTR_PID] || - !nla_get_u32(a[OVS_USERSPACE_ATTR_PID])) - return -EINVAL; - - return 0; -} - -static int copy_action(const struct nlattr *from, - struct 
sw_flow_actions **sfa) -{ - int totlen = NLA_ALIGN(from->nla_len); - struct nlattr *to; - - to = reserve_sfa_size(sfa, from->nla_len); - if (IS_ERR(to)) - return PTR_ERR(to); - - memcpy(to, from, totlen); - return 0; -} - -static int validate_and_copy_actions(const struct nlattr *attr, - const struct sw_flow_key *key, - int depth, - struct sw_flow_actions **sfa) -{ - const struct nlattr *a; - int rem, err; - - if (depth >= SAMPLE_ACTION_DEPTH) - return -EOVERFLOW; - - nla_for_each_nested(a, attr, rem) { - /* Expected argument lengths, (u32)-1 for variable length. */ - static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { - [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), - [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, - [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), - [OVS_ACTION_ATTR_POP_VLAN] = 0, - [OVS_ACTION_ATTR_SET] = (u32)-1, - [OVS_ACTION_ATTR_SAMPLE] = (u32)-1 - }; - const struct ovs_action_push_vlan *vlan; - int type = nla_type(a); - bool skip_copy; - - if (type > OVS_ACTION_ATTR_MAX || - (action_lens[type] != nla_len(a) && - action_lens[type] != (u32)-1)) - return -EINVAL; - - skip_copy = false; - switch (type) { - case OVS_ACTION_ATTR_UNSPEC: - return -EINVAL; - - case OVS_ACTION_ATTR_USERSPACE: - err = validate_userspace(a); - if (err) - return err; - break; - - case OVS_ACTION_ATTR_OUTPUT: - if (nla_get_u32(a) >= DP_MAX_PORTS) - return -EINVAL; - break; - - - case OVS_ACTION_ATTR_POP_VLAN: - break; - - case OVS_ACTION_ATTR_PUSH_VLAN: - vlan = nla_data(a); - if (vlan->vlan_tpid != htons(ETH_P_8021Q)) - return -EINVAL; - if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) - return -EINVAL; - break; - - case OVS_ACTION_ATTR_SET: - err = validate_set(a, key, sfa, &skip_copy); - if (err) - return err; - break; - - case OVS_ACTION_ATTR_SAMPLE: - err = validate_and_copy_sample(a, key, depth, sfa); - if (err) - return err; - skip_copy = true; - break; - - default: - return -EINVAL; - } - if (!skip_copy) { - err = copy_action(a, sfa); - if (err) - return 
err; - } - } - - if (rem > 0) - return -EINVAL; - - return 0; -} - static void clear_stats(struct sw_flow *flow) { flow->used = 0; @@ -902,15 +509,16 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) if (err) goto err_flow_free; - err = ovs_flow_metadata_from_nlattrs(flow, a[OVS_PACKET_ATTR_KEY]); + err = ovs_nla_get_flow_metadata(flow, a[OVS_PACKET_ATTR_KEY]); if (err) goto err_flow_free; - acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS])); + acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_PACKET_ATTR_ACTIONS])); err = PTR_ERR(acts); if (IS_ERR(acts)) goto err_flow_free; - err = validate_and_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0, &acts); + err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], + &flow->key, 0, &acts); rcu_assign_pointer(flow->sf_acts, acts); if (err) goto err_flow_free; @@ -960,11 +568,9 @@ static struct genl_ops dp_packet_genl_ops[] = { static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) { - struct flow_table *table; int i; - table = rcu_dereference_check(dp->table, lockdep_ovsl_is_held()); - stats->n_flows = ovs_flow_tbl_count(table); + stats->n_flows = ovs_flow_tbl_count(&dp->table); stats->n_hit = stats->n_missed = stats->n_lost = 0; for_each_possible_cpu(i) { @@ -1005,100 +611,6 @@ static struct genl_multicast_group ovs_dp_flow_multicast_group = { .name = OVS_FLOW_MCGROUP }; -static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb); -static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) -{ - const struct nlattr *a; - struct nlattr *start; - int err = 0, rem; - - start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE); - if (!start) - return -EMSGSIZE; - - nla_for_each_nested(a, attr, rem) { - int type = nla_type(a); - struct nlattr *st_sample; - - switch (type) { - case OVS_SAMPLE_ATTR_PROBABILITY: - if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, sizeof(u32), nla_data(a))) - return -EMSGSIZE; - break; - case 
OVS_SAMPLE_ATTR_ACTIONS: - st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); - if (!st_sample) - return -EMSGSIZE; - err = actions_to_attr(nla_data(a), nla_len(a), skb); - if (err) - return err; - nla_nest_end(skb, st_sample); - break; - } - } - - nla_nest_end(skb, start); - return err; -} - -static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) -{ - const struct nlattr *ovs_key = nla_data(a); - int key_type = nla_type(ovs_key); - struct nlattr *start; - int err; - - switch (key_type) { - case OVS_KEY_ATTR_IPV4_TUNNEL: - start = nla_nest_start(skb, OVS_ACTION_ATTR_SET); - if (!start) - return -EMSGSIZE; - - err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key), - nla_data(ovs_key)); - if (err) - return err; - nla_nest_end(skb, start); - break; - default: - if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key)) - return -EMSGSIZE; - break; - } - - return 0; -} - -static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb) -{ - const struct nlattr *a; - int rem, err; - - nla_for_each_attr(a, attr, len, rem) { - int type = nla_type(a); - - switch (type) { - case OVS_ACTION_ATTR_SET: - err = set_action_to_attr(a, skb); - if (err) - return err; - break; - - case OVS_ACTION_ATTR_SAMPLE: - err = sample_action_to_attr(a, skb); - if (err) - return err; - break; - default: - if (nla_put(skb, type, nla_len(a), nla_data(a))) - return -EMSGSIZE; - break; - } - } - - return 0; -} - static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) { return NLMSG_ALIGN(sizeof(struct ovs_header)) @@ -1135,8 +647,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, if (!nla) goto nla_put_failure; - err = ovs_flow_to_nlattrs(&flow->unmasked_key, - &flow->unmasked_key, skb); + err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb); if (err) goto error; nla_nest_end(skb, nla); @@ -1145,7 +656,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, if 
(!nla) goto nla_put_failure; - err = ovs_flow_to_nlattrs(&flow->key, &flow->mask->key, skb); + err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb); if (err) goto error; @@ -1188,7 +699,8 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, sf_acts = rcu_dereference_check(flow->sf_acts, lockdep_ovsl_is_held()); - err = actions_to_attr(sf_acts->actions, sf_acts->actions_len, skb); + err = ovs_nla_put_actions(sf_acts->actions, + sf_acts->actions_len, skb); if (!err) nla_nest_end(skb, start); else { @@ -1243,7 +755,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; - struct flow_table *table; struct sw_flow_actions *acts = NULL; struct sw_flow_match match; int error; @@ -1254,21 +765,21 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto error; ovs_match_init(&match, &key, &mask); - error = ovs_match_from_nlattrs(&match, - a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); + error = ovs_nla_get_match(&match, + a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); if (error) goto error; /* Validate actions. 
*/ if (a[OVS_FLOW_ATTR_ACTIONS]) { - acts = ovs_flow_actions_alloc(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); + acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); error = PTR_ERR(acts); if (IS_ERR(acts)) goto error; - ovs_flow_key_mask(&masked_key, &key, &mask); - error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], - &masked_key, 0, &acts); + ovs_flow_mask_key(&masked_key, &key, &mask); + error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], + &masked_key, 0, &acts); if (error) { OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); goto err_kfree; @@ -1284,32 +795,14 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) if (!dp) goto err_unlock_ovs; - table = ovsl_dereference(dp->table); - /* Check if this is a duplicate flow */ - flow = ovs_flow_lookup(table, &key); + flow = ovs_flow_tbl_lookup(&dp->table, &key); if (!flow) { - struct flow_table *new_table = NULL; - struct sw_flow_mask *mask_p; - /* Bail out if we're not allowed to create a new flow. */ error = -ENOENT; if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) goto err_unlock_ovs; - /* Expand table, if necessary, to make room. */ - if (ovs_flow_tbl_need_to_expand(table)) - new_table = ovs_flow_tbl_expand(table); - else if (time_after(jiffies, dp->last_rehash + REHASH_FLOW_INTERVAL)) - new_table = ovs_flow_tbl_rehash(table); - - if (new_table && !IS_ERR(new_table)) { - rcu_assign_pointer(dp->table, new_table); - ovs_flow_tbl_destroy(table, true); - table = ovsl_dereference(dp->table); - dp->last_rehash = jiffies; - } - /* Allocate flow. */ flow = ovs_flow_alloc(); if (IS_ERR(flow)) { @@ -1320,25 +813,14 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) flow->key = masked_key; flow->unmasked_key = key; - - /* Make sure mask is unique in the system */ - mask_p = ovs_sw_flow_mask_find(table, &mask); - if (!mask_p) { - /* Allocate a new mask if none exsits. 
*/ - mask_p = ovs_sw_flow_mask_alloc(); - if (!mask_p) - goto err_flow_free; - mask_p->key = mask.key; - mask_p->range = mask.range; - ovs_sw_flow_mask_insert(table, mask_p); - } - - ovs_sw_flow_mask_add_ref(mask_p); - flow->mask = mask_p; rcu_assign_pointer(flow->sf_acts, acts); /* Put flow in bucket. */ - ovs_flow_insert(table, flow); + error = ovs_flow_tbl_insert(&dp->table, flow, &mask); + if (error) { + acts = NULL; + goto err_flow_free; + } reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); @@ -1359,7 +841,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) /* The unmasked key has to be the same for flow updates. */ error = -EINVAL; - if (!ovs_flow_cmp_unmasked_key(flow, &key, match.range.end)) { + if (!ovs_flow_cmp_unmasked_key(flow, &match)) { OVS_NLERR("Flow modification message rejected, unmasked key does not match.\n"); goto err_unlock_ovs; } @@ -1367,7 +849,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) /* Update actions. 
*/ old_acts = ovsl_dereference(flow->sf_acts); rcu_assign_pointer(flow->sf_acts, acts); - ovs_flow_deferred_free_acts(old_acts); + ovs_nla_free_flow_actions(old_acts); reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); @@ -1406,7 +888,6 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) struct sk_buff *reply; struct sw_flow *flow; struct datapath *dp; - struct flow_table *table; struct sw_flow_match match; int err; @@ -1416,7 +897,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) } ovs_match_init(&match, &key, NULL); - err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); if (err) return err; @@ -1427,9 +908,8 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - table = ovsl_dereference(dp->table); - flow = ovs_flow_lookup_unmasked_key(table, &match); - if (!flow) { + flow = ovs_flow_tbl_lookup(&dp->table, &key); + if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { err = -ENOENT; goto unlock; } @@ -1456,7 +936,6 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) struct sk_buff *reply; struct sw_flow *flow; struct datapath *dp; - struct flow_table *table; struct sw_flow_match match; int err; @@ -1468,18 +947,17 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) } if (!a[OVS_FLOW_ATTR_KEY]) { - err = flush_flows(dp); + err = ovs_flow_tbl_flush(&dp->table); goto unlock; } ovs_match_init(&match, &key, NULL); - err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); if (err) goto unlock; - table = ovsl_dereference(dp->table); - flow = ovs_flow_lookup_unmasked_key(table, &match); - if (!flow) { + flow = ovs_flow_tbl_lookup(&dp->table, &key); + if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { err = -ENOENT; goto unlock; } @@ 
-1490,7 +968,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) goto unlock; } - ovs_flow_remove(table, flow); + ovs_flow_tbl_remove(&dp->table, flow); err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, info->snd_seq, 0, OVS_FLOW_CMD_DEL); @@ -1509,8 +987,8 @@ unlock: static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); + struct table_instance *ti; struct datapath *dp; - struct flow_table *table; rcu_read_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); @@ -1519,14 +997,14 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) return -ENODEV; } - table = rcu_dereference(dp->table); + ti = rcu_dereference(dp->table.ti); for (;;) { struct sw_flow *flow; u32 bucket, obj; bucket = cb->args[0]; obj = cb->args[1]; - flow = ovs_flow_dump_next(table, &bucket, &obj); + flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj); if (!flow) break; @@ -1690,9 +1168,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); /* Allocate table. 
*/ - err = -ENOMEM; - rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS)); - if (!dp->table) + err = ovs_flow_tbl_init(&dp->table); + if (err) goto err_free_dp; dp->stats_percpu = alloc_percpu(struct dp_stats_percpu); @@ -1749,7 +1226,7 @@ err_destroy_ports_array: err_destroy_percpu: free_percpu(dp->stats_percpu); err_destroy_table: - ovs_flow_tbl_destroy(ovsl_dereference(dp->table), false); + ovs_flow_tbl_destroy(&dp->table); err_free_dp: release_net(ovs_dp_get_net(dp)); kfree(dp); diff --git a/datapath/datapath.h b/datapath/datapath.h index 4a49a7dda..64920de9b 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -28,6 +28,7 @@ #include "compat.h" #include "flow.h" +#include "flow_table.h" #include "vlan.h" #include "vport.h" @@ -59,12 +60,11 @@ struct dp_stats_percpu { * struct datapath - datapath for flow-based packet switching * @rcu: RCU callback head for deferred destruction. * @list_node: Element in global 'dps' list. - * @table: Current flow table. Protected by ovs_mutex and RCU. + * @table: flow table. * @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by * ovs_mutex and RCU. * @stats_percpu: Per-CPU datapath statistics. * @net: Reference to net namespace. - * @last_rehash: Timestamp of last rehash. * * Context: See the comment on locking at the top of datapath.c for additional * locking information. @@ -74,7 +74,7 @@ struct datapath { struct list_head list_node; /* Flow table. */ - struct flow_table __rcu *table; + struct flow_table table; /* Switch ports. */ struct hlist_head *ports; @@ -86,7 +86,6 @@ struct datapath { /* Network namespace ref. 
*/ struct net *net; #endif - unsigned long last_rehash; }; /** diff --git a/datapath/flow.c b/datapath/flow.c index 29122af7a..faa4e158f 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -46,202 +46,40 @@ #include "vlan.h" -static struct kmem_cache *flow_cache; - -static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask, - struct sw_flow_key_range *range, u8 val); - -static void update_range__(struct sw_flow_match *match, - size_t offset, size_t size, bool is_mask) +u64 ovs_flow_used_time(unsigned long flow_jiffies) { - struct sw_flow_key_range *range = NULL; - size_t start = rounddown(offset, sizeof(long)); - size_t end = roundup(offset + size, sizeof(long)); - - if (!is_mask) - range = &match->range; - else if (match->mask) - range = &match->mask->range; - - if (!range) - return; - - if (range->start == range->end) { - range->start = start; - range->end = end; - return; - } - - if (range->start > start) - range->start = start; + struct timespec cur_ts; + u64 cur_ms, idle_ms; - if (range->end < end) - range->end = end; -} + ktime_get_ts(&cur_ts); + idle_ms = jiffies_to_msecs(jiffies - flow_jiffies); + cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC + + cur_ts.tv_nsec / NSEC_PER_MSEC; -#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \ - do { \ - update_range__(match, offsetof(struct sw_flow_key, field), \ - sizeof((match)->key->field), is_mask); \ - if (is_mask) { \ - if ((match)->mask) \ - (match)->mask->key.field = value; \ - } else { \ - (match)->key->field = value; \ - } \ - } while (0) - -#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ - do { \ - update_range__(match, offsetof(struct sw_flow_key, field), \ - len, is_mask); \ - if (is_mask) { \ - if ((match)->mask) \ - memcpy(&(match)->mask->key.field, value_p, len);\ - } else { \ - memcpy(&(match)->key->field, value_p, len); \ - } \ - } while (0) - -static u16 range_n_bytes(const struct sw_flow_key_range *range) -{ - return range->end - range->start; + return cur_ms - idle_ms; } -void 
ovs_match_init(struct sw_flow_match *match, - struct sw_flow_key *key, - struct sw_flow_mask *mask) -{ - memset(match, 0, sizeof(*match)); - match->key = key; - match->mask = mask; - - memset(key, 0, sizeof(*key)); - - if (mask) { - memset(&mask->key, 0, sizeof(mask->key)); - mask->range.start = mask->range.end = 0; - } -} +#define TCP_FLAGS_OFFSET 13 +#define TCP_FLAG_MASK 0x3f -static bool ovs_match_validate(const struct sw_flow_match *match, - u64 key_attrs, u64 mask_attrs) +void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb) { - u64 key_expected = 1ULL << OVS_KEY_ATTR_ETHERNET; - u64 mask_allowed = key_attrs; /* At most allow all key attributes */ - - /* The following mask attributes allowed only if they - * pass the validation tests. */ - mask_allowed &= ~((1ULL << OVS_KEY_ATTR_IPV4) - | (1ULL << OVS_KEY_ATTR_IPV6) - | (1ULL << OVS_KEY_ATTR_TCP) - | (1ULL << OVS_KEY_ATTR_UDP) - | (1ULL << OVS_KEY_ATTR_SCTP) - | (1ULL << OVS_KEY_ATTR_ICMP) - | (1ULL << OVS_KEY_ATTR_ICMPV6) - | (1ULL << OVS_KEY_ATTR_ARP) - | (1ULL << OVS_KEY_ATTR_ND)); - - /* Always allowed mask fields. */ - mask_allowed |= ((1ULL << OVS_KEY_ATTR_TUNNEL) - | (1ULL << OVS_KEY_ATTR_IN_PORT) - | (1ULL << OVS_KEY_ATTR_ETHERTYPE)); - - /* Check key attributes. 
*/ - if (match->key->eth.type == htons(ETH_P_ARP) - || match->key->eth.type == htons(ETH_P_RARP)) { - key_expected |= 1ULL << OVS_KEY_ATTR_ARP; - if (match->mask && (match->mask->key.eth.type == htons(0xffff))) - mask_allowed |= 1ULL << OVS_KEY_ATTR_ARP; - } - - if (match->key->eth.type == htons(ETH_P_IP)) { - key_expected |= 1ULL << OVS_KEY_ATTR_IPV4; - if (match->mask && (match->mask->key.eth.type == htons(0xffff))) - mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV4; - - if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { - if (match->key->ip.proto == IPPROTO_UDP) { - key_expected |= 1ULL << OVS_KEY_ATTR_UDP; - if (match->mask && (match->mask->key.ip.proto == 0xff)) - mask_allowed |= 1ULL << OVS_KEY_ATTR_UDP; - } - - if (match->key->ip.proto == IPPROTO_SCTP) { - key_expected |= 1ULL << OVS_KEY_ATTR_SCTP; - if (match->mask && (match->mask->key.ip.proto == 0xff)) - mask_allowed |= 1ULL << OVS_KEY_ATTR_SCTP; - } - - if (match->key->ip.proto == IPPROTO_TCP) { - key_expected |= 1ULL << OVS_KEY_ATTR_TCP; - if (match->mask && (match->mask->key.ip.proto == 0xff)) - mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP; - } - - if (match->key->ip.proto == IPPROTO_ICMP) { - key_expected |= 1ULL << OVS_KEY_ATTR_ICMP; - if (match->mask && (match->mask->key.ip.proto == 0xff)) - mask_allowed |= 1ULL << OVS_KEY_ATTR_ICMP; - } - } - } - - if (match->key->eth.type == htons(ETH_P_IPV6)) { - key_expected |= 1ULL << OVS_KEY_ATTR_IPV6; - if (match->mask && (match->mask->key.eth.type == htons(0xffff))) - mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV6; - - if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { - if (match->key->ip.proto == IPPROTO_UDP) { - key_expected |= 1ULL << OVS_KEY_ATTR_UDP; - if (match->mask && (match->mask->key.ip.proto == 0xff)) - mask_allowed |= 1ULL << OVS_KEY_ATTR_UDP; - } - - if (match->key->ip.proto == IPPROTO_SCTP) { - key_expected |= 1ULL << OVS_KEY_ATTR_SCTP; - if (match->mask && (match->mask->key.ip.proto == 0xff)) - mask_allowed |= 1ULL << OVS_KEY_ATTR_SCTP; - } - - if 
(match->key->ip.proto == IPPROTO_TCP) { - key_expected |= 1ULL << OVS_KEY_ATTR_TCP; - if (match->mask && (match->mask->key.ip.proto == 0xff)) - mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP; - } - - if (match->key->ip.proto == IPPROTO_ICMPV6) { - key_expected |= 1ULL << OVS_KEY_ATTR_ICMPV6; - if (match->mask && (match->mask->key.ip.proto == 0xff)) - mask_allowed |= 1ULL << OVS_KEY_ATTR_ICMPV6; - - if (match->key->ipv6.tp.src == - htons(NDISC_NEIGHBOUR_SOLICITATION) || - match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { - key_expected |= 1ULL << OVS_KEY_ATTR_ND; - if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff))) - mask_allowed |= 1ULL << OVS_KEY_ATTR_ND; - } - } - } - } - - if ((key_attrs & key_expected) != key_expected) { - /* Key attributes check failed. */ - OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n", - key_attrs, key_expected); - return false; - } + u8 tcp_flags = 0; - if ((mask_attrs & mask_allowed) != mask_attrs) { - /* Mask attributes check failed. 
*/ - OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n", - mask_attrs, mask_allowed); - return false; + if ((flow->key.eth.type == htons(ETH_P_IP) || + flow->key.eth.type == htons(ETH_P_IPV6)) && + flow->key.ip.proto == IPPROTO_TCP && + likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) { + u8 *tcp = (u8 *)tcp_hdr(skb); + tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK; } - return true; + spin_lock(&flow->lock); + flow->used = jiffies; + flow->packet_count++; + flow->byte_count += skb->len; + flow->tcp_flags |= tcp_flags; + spin_unlock(&flow->lock); } static int check_header(struct sk_buff *skb, int len) @@ -312,19 +150,6 @@ static bool icmphdr_ok(struct sk_buff *skb) sizeof(struct icmphdr)); } -u64 ovs_flow_used_time(unsigned long flow_jiffies) -{ - struct timespec cur_ts; - u64 cur_ms, idle_ms; - - ktime_get_ts(&cur_ts); - idle_ms = jiffies_to_msecs(jiffies - flow_jiffies); - cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC + - cur_ts.tv_nsec / NSEC_PER_MSEC; - - return cur_ms - idle_ms; -} - static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key) { unsigned int nh_ofs = skb_network_offset(skb); @@ -373,319 +198,6 @@ static bool icmp6hdr_ok(struct sk_buff *skb) sizeof(struct icmp6hdr)); } -void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src, - const struct sw_flow_mask *mask) -{ - const long *m = (long *)((u8 *)&mask->key + mask->range.start); - const long *s = (long *)((u8 *)src + mask->range.start); - long *d = (long *)((u8 *)dst + mask->range.start); - int i; - - /* The memory outside of the 'mask->range' are not set since - * further operations on 'dst' only uses contents within - * 'mask->range'. 
- */ - for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long)) - *d++ = *s++ & *m++; -} - -#define TCP_FLAGS_OFFSET 13 -#define TCP_FLAG_MASK 0x3f - -void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb) -{ - u8 tcp_flags = 0; - - if ((flow->key.eth.type == htons(ETH_P_IP) || - flow->key.eth.type == htons(ETH_P_IPV6)) && - flow->key.ip.proto == IPPROTO_TCP && - likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) { - u8 *tcp = (u8 *)tcp_hdr(skb); - tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK; - } - - spin_lock(&flow->lock); - flow->used = jiffies; - flow->packet_count++; - flow->byte_count += skb->len; - flow->tcp_flags |= tcp_flags; - spin_unlock(&flow->lock); -} - -struct sw_flow_actions *ovs_flow_actions_alloc(int size) -{ - struct sw_flow_actions *sfa; - - if (size > MAX_ACTIONS_BUFSIZE) - return ERR_PTR(-EINVAL); - - sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); - if (!sfa) - return ERR_PTR(-ENOMEM); - - sfa->actions_len = 0; - return sfa; -} - -struct sw_flow *ovs_flow_alloc(void) -{ - struct sw_flow *flow; - - flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); - if (!flow) - return ERR_PTR(-ENOMEM); - - spin_lock_init(&flow->lock); - flow->sf_acts = NULL; - flow->mask = NULL; - - return flow; -} - -static struct hlist_head *find_bucket(struct flow_table *table, u32 hash) -{ - hash = jhash_1word(hash, table->hash_seed); - return flex_array_get(table->buckets, - (hash & (table->n_buckets - 1))); -} - -static struct flex_array *alloc_buckets(unsigned int n_buckets) -{ - struct flex_array *buckets; - int i, err; - - buckets = flex_array_alloc(sizeof(struct hlist_head), - n_buckets, GFP_KERNEL); - if (!buckets) - return NULL; - - err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL); - if (err) { - flex_array_free(buckets); - return NULL; - } - - for (i = 0; i < n_buckets; i++) - INIT_HLIST_HEAD((struct hlist_head *) - flex_array_get(buckets, i)); - - return buckets; -} - -static void free_buckets(struct 
flex_array *buckets) -{ - flex_array_free(buckets); -} - -static struct flow_table *__flow_tbl_alloc(int new_size) -{ - struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL); - - if (!table) - return NULL; - - table->buckets = alloc_buckets(new_size); - - if (!table->buckets) { - kfree(table); - return NULL; - } - table->n_buckets = new_size; - table->count = 0; - table->node_ver = 0; - table->keep_flows = false; - get_random_bytes(&table->hash_seed, sizeof(u32)); - table->mask_list = NULL; - - return table; -} - -static void __flow_tbl_destroy(struct flow_table *table) -{ - int i; - - if (table->keep_flows) - goto skip_flows; - - for (i = 0; i < table->n_buckets; i++) { - struct sw_flow *flow; - struct hlist_head *head = flex_array_get(table->buckets, i); - struct hlist_node *n; - int ver = table->node_ver; - - hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { - hlist_del(&flow->hash_node[ver]); - ovs_flow_free(flow, false); - } - } - - BUG_ON(!list_empty(table->mask_list)); - kfree(table->mask_list); - -skip_flows: - free_buckets(table->buckets); - kfree(table); -} - -struct flow_table *ovs_flow_tbl_alloc(int new_size) -{ - struct flow_table *table = __flow_tbl_alloc(new_size); - - if (!table) - return NULL; - - table->mask_list = kmalloc(sizeof(struct list_head), GFP_KERNEL); - if (!table->mask_list) { - table->keep_flows = true; - __flow_tbl_destroy(table); - return NULL; - } - INIT_LIST_HEAD(table->mask_list); - - return table; -} - -static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) -{ - struct flow_table *table = container_of(rcu, struct flow_table, rcu); - - __flow_tbl_destroy(table); -} - -void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred) -{ - if (!table) - return; - - if (deferred) - call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb); - else - __flow_tbl_destroy(table); -} - -struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *last) -{ - struct sw_flow *flow; - struct hlist_head *head; - int 
ver; - int i; - - ver = table->node_ver; - while (*bucket < table->n_buckets) { - i = 0; - head = flex_array_get(table->buckets, *bucket); - hlist_for_each_entry_rcu(flow, head, hash_node[ver]) { - if (i < *last) { - i++; - continue; - } - *last = i + 1; - return flow; - } - (*bucket)++; - *last = 0; - } - - return NULL; -} - -static void __tbl_insert(struct flow_table *table, struct sw_flow *flow) -{ - struct hlist_head *head; - - head = find_bucket(table, flow->hash); - hlist_add_head_rcu(&flow->hash_node[table->node_ver], head); - - table->count++; -} - -static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new) -{ - int old_ver; - int i; - - old_ver = old->node_ver; - new->node_ver = !old_ver; - - /* Insert in new table. */ - for (i = 0; i < old->n_buckets; i++) { - struct sw_flow *flow; - struct hlist_head *head; - - head = flex_array_get(old->buckets, i); - - hlist_for_each_entry(flow, head, hash_node[old_ver]) - __tbl_insert(new, flow); - } - - new->mask_list = old->mask_list; - old->keep_flows = true; -} - -static struct flow_table *__flow_tbl_rehash(struct flow_table *table, int n_buckets) -{ - struct flow_table *new_table; - - new_table = __flow_tbl_alloc(n_buckets); - if (!new_table) - return ERR_PTR(-ENOMEM); - - flow_table_copy_flows(table, new_table); - - return new_table; -} - -struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table) -{ - return __flow_tbl_rehash(table, table->n_buckets); -} - -struct flow_table *ovs_flow_tbl_expand(struct flow_table *table) -{ - return __flow_tbl_rehash(table, table->n_buckets * 2); -} - -static void __flow_free(struct sw_flow *flow) -{ - kfree((struct sf_flow_acts __force *)flow->sf_acts); - kmem_cache_free(flow_cache, flow); -} - -static void rcu_free_flow_callback(struct rcu_head *rcu) -{ - struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); - - __flow_free(flow); -} - -void ovs_flow_free(struct sw_flow *flow, bool deferred) -{ - if (!flow) - return; - - 
ovs_sw_flow_mask_del_ref(flow->mask, deferred); - - if (deferred) - call_rcu(&flow->rcu, rcu_free_flow_callback); - else - __flow_free(flow); -} - -/* RCU callback used by ovs_flow_deferred_free_acts. */ -static void rcu_free_acts_callback(struct rcu_head *rcu) -{ - struct sw_flow_actions *sf_acts = container_of(rcu, - struct sw_flow_actions, rcu); - kfree(sf_acts); -} - -/* Schedules 'sf_acts' to be freed after the next RCU grace period. - * The caller must hold rcu_read_lock for this to be sensible. */ -void ovs_flow_deferred_free_acts(struct sw_flow_actions *sf_acts) -{ - call_rcu(&sf_acts->rcu, rcu_free_acts_callback); -} - static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) { struct qtag_prefix { @@ -1010,1088 +522,3 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) return 0; } - -static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start, - int key_end) -{ - u32 *hash_key = (u32 *)((u8 *)key + key_start); - int hash_u32s = (key_end - key_start) >> 2; - - /* Make sure number of hash bytes are multiple of u32. 
*/ - BUILD_BUG_ON(sizeof(long) % sizeof(u32)); - - return jhash2(hash_key, hash_u32s, 0); -} - -static int flow_key_start(const struct sw_flow_key *key) -{ - if (key->tun_key.ipv4_dst) - return 0; - else - return rounddown(offsetof(struct sw_flow_key, phy), - sizeof(long)); -} - -static bool __cmp_key(const struct sw_flow_key *key1, - const struct sw_flow_key *key2, int key_start, int key_end) -{ - const long *cp1 = (long *)((u8 *)key1 + key_start); - const long *cp2 = (long *)((u8 *)key2 + key_start); - long diffs = 0; - int i; - - for (i = key_start; i < key_end; i += sizeof(long)) - diffs |= *cp1++ ^ *cp2++; - - return diffs == 0; -} - -static bool __flow_cmp_masked_key(const struct sw_flow *flow, - const struct sw_flow_key *key, int key_start, int key_end) -{ - return __cmp_key(&flow->key, key, key_start, key_end); -} - -static bool __flow_cmp_unmasked_key(const struct sw_flow *flow, - const struct sw_flow_key *key, int key_start, int key_end) -{ - return __cmp_key(&flow->unmasked_key, key, key_start, key_end); -} - -bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, - const struct sw_flow_key *key, int key_end) -{ - int key_start; - key_start = flow_key_start(key); - - return __flow_cmp_unmasked_key(flow, key, key_start, key_end); - -} - -struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table, - struct sw_flow_match *match) -{ - struct sw_flow_key *unmasked = match->key; - int key_end = match->range.end; - struct sw_flow *flow; - - flow = ovs_flow_lookup(table, unmasked); - if (flow && (!ovs_flow_cmp_unmasked_key(flow, unmasked, key_end))) - flow = NULL; - - return flow; -} - -static struct sw_flow *ovs_masked_flow_lookup(struct flow_table *table, - const struct sw_flow_key *unmasked, - struct sw_flow_mask *mask) -{ - struct sw_flow *flow; - struct hlist_head *head; - int key_start = mask->range.start; - int key_end = mask->range.end; - u32 hash; - struct sw_flow_key masked_key; - - ovs_flow_key_mask(&masked_key, unmasked, mask); - hash 
= ovs_flow_hash(&masked_key, key_start, key_end); - head = find_bucket(table, hash); - hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) { - if (flow->mask == mask && - __flow_cmp_masked_key(flow, &masked_key, - key_start, key_end)) - return flow; - } - return NULL; -} - -struct sw_flow *ovs_flow_lookup(struct flow_table *tbl, - const struct sw_flow_key *key) -{ - struct sw_flow *flow = NULL; - struct sw_flow_mask *mask; - - list_for_each_entry_rcu(mask, tbl->mask_list, list) { - flow = ovs_masked_flow_lookup(tbl, key, mask); - if (flow) /* Found */ - break; - } - - return flow; -} - - -void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow) -{ - flow->hash = ovs_flow_hash(&flow->key, flow->mask->range.start, - flow->mask->range.end); - __tbl_insert(table, flow); -} - -void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow) -{ - BUG_ON(table->count == 0); - hlist_del_rcu(&flow->hash_node[table->node_ver]); - table->count--; -} - -/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. 
*/ -const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { - [OVS_KEY_ATTR_ENCAP] = -1, - [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), - [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), - [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32), - [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), - [OVS_KEY_ATTR_VLAN] = sizeof(__be16), - [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), - [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4), - [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), - [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp), - [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp), - [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp), - [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp), - [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), - [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), - [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), - [OVS_KEY_ATTR_TUNNEL] = -1, -}; - -static bool is_all_zero(const u8 *fp, size_t size) -{ - int i; - - if (!fp) - return false; - - for (i = 0; i < size; i++) - if (fp[i]) - return false; - - return true; -} - -static int __parse_flow_nlattrs(const struct nlattr *attr, - const struct nlattr *a[], - u64 *attrsp, bool nz) -{ - const struct nlattr *nla; - u64 attrs; - int rem; - - attrs = *attrsp; - nla_for_each_nested(nla, attr, rem) { - u16 type = nla_type(nla); - int expected_len; - - if (type > OVS_KEY_ATTR_MAX) { - OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n", - type, OVS_KEY_ATTR_MAX); - return -EINVAL; - } - - if (attrs & (1ULL << type)) { - OVS_NLERR("Duplicate key attribute (type %d).\n", type); - return -EINVAL; - } - - expected_len = ovs_key_lens[type]; - if (nla_len(nla) != expected_len && expected_len != -1) { - OVS_NLERR("Key attribute has unexpected length (type=%d" - ", length=%d, expected=%d).\n", type, - nla_len(nla), expected_len); - return -EINVAL; - } - - if (!nz || !is_all_zero(nla_data(nla), expected_len)) { - attrs |= 1ULL << type; - a[type] = nla; - } - } - if (rem) { - OVS_NLERR("Message has %d unknown bytes.\n", rem); - 
return -EINVAL; - } - - *attrsp = attrs; - return 0; -} - -static int parse_flow_mask_nlattrs(const struct nlattr *attr, - const struct nlattr *a[], u64 *attrsp) -{ - return __parse_flow_nlattrs(attr, a, attrsp, true); -} - -static int parse_flow_nlattrs(const struct nlattr *attr, - const struct nlattr *a[], u64 *attrsp) -{ - return __parse_flow_nlattrs(attr, a, attrsp, false); -} - -int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, - struct sw_flow_match *match, bool is_mask) -{ - struct nlattr *a; - int rem; - bool ttl = false; - __be16 tun_flags = 0; - - nla_for_each_nested(a, attr, rem) { - int type = nla_type(a); - static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { - [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), - [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), - [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32), - [OVS_TUNNEL_KEY_ATTR_TOS] = 1, - [OVS_TUNNEL_KEY_ATTR_TTL] = 1, - [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, - [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, - }; - - if (type > OVS_TUNNEL_KEY_ATTR_MAX) { - OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n", - type, OVS_TUNNEL_KEY_ATTR_MAX); - return -EINVAL; - } - - if (ovs_tunnel_key_lens[type] != nla_len(a)) { - OVS_NLERR("IPv4 tunnel attribute type has unexpected " - " length (type=%d, length=%d, expected=%d).\n", - type, nla_len(a), ovs_tunnel_key_lens[type]); - return -EINVAL; - } - - switch (type) { - case OVS_TUNNEL_KEY_ATTR_ID: - SW_FLOW_KEY_PUT(match, tun_key.tun_id, - nla_get_be64(a), is_mask); - tun_flags |= TUNNEL_KEY; - break; - case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: - SW_FLOW_KEY_PUT(match, tun_key.ipv4_src, - nla_get_be32(a), is_mask); - break; - case OVS_TUNNEL_KEY_ATTR_IPV4_DST: - SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst, - nla_get_be32(a), is_mask); - break; - case OVS_TUNNEL_KEY_ATTR_TOS: - SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos, - nla_get_u8(a), is_mask); - break; - case OVS_TUNNEL_KEY_ATTR_TTL: - SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl, - nla_get_u8(a), is_mask); - ttl = 
true; - break; - case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: - tun_flags |= TUNNEL_DONT_FRAGMENT; - break; - case OVS_TUNNEL_KEY_ATTR_CSUM: - tun_flags |= TUNNEL_CSUM; - break; - default: - return -EINVAL; - } - } - - SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); - - if (rem > 0) { - OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem); - return -EINVAL; - } - - if (!is_mask) { - if (!match->key->tun_key.ipv4_dst) { - OVS_NLERR("IPv4 tunnel destination address is zero.\n"); - return -EINVAL; - } - - if (!ttl) { - OVS_NLERR("IPv4 tunnel TTL not specified.\n"); - return -EINVAL; - } - } - - return 0; -} - -int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, - const struct ovs_key_ipv4_tunnel *tun_key, - const struct ovs_key_ipv4_tunnel *output) -{ - struct nlattr *nla; - - nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); - if (!nla) - return -EMSGSIZE; - - if (output->tun_flags & TUNNEL_KEY && - nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) - return -EMSGSIZE; - if (output->ipv4_src && - nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src)) - return -EMSGSIZE; - if (output->ipv4_dst && - nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst)) - return -EMSGSIZE; - if (output->ipv4_tos && - nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos)) - return -EMSGSIZE; - if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl)) - return -EMSGSIZE; - if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) && - nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) - return -EMSGSIZE; - if ((output->tun_flags & TUNNEL_CSUM) && - nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) - return -EMSGSIZE; - - nla_nest_end(skb, nla); - return 0; -} - - -static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, - const struct nlattr **a, bool is_mask) -{ - if (*attrs & (1ULL << OVS_KEY_ATTR_PRIORITY)) { - SW_FLOW_KEY_PUT(match, phy.priority, - nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask); - *attrs &= ~(1ULL << 
OVS_KEY_ATTR_PRIORITY); - } - - if (*attrs & (1ULL << OVS_KEY_ATTR_IN_PORT)) { - u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]); - - if (is_mask) - in_port = 0xffffffff; /* Always exact match in_port. */ - else if (in_port >= DP_MAX_PORTS) - return -EINVAL; - - SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask); - *attrs &= ~(1ULL << OVS_KEY_ATTR_IN_PORT); - } else if (!is_mask) { - SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask); - } - - if (*attrs & (1ULL << OVS_KEY_ATTR_SKB_MARK)) { - uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]); - - SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask); - *attrs &= ~(1ULL << OVS_KEY_ATTR_SKB_MARK); - } - if (*attrs & (1ULL << OVS_KEY_ATTR_TUNNEL)) { - if (ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, - is_mask)) - return -EINVAL; - *attrs &= ~(1ULL << OVS_KEY_ATTR_TUNNEL); - } - return 0; -} - -static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, - const struct nlattr **a, bool is_mask) -{ - int err; - u64 orig_attrs = attrs; - - err = metadata_from_nlattrs(match, &attrs, a, is_mask); - if (err) - return err; - - if (attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) { - const struct ovs_key_ethernet *eth_key; - - eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]); - SW_FLOW_KEY_MEMCPY(match, eth.src, - eth_key->eth_src, ETH_ALEN, is_mask); - SW_FLOW_KEY_MEMCPY(match, eth.dst, - eth_key->eth_dst, ETH_ALEN, is_mask); - attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERNET); - } - - if (attrs & (1ULL << OVS_KEY_ATTR_VLAN)) { - __be16 tci; - - tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); - if (!(tci & htons(VLAN_TAG_PRESENT))) { - if (is_mask) - OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n"); - else - OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n"); - - return -EINVAL; - } - - SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask); - attrs &= ~(1ULL << OVS_KEY_ATTR_VLAN); - } else if (!is_mask) - SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true); - - if (attrs & 
(1ULL << OVS_KEY_ATTR_ETHERTYPE)) { - __be16 eth_type; - - eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); - if (is_mask) { - /* Always exact match EtherType. */ - eth_type = htons(0xffff); - } else if (ntohs(eth_type) < ETH_P_802_3_MIN) { - OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n", - ntohs(eth_type), ETH_P_802_3_MIN); - return -EINVAL; - } - - SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask); - attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE); - } else if (!is_mask) { - SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); - } - - if (attrs & (1ULL << OVS_KEY_ATTR_IPV4)) { - const struct ovs_key_ipv4 *ipv4_key; - - ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]); - if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) { - OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n", - ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX); - return -EINVAL; - } - SW_FLOW_KEY_PUT(match, ip.proto, - ipv4_key->ipv4_proto, is_mask); - SW_FLOW_KEY_PUT(match, ip.tos, - ipv4_key->ipv4_tos, is_mask); - SW_FLOW_KEY_PUT(match, ip.ttl, - ipv4_key->ipv4_ttl, is_mask); - SW_FLOW_KEY_PUT(match, ip.frag, - ipv4_key->ipv4_frag, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.addr.src, - ipv4_key->ipv4_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.addr.dst, - ipv4_key->ipv4_dst, is_mask); - attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4); - } - - if (attrs & (1ULL << OVS_KEY_ATTR_IPV6)) { - const struct ovs_key_ipv6 *ipv6_key; - - ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]); - if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) { - OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n", - ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX); - return -EINVAL; - } - SW_FLOW_KEY_PUT(match, ipv6.label, - ipv6_key->ipv6_label, is_mask); - SW_FLOW_KEY_PUT(match, ip.proto, - ipv6_key->ipv6_proto, is_mask); - SW_FLOW_KEY_PUT(match, ip.tos, - ipv6_key->ipv6_tclass, is_mask); - SW_FLOW_KEY_PUT(match, ip.ttl, - ipv6_key->ipv6_hlimit, is_mask); - SW_FLOW_KEY_PUT(match, ip.frag, - 
ipv6_key->ipv6_frag, is_mask); - SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src, - ipv6_key->ipv6_src, - sizeof(match->key->ipv6.addr.src), - is_mask); - SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst, - ipv6_key->ipv6_dst, - sizeof(match->key->ipv6.addr.dst), - is_mask); - - attrs &= ~(1ULL << OVS_KEY_ATTR_IPV6); - } - - if (attrs & (1ULL << OVS_KEY_ATTR_ARP)) { - const struct ovs_key_arp *arp_key; - - arp_key = nla_data(a[OVS_KEY_ATTR_ARP]); - if (!is_mask && (arp_key->arp_op & htons(0xff00))) { - OVS_NLERR("Unknown ARP opcode (opcode=%d).\n", - arp_key->arp_op); - return -EINVAL; - } - - SW_FLOW_KEY_PUT(match, ipv4.addr.src, - arp_key->arp_sip, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.addr.dst, - arp_key->arp_tip, is_mask); - SW_FLOW_KEY_PUT(match, ip.proto, - ntohs(arp_key->arp_op), is_mask); - SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha, - arp_key->arp_sha, ETH_ALEN, is_mask); - SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha, - arp_key->arp_tha, ETH_ALEN, is_mask); - - attrs &= ~(1ULL << OVS_KEY_ATTR_ARP); - } - - if (attrs & (1ULL << OVS_KEY_ATTR_TCP)) { - const struct ovs_key_tcp *tcp_key; - - tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); - if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) { - SW_FLOW_KEY_PUT(match, ipv4.tp.src, - tcp_key->tcp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, - tcp_key->tcp_dst, is_mask); - } else { - SW_FLOW_KEY_PUT(match, ipv6.tp.src, - tcp_key->tcp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, - tcp_key->tcp_dst, is_mask); - } - attrs &= ~(1ULL << OVS_KEY_ATTR_TCP); - } - - if (attrs & (1ULL << OVS_KEY_ATTR_UDP)) { - const struct ovs_key_udp *udp_key; - - udp_key = nla_data(a[OVS_KEY_ATTR_UDP]); - if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) { - SW_FLOW_KEY_PUT(match, ipv4.tp.src, - udp_key->udp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, - udp_key->udp_dst, is_mask); - } else { - SW_FLOW_KEY_PUT(match, ipv6.tp.src, - udp_key->udp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, - udp_key->udp_dst, is_mask); - } - attrs &= ~(1ULL << 
OVS_KEY_ATTR_UDP); - } - - if (attrs & (1ULL << OVS_KEY_ATTR_SCTP)) { - const struct ovs_key_sctp *sctp_key; - - sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]); - if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) { - SW_FLOW_KEY_PUT(match, ipv4.tp.src, - sctp_key->sctp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, - sctp_key->sctp_dst, is_mask); - } else { - SW_FLOW_KEY_PUT(match, ipv6.tp.src, - sctp_key->sctp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, - sctp_key->sctp_dst, is_mask); - } - attrs &= ~(1ULL << OVS_KEY_ATTR_SCTP); - } - - if (attrs & (1ULL << OVS_KEY_ATTR_ICMP)) { - const struct ovs_key_icmp *icmp_key; - - icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]); - SW_FLOW_KEY_PUT(match, ipv4.tp.src, - htons(icmp_key->icmp_type), is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, - htons(icmp_key->icmp_code), is_mask); - attrs &= ~(1ULL << OVS_KEY_ATTR_ICMP); - } - - if (attrs & (1ULL << OVS_KEY_ATTR_ICMPV6)) { - const struct ovs_key_icmpv6 *icmpv6_key; - - icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]); - SW_FLOW_KEY_PUT(match, ipv6.tp.src, - htons(icmpv6_key->icmpv6_type), is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, - htons(icmpv6_key->icmpv6_code), is_mask); - attrs &= ~(1ULL << OVS_KEY_ATTR_ICMPV6); - } - - if (attrs & (1ULL << OVS_KEY_ATTR_ND)) { - const struct ovs_key_nd *nd_key; - - nd_key = nla_data(a[OVS_KEY_ATTR_ND]); - SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target, - nd_key->nd_target, - sizeof(match->key->ipv6.nd.target), - is_mask); - SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll, - nd_key->nd_sll, ETH_ALEN, is_mask); - SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll, - nd_key->nd_tll, ETH_ALEN, is_mask); - attrs &= ~(1ULL << OVS_KEY_ATTR_ND); - } - - if (attrs != 0) - return -EINVAL; - - return 0; -} - -/** - * ovs_match_from_nlattrs - parses Netlink attributes into a flow key and - * mask. In case the 'mask' is NULL, the flow is treated as exact match - * flow. Otherwise, it is treated as a wildcarded flow, except the mask - * does not include any don't care bit. 
- * @match: receives the extracted flow match information. - * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute - * sequence. The fields should of the packet that triggered the creation - * of this flow. - * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink - * attribute specifies the mask field of the wildcarded flow. - */ -int ovs_match_from_nlattrs(struct sw_flow_match *match, - const struct nlattr *key, - const struct nlattr *mask) -{ - const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; - const struct nlattr *encap; - u64 key_attrs = 0; - u64 mask_attrs = 0; - bool encap_valid = false; - int err; - - err = parse_flow_nlattrs(key, a, &key_attrs); - if (err) - return err; - - if ((key_attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) && - (key_attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)) && - (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) { - __be16 tci; - - if (!((key_attrs & (1ULL << OVS_KEY_ATTR_VLAN)) && - (key_attrs & (1ULL << OVS_KEY_ATTR_ENCAP)))) { - OVS_NLERR("Invalid Vlan frame.\n"); - return -EINVAL; - } - - key_attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE); - tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); - encap = a[OVS_KEY_ATTR_ENCAP]; - key_attrs &= ~(1ULL << OVS_KEY_ATTR_ENCAP); - encap_valid = true; - - if (tci & htons(VLAN_TAG_PRESENT)) { - err = parse_flow_nlattrs(encap, a, &key_attrs); - if (err) - return err; - } else if (!tci) { - /* Corner case for truncated 802.1Q header. 
*/ - if (nla_len(encap)) { - OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n"); - return -EINVAL; - } - } else { - OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n"); - return -EINVAL; - } - } - - err = ovs_key_from_nlattrs(match, key_attrs, a, false); - if (err) - return err; - - if (mask) { - err = parse_flow_mask_nlattrs(mask, a, &mask_attrs); - if (err) - return err; - - if (mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP) { - __be16 eth_type = 0; - __be16 tci = 0; - - if (!encap_valid) { - OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n"); - return -EINVAL; - } - - mask_attrs &= ~(1ULL << OVS_KEY_ATTR_ENCAP); - if (a[OVS_KEY_ATTR_ETHERTYPE]) - eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); - - if (eth_type == htons(0xffff)) { - mask_attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE); - encap = a[OVS_KEY_ATTR_ENCAP]; - err = parse_flow_mask_nlattrs(encap, a, &mask_attrs); - } else { - OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n", - ntohs(eth_type)); - return -EINVAL; - } - - if (a[OVS_KEY_ATTR_VLAN]) - tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); - - if (!(tci & htons(VLAN_TAG_PRESENT))) { - OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci)); - return -EINVAL; - } - } - - err = ovs_key_from_nlattrs(match, mask_attrs, a, true); - if (err) - return err; - } else { - /* Populate exact match flow's key mask. */ - if (match->mask) - ovs_sw_flow_mask_set(match->mask, &match->range, 0xff); - } - - if (!ovs_match_validate(match, key_attrs, mask_attrs)) - return -EINVAL; - - return 0; -} - -/** - * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key. - * @flow: Receives extracted in_port, priority, tun_key and skb_mark. - * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute - * sequence. 
- * - * This parses a series of Netlink attributes that form a flow key, which must - * take the same form accepted by flow_from_nlattrs(), but only enough of it to - * get the metadata, that is, the parts of the flow key that cannot be - * extracted from the packet itself. - */ - -int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, - const struct nlattr *attr) -{ - struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key; - const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; - u64 attrs = 0; - int err; - struct sw_flow_match match; - - flow->key.phy.in_port = DP_MAX_PORTS; - flow->key.phy.priority = 0; - flow->key.phy.skb_mark = 0; - memset(tun_key, 0, sizeof(flow->key.tun_key)); - - err = parse_flow_nlattrs(attr, a, &attrs); - if (err) - return -EINVAL; - - memset(&match, 0, sizeof(match)); - match.key = &flow->key; - - err = metadata_from_nlattrs(&match, &attrs, a, false); - if (err) - return err; - - return 0; -} - -int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, - const struct sw_flow_key *output, struct sk_buff *skb) -{ - struct ovs_key_ethernet *eth_key; - struct nlattr *nla, *encap; - bool is_mask = (swkey != output); - - if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority)) - goto nla_put_failure; - - if ((swkey->tun_key.ipv4_dst || is_mask) && - ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key)) - goto nla_put_failure; - - if (swkey->phy.in_port == DP_MAX_PORTS) { - if (is_mask && (output->phy.in_port == 0xffff)) - if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff)) - goto nla_put_failure; - } else { - u16 upper_u16; - upper_u16 = !is_mask ? 
0 : 0xffff; - - if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, - (upper_u16 << 16) | output->phy.in_port)) - goto nla_put_failure; - } - - if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) - goto nla_put_failure; - - nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); - if (!nla) - goto nla_put_failure; - - eth_key = nla_data(nla); - memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN); - memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN); - - if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { - __be16 eth_type; - eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff); - if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) || - nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci)) - goto nla_put_failure; - encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); - if (!swkey->eth.tci) - goto unencap; - } else - encap = NULL; - - if (swkey->eth.type == htons(ETH_P_802_2)) { - /* - * Ethertype 802.2 is represented in the netlink with omitted - * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and - * 0xffff in the mask attribute. Ethertype can also - * be wildcarded. 
- */ - if (is_mask && output->eth.type) - if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, - output->eth.type)) - goto nla_put_failure; - goto unencap; - } - - if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) - goto nla_put_failure; - - if (swkey->eth.type == htons(ETH_P_IP)) { - struct ovs_key_ipv4 *ipv4_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key)); - if (!nla) - goto nla_put_failure; - ipv4_key = nla_data(nla); - ipv4_key->ipv4_src = output->ipv4.addr.src; - ipv4_key->ipv4_dst = output->ipv4.addr.dst; - ipv4_key->ipv4_proto = output->ip.proto; - ipv4_key->ipv4_tos = output->ip.tos; - ipv4_key->ipv4_ttl = output->ip.ttl; - ipv4_key->ipv4_frag = output->ip.frag; - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - struct ovs_key_ipv6 *ipv6_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key)); - if (!nla) - goto nla_put_failure; - ipv6_key = nla_data(nla); - memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src, - sizeof(ipv6_key->ipv6_src)); - memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst, - sizeof(ipv6_key->ipv6_dst)); - ipv6_key->ipv6_label = output->ipv6.label; - ipv6_key->ipv6_proto = output->ip.proto; - ipv6_key->ipv6_tclass = output->ip.tos; - ipv6_key->ipv6_hlimit = output->ip.ttl; - ipv6_key->ipv6_frag = output->ip.frag; - } else if (swkey->eth.type == htons(ETH_P_ARP) || - swkey->eth.type == htons(ETH_P_RARP)) { - struct ovs_key_arp *arp_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key)); - if (!nla) - goto nla_put_failure; - arp_key = nla_data(nla); - memset(arp_key, 0, sizeof(struct ovs_key_arp)); - arp_key->arp_sip = output->ipv4.addr.src; - arp_key->arp_tip = output->ipv4.addr.dst; - arp_key->arp_op = htons(output->ip.proto); - memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN); - memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN); - } - - if ((swkey->eth.type == htons(ETH_P_IP) || - swkey->eth.type == htons(ETH_P_IPV6)) && - swkey->ip.frag != OVS_FRAG_TYPE_LATER) { - 
- if (swkey->ip.proto == IPPROTO_TCP) { - struct ovs_key_tcp *tcp_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key)); - if (!nla) - goto nla_put_failure; - tcp_key = nla_data(nla); - if (swkey->eth.type == htons(ETH_P_IP)) { - tcp_key->tcp_src = output->ipv4.tp.src; - tcp_key->tcp_dst = output->ipv4.tp.dst; - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - tcp_key->tcp_src = output->ipv6.tp.src; - tcp_key->tcp_dst = output->ipv6.tp.dst; - } - } else if (swkey->ip.proto == IPPROTO_UDP) { - struct ovs_key_udp *udp_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key)); - if (!nla) - goto nla_put_failure; - udp_key = nla_data(nla); - if (swkey->eth.type == htons(ETH_P_IP)) { - udp_key->udp_src = output->ipv4.tp.src; - udp_key->udp_dst = output->ipv4.tp.dst; - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - udp_key->udp_src = output->ipv6.tp.src; - udp_key->udp_dst = output->ipv6.tp.dst; - } - } else if (swkey->ip.proto == IPPROTO_SCTP) { - struct ovs_key_sctp *sctp_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key)); - if (!nla) - goto nla_put_failure; - sctp_key = nla_data(nla); - if (swkey->eth.type == htons(ETH_P_IP)) { - sctp_key->sctp_src = swkey->ipv4.tp.src; - sctp_key->sctp_dst = swkey->ipv4.tp.dst; - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - sctp_key->sctp_src = swkey->ipv6.tp.src; - sctp_key->sctp_dst = swkey->ipv6.tp.dst; - } - } else if (swkey->eth.type == htons(ETH_P_IP) && - swkey->ip.proto == IPPROTO_ICMP) { - struct ovs_key_icmp *icmp_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key)); - if (!nla) - goto nla_put_failure; - icmp_key = nla_data(nla); - icmp_key->icmp_type = ntohs(output->ipv4.tp.src); - icmp_key->icmp_code = ntohs(output->ipv4.tp.dst); - } else if (swkey->eth.type == htons(ETH_P_IPV6) && - swkey->ip.proto == IPPROTO_ICMPV6) { - struct ovs_key_icmpv6 *icmpv6_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6, - sizeof(*icmpv6_key)); - if (!nla) - goto 
nla_put_failure; - icmpv6_key = nla_data(nla); - icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src); - icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst); - - if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || - icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { - struct ovs_key_nd *nd_key; - - nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key)); - if (!nla) - goto nla_put_failure; - nd_key = nla_data(nla); - memcpy(nd_key->nd_target, &output->ipv6.nd.target, - sizeof(nd_key->nd_target)); - memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN); - memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN); - } - } - } - -unencap: - if (encap) - nla_nest_end(skb, encap); - - return 0; - -nla_put_failure: - return -EMSGSIZE; -} - -/* Initializes the flow module. - * Returns zero if successful or a negative error code. */ -int ovs_flow_init(void) -{ - BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long)); - BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); - - flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, - 0, NULL); - if (flow_cache == NULL) - return -ENOMEM; - - return 0; -} - -/* Uninitializes the flow module. 
*/ -void ovs_flow_exit(void) -{ - kmem_cache_destroy(flow_cache); -} - -struct sw_flow_mask *ovs_sw_flow_mask_alloc(void) -{ - struct sw_flow_mask *mask; - - mask = kmalloc(sizeof(*mask), GFP_KERNEL); - if (mask) - mask->ref_count = 0; - - return mask; -} - -void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *mask) -{ - mask->ref_count++; -} - -static void rcu_free_sw_flow_mask_cb(struct rcu_head *rcu) -{ - struct sw_flow_mask *mask = container_of(rcu, struct sw_flow_mask, rcu); - - kfree(mask); -} - -void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred) -{ - if (!mask) - return; - - BUG_ON(!mask->ref_count); - mask->ref_count--; - - if (!mask->ref_count) { - list_del_rcu(&mask->list); - if (deferred) - call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb); - else - kfree(mask); - } -} - -static bool ovs_sw_flow_mask_equal(const struct sw_flow_mask *a, - const struct sw_flow_mask *b) -{ - u8 *a_ = (u8 *)&a->key + a->range.start; - u8 *b_ = (u8 *)&b->key + b->range.start; - - return (a->range.end == b->range.end) - && (a->range.start == b->range.start) - && (memcmp(a_, b_, range_n_bytes(&a->range)) == 0); -} - -struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *tbl, - const struct sw_flow_mask *mask) -{ - struct list_head *ml; - - list_for_each(ml, tbl->mask_list) { - struct sw_flow_mask *m; - m = container_of(ml, struct sw_flow_mask, list); - if (ovs_sw_flow_mask_equal(mask, m)) - return m; - } - - return NULL; -} - -/** - * add a new mask into the mask list. - * The caller needs to make sure that 'mask' is not the same - * as any masks that are already on the list. - */ -void ovs_sw_flow_mask_insert(struct flow_table *tbl, struct sw_flow_mask *mask) -{ - list_add_rcu(&mask->list, tbl->mask_list); -} - -/** - * Set 'range' fields in the mask to the value of 'val'. 
- */ -static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask, - struct sw_flow_key_range *range, u8 val) -{ - u8 *m = (u8 *)&mask->key + range->start; - - mask->range = *range; - memset(m, val, range_n_bytes(range)); -} diff --git a/datapath/flow.h b/datapath/flow.h index 03eae0352..91a3022cd 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -35,14 +35,6 @@ #include struct sk_buff; -struct sw_flow_mask; -struct flow_table; - -struct sw_flow_actions { - struct rcu_head rcu; - u32 actions_len; - struct nlattr actions[]; -}; /* Used to memset ovs_key_ipv4_tunnel padding. */ #define OVS_TUNNEL_KEY_SIZE \ @@ -129,6 +121,31 @@ struct sw_flow_key { }; } __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */ +struct sw_flow_key_range { + size_t start; + size_t end; +}; + +struct sw_flow_mask { + int ref_count; + struct rcu_head rcu; + struct list_head list; + struct sw_flow_key_range range; + struct sw_flow_key key; +}; + +struct sw_flow_match { + struct sw_flow_key *key; + struct sw_flow_key_range range; + struct sw_flow_mask *mask; +}; + +struct sw_flow_actions { + struct rcu_head rcu; + u32 actions_len; + struct nlattr actions[]; +}; + struct sw_flow { struct rcu_head rcu; struct hlist_node hash_node[2]; @@ -146,20 +163,6 @@ struct sw_flow { u8 tcp_flags; /* Union of seen TCP flags. 
*/ }; -struct sw_flow_key_range { - size_t start; - size_t end; -}; - -struct sw_flow_match { - struct sw_flow_key *key; - struct sw_flow_key_range range; - struct sw_flow_mask *mask; -}; - -void ovs_match_init(struct sw_flow_match *match, - struct sw_flow_key *key, struct sw_flow_mask *mask); - struct arp_eth_header { __be16 ar_hrd; /* format of hardware address */ __be16 ar_pro; /* format of protocol address */ @@ -174,88 +177,9 @@ struct arp_eth_header { unsigned char ar_tip[4]; /* target IP address */ } __packed; -int ovs_flow_init(void); -void ovs_flow_exit(void); - -struct sw_flow *ovs_flow_alloc(void); -void ovs_flow_deferred_free(struct sw_flow *); -void ovs_flow_free(struct sw_flow *, bool deferred); - -struct sw_flow_actions *ovs_flow_actions_alloc(int actions_len); -void ovs_flow_deferred_free_acts(struct sw_flow_actions *); - -int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *); void ovs_flow_used(struct sw_flow *, struct sk_buff *); u64 ovs_flow_used_time(unsigned long flow_jiffies); -int ovs_flow_to_nlattrs(const struct sw_flow_key *, - const struct sw_flow_key *, struct sk_buff *); -int ovs_match_from_nlattrs(struct sw_flow_match *match, - const struct nlattr *, - const struct nlattr *); -int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, - const struct nlattr *attr); -#define MAX_ACTIONS_BUFSIZE (32 * 1024) -#define TBL_MIN_BUCKETS 1024 - -struct flow_table { - struct flex_array *buckets; - unsigned int count, n_buckets; - struct rcu_head rcu; - struct list_head *mask_list; - int node_ver; - u32 hash_seed; - bool keep_flows; -}; - -static inline int ovs_flow_tbl_count(struct flow_table *table) -{ - return table->count; -} - -static inline int ovs_flow_tbl_need_to_expand(struct flow_table *table) -{ - return (table->count > table->n_buckets); -} - -struct sw_flow *ovs_flow_lookup(struct flow_table *, - const struct sw_flow_key *); -struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table, - struct 
sw_flow_match *match); - -void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred); -struct flow_table *ovs_flow_tbl_alloc(int new_size); -struct flow_table *ovs_flow_tbl_expand(struct flow_table *table); -struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table); - -void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow); -void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow); - -struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *idx); -extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1]; -int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, - struct sw_flow_match *match, bool is_mask); -int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, - const struct ovs_key_ipv4_tunnel *tun_key, - const struct ovs_key_ipv4_tunnel *output); - -bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, - const struct sw_flow_key *key, int key_end); - -struct sw_flow_mask { - int ref_count; - struct rcu_head rcu; - struct list_head list; - struct sw_flow_key_range range; - struct sw_flow_key key; -}; +int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *); -struct sw_flow_mask *ovs_sw_flow_mask_alloc(void); -void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *); -void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *, bool deferred); -void ovs_sw_flow_mask_insert(struct flow_table *, struct sw_flow_mask *); -struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *, - const struct sw_flow_mask *); -void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src, - const struct sw_flow_mask *mask); #endif /* flow.h */ diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c new file mode 100644 index 000000000..515a9f61e --- /dev/null +++ b/datapath/flow_netlink.c @@ -0,0 +1,1603 @@ +/* + * Copyright (c) 2007-2013 Nicira, Inc. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#include "flow.h" +#include "datapath.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "flow_netlink.h" + +static void update_range__(struct sw_flow_match *match, + size_t offset, size_t size, bool is_mask) +{ + struct sw_flow_key_range *range = NULL; + size_t start = rounddown(offset, sizeof(long)); + size_t end = roundup(offset + size, sizeof(long)); + + if (!is_mask) + range = &match->range; + else if (match->mask) + range = &match->mask->range; + + if (!range) + return; + + if (range->start == range->end) { + range->start = start; + range->end = end; + return; + } + + if (range->start > start) + range->start = start; + + if (range->end < end) + range->end = end; +} + +#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \ + do { \ + update_range__(match, offsetof(struct sw_flow_key, field), \ + sizeof((match)->key->field), is_mask); \ + if (is_mask) { \ + if ((match)->mask) \ + (match)->mask->key.field = value; \ + } else { \ + (match)->key->field = value; \ + } \ + } while (0) + +#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ + do { \ + update_range__(match, 
offsetof(struct sw_flow_key, field), \ + len, is_mask); \ + if (is_mask) { \ + if ((match)->mask) \ + memcpy(&(match)->mask->key.field, value_p, len);\ + } else { \ + memcpy(&(match)->key->field, value_p, len); \ + } \ + } while (0) + +static u16 range_n_bytes(const struct sw_flow_key_range *range) +{ + return range->end - range->start; +} + +static bool match_validate(const struct sw_flow_match *match, + u64 key_attrs, u64 mask_attrs) +{ + u64 key_expected = 1ULL << OVS_KEY_ATTR_ETHERNET; + u64 mask_allowed = key_attrs; /* At most allow all key attributes */ + + /* The following mask attributes allowed only if they + * pass the validation tests. */ + mask_allowed &= ~((1ULL << OVS_KEY_ATTR_IPV4) + | (1ULL << OVS_KEY_ATTR_IPV6) + | (1ULL << OVS_KEY_ATTR_TCP) + | (1ULL << OVS_KEY_ATTR_UDP) + | (1ULL << OVS_KEY_ATTR_SCTP) + | (1ULL << OVS_KEY_ATTR_ICMP) + | (1ULL << OVS_KEY_ATTR_ICMPV6) + | (1ULL << OVS_KEY_ATTR_ARP) + | (1ULL << OVS_KEY_ATTR_ND)); + + /* Always allowed mask fields. */ + mask_allowed |= ((1ULL << OVS_KEY_ATTR_TUNNEL) + | (1ULL << OVS_KEY_ATTR_IN_PORT) + | (1ULL << OVS_KEY_ATTR_ETHERTYPE)); + + /* Check key attributes. 
*/ + if (match->key->eth.type == htons(ETH_P_ARP) + || match->key->eth.type == htons(ETH_P_RARP)) { + key_expected |= 1ULL << OVS_KEY_ATTR_ARP; + if (match->mask && (match->mask->key.eth.type == htons(0xffff))) + mask_allowed |= 1ULL << OVS_KEY_ATTR_ARP; + } + + if (match->key->eth.type == htons(ETH_P_IP)) { + key_expected |= 1ULL << OVS_KEY_ATTR_IPV4; + if (match->mask && (match->mask->key.eth.type == htons(0xffff))) + mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV4; + + if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { + if (match->key->ip.proto == IPPROTO_UDP) { + key_expected |= 1ULL << OVS_KEY_ATTR_UDP; + if (match->mask && (match->mask->key.ip.proto == 0xff)) + mask_allowed |= 1ULL << OVS_KEY_ATTR_UDP; + } + + if (match->key->ip.proto == IPPROTO_SCTP) { + key_expected |= 1ULL << OVS_KEY_ATTR_SCTP; + if (match->mask && (match->mask->key.ip.proto == 0xff)) + mask_allowed |= 1ULL << OVS_KEY_ATTR_SCTP; + } + + if (match->key->ip.proto == IPPROTO_TCP) { + key_expected |= 1ULL << OVS_KEY_ATTR_TCP; + if (match->mask && (match->mask->key.ip.proto == 0xff)) + mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP; + } + + if (match->key->ip.proto == IPPROTO_ICMP) { + key_expected |= 1ULL << OVS_KEY_ATTR_ICMP; + if (match->mask && (match->mask->key.ip.proto == 0xff)) + mask_allowed |= 1ULL << OVS_KEY_ATTR_ICMP; + } + } + } + + if (match->key->eth.type == htons(ETH_P_IPV6)) { + key_expected |= 1ULL << OVS_KEY_ATTR_IPV6; + if (match->mask && (match->mask->key.eth.type == htons(0xffff))) + mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV6; + + if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { + if (match->key->ip.proto == IPPROTO_UDP) { + key_expected |= 1ULL << OVS_KEY_ATTR_UDP; + if (match->mask && (match->mask->key.ip.proto == 0xff)) + mask_allowed |= 1ULL << OVS_KEY_ATTR_UDP; + } + + if (match->key->ip.proto == IPPROTO_SCTP) { + key_expected |= 1ULL << OVS_KEY_ATTR_SCTP; + if (match->mask && (match->mask->key.ip.proto == 0xff)) + mask_allowed |= 1ULL << OVS_KEY_ATTR_SCTP; + } + + if 
(match->key->ip.proto == IPPROTO_TCP) { + key_expected |= 1ULL << OVS_KEY_ATTR_TCP; + if (match->mask && (match->mask->key.ip.proto == 0xff)) + mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP; + } + + if (match->key->ip.proto == IPPROTO_ICMPV6) { + key_expected |= 1ULL << OVS_KEY_ATTR_ICMPV6; + if (match->mask && (match->mask->key.ip.proto == 0xff)) + mask_allowed |= 1ULL << OVS_KEY_ATTR_ICMPV6; + + if (match->key->ipv6.tp.src == + htons(NDISC_NEIGHBOUR_SOLICITATION) || + match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { + key_expected |= 1ULL << OVS_KEY_ATTR_ND; + if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff))) + mask_allowed |= 1ULL << OVS_KEY_ATTR_ND; + } + } + } + } + + if ((key_attrs & key_expected) != key_expected) { + /* Key attributes check failed. */ + OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n", + key_attrs, key_expected); + return false; + } + + if ((mask_attrs & mask_allowed) != mask_attrs) { + /* Mask attributes check failed. */ + OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n", + mask_attrs, mask_allowed); + return false; + } + + return true; +} + +/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. 
*/ +static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { + [OVS_KEY_ATTR_ENCAP] = -1, + [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), + [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), + [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32), + [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), + [OVS_KEY_ATTR_VLAN] = sizeof(__be16), + [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), + [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4), + [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), + [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp), + [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp), + [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp), + [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp), + [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), + [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), + [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), + [OVS_KEY_ATTR_TUNNEL] = -1, +}; + +static bool is_all_zero(const u8 *fp, size_t size) +{ + int i; + + if (!fp) + return false; + + for (i = 0; i < size; i++) + if (fp[i]) + return false; + + return true; +} + +static int __parse_flow_nlattrs(const struct nlattr *attr, + const struct nlattr *a[], + u64 *attrsp, bool nz) +{ + const struct nlattr *nla; + u64 attrs; + int rem; + + attrs = *attrsp; + nla_for_each_nested(nla, attr, rem) { + u16 type = nla_type(nla); + int expected_len; + + if (type > OVS_KEY_ATTR_MAX) { + OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n", + type, OVS_KEY_ATTR_MAX); + return -EINVAL; + } + + if (attrs & (1ULL << type)) { + OVS_NLERR("Duplicate key attribute (type %d).\n", type); + return -EINVAL; + } + + expected_len = ovs_key_lens[type]; + if (nla_len(nla) != expected_len && expected_len != -1) { + OVS_NLERR("Key attribute has unexpected length (type=%d" + ", length=%d, expected=%d).\n", type, + nla_len(nla), expected_len); + return -EINVAL; + } + + if (!nz || !is_all_zero(nla_data(nla), expected_len)) { + attrs |= 1ULL << type; + a[type] = nla; + } + } + if (rem) { + OVS_NLERR("Message has %d unknown bytes.\n", rem); 
+ return -EINVAL; + } + + *attrsp = attrs; + return 0; +} + +static int parse_flow_mask_nlattrs(const struct nlattr *attr, + const struct nlattr *a[], u64 *attrsp) +{ + return __parse_flow_nlattrs(attr, a, attrsp, true); +} + +static int parse_flow_nlattrs(const struct nlattr *attr, + const struct nlattr *a[], u64 *attrsp) +{ + return __parse_flow_nlattrs(attr, a, attrsp, false); +} + +static int ipv4_tun_from_nlattr(const struct nlattr *attr, + struct sw_flow_match *match, bool is_mask) +{ + struct nlattr *a; + int rem; + bool ttl = false; + __be16 tun_flags = 0; + + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { + [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), + [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), + [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32), + [OVS_TUNNEL_KEY_ATTR_TOS] = 1, + [OVS_TUNNEL_KEY_ATTR_TTL] = 1, + [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, + [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, + }; + + if (type > OVS_TUNNEL_KEY_ATTR_MAX) { + OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n", + type, OVS_TUNNEL_KEY_ATTR_MAX); + return -EINVAL; + } + + if (ovs_tunnel_key_lens[type] != nla_len(a)) { + OVS_NLERR("IPv4 tunnel attribute type has unexpected " + " length (type=%d, length=%d, expected=%d).\n", + type, nla_len(a), ovs_tunnel_key_lens[type]); + return -EINVAL; + } + + switch (type) { + case OVS_TUNNEL_KEY_ATTR_ID: + SW_FLOW_KEY_PUT(match, tun_key.tun_id, + nla_get_be64(a), is_mask); + tun_flags |= TUNNEL_KEY; + break; + case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: + SW_FLOW_KEY_PUT(match, tun_key.ipv4_src, + nla_get_be32(a), is_mask); + break; + case OVS_TUNNEL_KEY_ATTR_IPV4_DST: + SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst, + nla_get_be32(a), is_mask); + break; + case OVS_TUNNEL_KEY_ATTR_TOS: + SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos, + nla_get_u8(a), is_mask); + break; + case OVS_TUNNEL_KEY_ATTR_TTL: + SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl, + nla_get_u8(a), is_mask); + ttl 
= true; + break; + case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: + tun_flags |= TUNNEL_DONT_FRAGMENT; + break; + case OVS_TUNNEL_KEY_ATTR_CSUM: + tun_flags |= TUNNEL_CSUM; + break; + default: + return -EINVAL; + } + } + + SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); + + if (rem > 0) { + OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem); + return -EINVAL; + } + + if (!is_mask) { + if (!match->key->tun_key.ipv4_dst) { + OVS_NLERR("IPv4 tunnel destination address is zero.\n"); + return -EINVAL; + } + + if (!ttl) { + OVS_NLERR("IPv4 tunnel TTL not specified.\n"); + return -EINVAL; + } + } + + return 0; +} + +static int ipv4_tun_to_nlattr(struct sk_buff *skb, + const struct ovs_key_ipv4_tunnel *tun_key, + const struct ovs_key_ipv4_tunnel *output) +{ + struct nlattr *nla; + + nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); + if (!nla) + return -EMSGSIZE; + + if (output->tun_flags & TUNNEL_KEY && + nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) + return -EMSGSIZE; + if (output->ipv4_src && + nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src)) + return -EMSGSIZE; + if (output->ipv4_dst && + nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst)) + return -EMSGSIZE; + if (output->ipv4_tos && + nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos)) + return -EMSGSIZE; + if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl)) + return -EMSGSIZE; + if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) && + nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) + return -EMSGSIZE; + if ((output->tun_flags & TUNNEL_CSUM) && + nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) + return -EMSGSIZE; + + nla_nest_end(skb, nla); + return 0; +} + + +static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, + const struct nlattr **a, bool is_mask) +{ + if (*attrs & (1ULL << OVS_KEY_ATTR_PRIORITY)) { + SW_FLOW_KEY_PUT(match, phy.priority, + nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask); + *attrs &= ~(1ULL << 
OVS_KEY_ATTR_PRIORITY); + } + + if (*attrs & (1ULL << OVS_KEY_ATTR_IN_PORT)) { + u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]); + + if (is_mask) + in_port = 0xffffffff; /* Always exact match in_port. */ + else if (in_port >= DP_MAX_PORTS) + return -EINVAL; + + SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask); + *attrs &= ~(1ULL << OVS_KEY_ATTR_IN_PORT); + } else if (!is_mask) { + SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask); + } + + if (*attrs & (1ULL << OVS_KEY_ATTR_SKB_MARK)) { + uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]); + + SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask); + *attrs &= ~(1ULL << OVS_KEY_ATTR_SKB_MARK); + } + if (*attrs & (1ULL << OVS_KEY_ATTR_TUNNEL)) { + if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, + is_mask)) + return -EINVAL; + *attrs &= ~(1ULL << OVS_KEY_ATTR_TUNNEL); + } + return 0; +} + +static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, + const struct nlattr **a, bool is_mask) +{ + int err; + u64 orig_attrs = attrs; + + err = metadata_from_nlattrs(match, &attrs, a, is_mask); + if (err) + return err; + + if (attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) { + const struct ovs_key_ethernet *eth_key; + + eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]); + SW_FLOW_KEY_MEMCPY(match, eth.src, + eth_key->eth_src, ETH_ALEN, is_mask); + SW_FLOW_KEY_MEMCPY(match, eth.dst, + eth_key->eth_dst, ETH_ALEN, is_mask); + attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERNET); + } + + if (attrs & (1ULL << OVS_KEY_ATTR_VLAN)) { + __be16 tci; + + tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); + if (!(tci & htons(VLAN_TAG_PRESENT))) { + if (is_mask) + OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n"); + else + OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n"); + + return -EINVAL; + } + + SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask); + attrs &= ~(1ULL << OVS_KEY_ATTR_VLAN); + } else if (!is_mask) + SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true); + + if (attrs & (1ULL 
<< OVS_KEY_ATTR_ETHERTYPE)) { + __be16 eth_type; + + eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); + if (is_mask) { + /* Always exact match EtherType. */ + eth_type = htons(0xffff); + } else if (ntohs(eth_type) < ETH_P_802_3_MIN) { + OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n", + ntohs(eth_type), ETH_P_802_3_MIN); + return -EINVAL; + } + + SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask); + attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE); + } else if (!is_mask) { + SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); + } + + if (attrs & (1ULL << OVS_KEY_ATTR_IPV4)) { + const struct ovs_key_ipv4 *ipv4_key; + + ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]); + if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) { + OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n", + ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX); + return -EINVAL; + } + SW_FLOW_KEY_PUT(match, ip.proto, + ipv4_key->ipv4_proto, is_mask); + SW_FLOW_KEY_PUT(match, ip.tos, + ipv4_key->ipv4_tos, is_mask); + SW_FLOW_KEY_PUT(match, ip.ttl, + ipv4_key->ipv4_ttl, is_mask); + SW_FLOW_KEY_PUT(match, ip.frag, + ipv4_key->ipv4_frag, is_mask); + SW_FLOW_KEY_PUT(match, ipv4.addr.src, + ipv4_key->ipv4_src, is_mask); + SW_FLOW_KEY_PUT(match, ipv4.addr.dst, + ipv4_key->ipv4_dst, is_mask); + attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4); + } + + if (attrs & (1ULL << OVS_KEY_ATTR_IPV6)) { + const struct ovs_key_ipv6 *ipv6_key; + + ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]); + if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) { + OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n", + ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX); + return -EINVAL; + } + SW_FLOW_KEY_PUT(match, ipv6.label, + ipv6_key->ipv6_label, is_mask); + SW_FLOW_KEY_PUT(match, ip.proto, + ipv6_key->ipv6_proto, is_mask); + SW_FLOW_KEY_PUT(match, ip.tos, + ipv6_key->ipv6_tclass, is_mask); + SW_FLOW_KEY_PUT(match, ip.ttl, + ipv6_key->ipv6_hlimit, is_mask); + SW_FLOW_KEY_PUT(match, ip.frag, + ipv6_key->ipv6_frag, 
is_mask); + SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src, + ipv6_key->ipv6_src, + sizeof(match->key->ipv6.addr.src), + is_mask); + SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst, + ipv6_key->ipv6_dst, + sizeof(match->key->ipv6.addr.dst), + is_mask); + + attrs &= ~(1ULL << OVS_KEY_ATTR_IPV6); + } + + if (attrs & (1ULL << OVS_KEY_ATTR_ARP)) { + const struct ovs_key_arp *arp_key; + + arp_key = nla_data(a[OVS_KEY_ATTR_ARP]); + if (!is_mask && (arp_key->arp_op & htons(0xff00))) { + OVS_NLERR("Unknown ARP opcode (opcode=%d).\n", + arp_key->arp_op); + return -EINVAL; + } + + SW_FLOW_KEY_PUT(match, ipv4.addr.src, + arp_key->arp_sip, is_mask); + SW_FLOW_KEY_PUT(match, ipv4.addr.dst, + arp_key->arp_tip, is_mask); + SW_FLOW_KEY_PUT(match, ip.proto, + ntohs(arp_key->arp_op), is_mask); + SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha, + arp_key->arp_sha, ETH_ALEN, is_mask); + SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha, + arp_key->arp_tha, ETH_ALEN, is_mask); + + attrs &= ~(1ULL << OVS_KEY_ATTR_ARP); + } + + if (attrs & (1ULL << OVS_KEY_ATTR_TCP)) { + const struct ovs_key_tcp *tcp_key; + + tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); + if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) { + SW_FLOW_KEY_PUT(match, ipv4.tp.src, + tcp_key->tcp_src, is_mask); + SW_FLOW_KEY_PUT(match, ipv4.tp.dst, + tcp_key->tcp_dst, is_mask); + } else { + SW_FLOW_KEY_PUT(match, ipv6.tp.src, + tcp_key->tcp_src, is_mask); + SW_FLOW_KEY_PUT(match, ipv6.tp.dst, + tcp_key->tcp_dst, is_mask); + } + attrs &= ~(1ULL << OVS_KEY_ATTR_TCP); + } + + if (attrs & (1ULL << OVS_KEY_ATTR_UDP)) { + const struct ovs_key_udp *udp_key; + + udp_key = nla_data(a[OVS_KEY_ATTR_UDP]); + if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) { + SW_FLOW_KEY_PUT(match, ipv4.tp.src, + udp_key->udp_src, is_mask); + SW_FLOW_KEY_PUT(match, ipv4.tp.dst, + udp_key->udp_dst, is_mask); + } else { + SW_FLOW_KEY_PUT(match, ipv6.tp.src, + udp_key->udp_src, is_mask); + SW_FLOW_KEY_PUT(match, ipv6.tp.dst, + udp_key->udp_dst, is_mask); + } + attrs &= ~(1ULL << OVS_KEY_ATTR_UDP); + } 
+ + if (attrs & (1ULL << OVS_KEY_ATTR_SCTP)) { + const struct ovs_key_sctp *sctp_key; + + sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]); + if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) { + SW_FLOW_KEY_PUT(match, ipv4.tp.src, + sctp_key->sctp_src, is_mask); + SW_FLOW_KEY_PUT(match, ipv4.tp.dst, + sctp_key->sctp_dst, is_mask); + } else { + SW_FLOW_KEY_PUT(match, ipv6.tp.src, + sctp_key->sctp_src, is_mask); + SW_FLOW_KEY_PUT(match, ipv6.tp.dst, + sctp_key->sctp_dst, is_mask); + } + attrs &= ~(1ULL << OVS_KEY_ATTR_SCTP); + } + + if (attrs & (1ULL << OVS_KEY_ATTR_ICMP)) { + const struct ovs_key_icmp *icmp_key; + + icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]); + SW_FLOW_KEY_PUT(match, ipv4.tp.src, + htons(icmp_key->icmp_type), is_mask); + SW_FLOW_KEY_PUT(match, ipv4.tp.dst, + htons(icmp_key->icmp_code), is_mask); + attrs &= ~(1ULL << OVS_KEY_ATTR_ICMP); + } + + if (attrs & (1ULL << OVS_KEY_ATTR_ICMPV6)) { + const struct ovs_key_icmpv6 *icmpv6_key; + + icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]); + SW_FLOW_KEY_PUT(match, ipv6.tp.src, + htons(icmpv6_key->icmpv6_type), is_mask); + SW_FLOW_KEY_PUT(match, ipv6.tp.dst, + htons(icmpv6_key->icmpv6_code), is_mask); + attrs &= ~(1ULL << OVS_KEY_ATTR_ICMPV6); + } + + if (attrs & (1ULL << OVS_KEY_ATTR_ND)) { + const struct ovs_key_nd *nd_key; + + nd_key = nla_data(a[OVS_KEY_ATTR_ND]); + SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target, + nd_key->nd_target, + sizeof(match->key->ipv6.nd.target), + is_mask); + SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll, + nd_key->nd_sll, ETH_ALEN, is_mask); + SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll, + nd_key->nd_tll, ETH_ALEN, is_mask); + attrs &= ~(1ULL << OVS_KEY_ATTR_ND); + } + + if (attrs != 0) + return -EINVAL; + + return 0; +} + +static void sw_flow_mask_set(struct sw_flow_mask *mask, + struct sw_flow_key_range *range, u8 val) +{ + u8 *m = (u8 *)&mask->key + range->start; + + mask->range = *range; + memset(m, val, range_n_bytes(range)); +} + +/** + * ovs_nla_get_match - parses Netlink attributes into a flow key and 
+ * mask. In case the 'mask' is NULL, the flow is treated as exact match + * flow. Otherwise, it is treated as a wildcarded flow, except the mask + * does not include any don't care bit. + * @match: receives the extracted flow match information. + * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute + * sequence. The fields should of the packet that triggered the creation + * of this flow. + * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink + * attribute specifies the mask field of the wildcarded flow. + */ +int ovs_nla_get_match(struct sw_flow_match *match, + const struct nlattr *key, + const struct nlattr *mask) +{ + const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; + const struct nlattr *encap; + u64 key_attrs = 0; + u64 mask_attrs = 0; + bool encap_valid = false; + int err; + + err = parse_flow_nlattrs(key, a, &key_attrs); + if (err) + return err; + + if ((key_attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) && + (key_attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)) && + (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) { + __be16 tci; + + if (!((key_attrs & (1ULL << OVS_KEY_ATTR_VLAN)) && + (key_attrs & (1ULL << OVS_KEY_ATTR_ENCAP)))) { + OVS_NLERR("Invalid Vlan frame.\n"); + return -EINVAL; + } + + key_attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE); + tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); + encap = a[OVS_KEY_ATTR_ENCAP]; + key_attrs &= ~(1ULL << OVS_KEY_ATTR_ENCAP); + encap_valid = true; + + if (tci & htons(VLAN_TAG_PRESENT)) { + err = parse_flow_nlattrs(encap, a, &key_attrs); + if (err) + return err; + } else if (!tci) { + /* Corner case for truncated 802.1Q header. 
*/ + if (nla_len(encap)) { + OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n"); + return -EINVAL; + } + } else { + OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n"); + return -EINVAL; + } + } + + err = ovs_key_from_nlattrs(match, key_attrs, a, false); + if (err) + return err; + + if (mask) { + err = parse_flow_mask_nlattrs(mask, a, &mask_attrs); + if (err) + return err; + + if (mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP) { + __be16 eth_type = 0; + __be16 tci = 0; + + if (!encap_valid) { + OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n"); + return -EINVAL; + } + + mask_attrs &= ~(1ULL << OVS_KEY_ATTR_ENCAP); + if (a[OVS_KEY_ATTR_ETHERTYPE]) + eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); + + if (eth_type == htons(0xffff)) { + mask_attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE); + encap = a[OVS_KEY_ATTR_ENCAP]; + err = parse_flow_mask_nlattrs(encap, a, &mask_attrs); + } else { + OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n", + ntohs(eth_type)); + return -EINVAL; + } + + if (a[OVS_KEY_ATTR_VLAN]) + tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); + + if (!(tci & htons(VLAN_TAG_PRESENT))) { + OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci)); + return -EINVAL; + } + } + + err = ovs_key_from_nlattrs(match, mask_attrs, a, true); + if (err) + return err; + } else { + /* Populate exact match flow's key mask. */ + if (match->mask) + sw_flow_mask_set(match->mask, &match->range, 0xff); + } + + if (!match_validate(match, key_attrs, mask_attrs)) + return -EINVAL; + + return 0; +} + +/** + * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key. + * @flow: Receives extracted in_port, priority, tun_key and skb_mark. + * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute + * sequence. 
+ * + * This parses a series of Netlink attributes that form a flow key, which must + * take the same form accepted by flow_from_nlattrs(), but only enough of it to + * get the metadata, that is, the parts of the flow key that cannot be + * extracted from the packet itself. + */ + +int ovs_nla_get_flow_metadata(struct sw_flow *flow, + const struct nlattr *attr) +{ + struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key; + const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; + u64 attrs = 0; + int err; + struct sw_flow_match match; + + flow->key.phy.in_port = DP_MAX_PORTS; + flow->key.phy.priority = 0; + flow->key.phy.skb_mark = 0; + memset(tun_key, 0, sizeof(flow->key.tun_key)); + + err = parse_flow_nlattrs(attr, a, &attrs); + if (err) + return -EINVAL; + + memset(&match, 0, sizeof(match)); + match.key = &flow->key; + + err = metadata_from_nlattrs(&match, &attrs, a, false); + if (err) + return err; + + return 0; +} + +int ovs_nla_put_flow(const struct sw_flow_key *swkey, + const struct sw_flow_key *output, struct sk_buff *skb) +{ + struct ovs_key_ethernet *eth_key; + struct nlattr *nla, *encap; + bool is_mask = (swkey != output); + + if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority)) + goto nla_put_failure; + + if ((swkey->tun_key.ipv4_dst || is_mask) && + ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key)) + goto nla_put_failure; + + if (swkey->phy.in_port == DP_MAX_PORTS) { + if (is_mask && (output->phy.in_port == 0xffff)) + if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff)) + goto nla_put_failure; + } else { + u16 upper_u16; + upper_u16 = !is_mask ? 
0 : 0xffff; + + if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, + (upper_u16 << 16) | output->phy.in_port)) + goto nla_put_failure; + } + + if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) + goto nla_put_failure; + + nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); + if (!nla) + goto nla_put_failure; + + eth_key = nla_data(nla); + memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN); + memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN); + + if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { + __be16 eth_type; + eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff); + if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) || + nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci)) + goto nla_put_failure; + encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); + if (!swkey->eth.tci) + goto unencap; + } else + encap = NULL; + + if (swkey->eth.type == htons(ETH_P_802_2)) { + /* + * Ethertype 802.2 is represented in the netlink with omitted + * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and + * 0xffff in the mask attribute. Ethertype can also + * be wildcarded. 
+ */ + if (is_mask && output->eth.type) + if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, + output->eth.type)) + goto nla_put_failure; + goto unencap; + } + + if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) + goto nla_put_failure; + + if (swkey->eth.type == htons(ETH_P_IP)) { + struct ovs_key_ipv4 *ipv4_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key)); + if (!nla) + goto nla_put_failure; + ipv4_key = nla_data(nla); + ipv4_key->ipv4_src = output->ipv4.addr.src; + ipv4_key->ipv4_dst = output->ipv4.addr.dst; + ipv4_key->ipv4_proto = output->ip.proto; + ipv4_key->ipv4_tos = output->ip.tos; + ipv4_key->ipv4_ttl = output->ip.ttl; + ipv4_key->ipv4_frag = output->ip.frag; + } else if (swkey->eth.type == htons(ETH_P_IPV6)) { + struct ovs_key_ipv6 *ipv6_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key)); + if (!nla) + goto nla_put_failure; + ipv6_key = nla_data(nla); + memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src, + sizeof(ipv6_key->ipv6_src)); + memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst, + sizeof(ipv6_key->ipv6_dst)); + ipv6_key->ipv6_label = output->ipv6.label; + ipv6_key->ipv6_proto = output->ip.proto; + ipv6_key->ipv6_tclass = output->ip.tos; + ipv6_key->ipv6_hlimit = output->ip.ttl; + ipv6_key->ipv6_frag = output->ip.frag; + } else if (swkey->eth.type == htons(ETH_P_ARP) || + swkey->eth.type == htons(ETH_P_RARP)) { + struct ovs_key_arp *arp_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key)); + if (!nla) + goto nla_put_failure; + arp_key = nla_data(nla); + memset(arp_key, 0, sizeof(struct ovs_key_arp)); + arp_key->arp_sip = output->ipv4.addr.src; + arp_key->arp_tip = output->ipv4.addr.dst; + arp_key->arp_op = htons(output->ip.proto); + memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN); + memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN); + } + + if ((swkey->eth.type == htons(ETH_P_IP) || + swkey->eth.type == htons(ETH_P_IPV6)) && + swkey->ip.frag != OVS_FRAG_TYPE_LATER) { + 
+ if (swkey->ip.proto == IPPROTO_TCP) { + struct ovs_key_tcp *tcp_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key)); + if (!nla) + goto nla_put_failure; + tcp_key = nla_data(nla); + if (swkey->eth.type == htons(ETH_P_IP)) { + tcp_key->tcp_src = output->ipv4.tp.src; + tcp_key->tcp_dst = output->ipv4.tp.dst; + } else if (swkey->eth.type == htons(ETH_P_IPV6)) { + tcp_key->tcp_src = output->ipv6.tp.src; + tcp_key->tcp_dst = output->ipv6.tp.dst; + } + } else if (swkey->ip.proto == IPPROTO_UDP) { + struct ovs_key_udp *udp_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key)); + if (!nla) + goto nla_put_failure; + udp_key = nla_data(nla); + if (swkey->eth.type == htons(ETH_P_IP)) { + udp_key->udp_src = output->ipv4.tp.src; + udp_key->udp_dst = output->ipv4.tp.dst; + } else if (swkey->eth.type == htons(ETH_P_IPV6)) { + udp_key->udp_src = output->ipv6.tp.src; + udp_key->udp_dst = output->ipv6.tp.dst; + } + } else if (swkey->ip.proto == IPPROTO_SCTP) { + struct ovs_key_sctp *sctp_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key)); + if (!nla) + goto nla_put_failure; + sctp_key = nla_data(nla); + if (swkey->eth.type == htons(ETH_P_IP)) { + sctp_key->sctp_src = swkey->ipv4.tp.src; + sctp_key->sctp_dst = swkey->ipv4.tp.dst; + } else if (swkey->eth.type == htons(ETH_P_IPV6)) { + sctp_key->sctp_src = swkey->ipv6.tp.src; + sctp_key->sctp_dst = swkey->ipv6.tp.dst; + } + } else if (swkey->eth.type == htons(ETH_P_IP) && + swkey->ip.proto == IPPROTO_ICMP) { + struct ovs_key_icmp *icmp_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key)); + if (!nla) + goto nla_put_failure; + icmp_key = nla_data(nla); + icmp_key->icmp_type = ntohs(output->ipv4.tp.src); + icmp_key->icmp_code = ntohs(output->ipv4.tp.dst); + } else if (swkey->eth.type == htons(ETH_P_IPV6) && + swkey->ip.proto == IPPROTO_ICMPV6) { + struct ovs_key_icmpv6 *icmpv6_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6, + sizeof(*icmpv6_key)); + if (!nla) + goto 
nla_put_failure; + icmpv6_key = nla_data(nla); + icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src); + icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst); + + if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || + icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { + struct ovs_key_nd *nd_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key)); + if (!nla) + goto nla_put_failure; + nd_key = nla_data(nla); + memcpy(nd_key->nd_target, &output->ipv6.nd.target, + sizeof(nd_key->nd_target)); + memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN); + memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN); + } + } + } + +unencap: + if (encap) + nla_nest_end(skb, encap); + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +#define MAX_ACTIONS_BUFSIZE (32 * 1024) + +struct sw_flow_actions *ovs_nla_alloc_flow_actions(int size) +{ + struct sw_flow_actions *sfa; + + if (size > MAX_ACTIONS_BUFSIZE) + return ERR_PTR(-EINVAL); + + sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); + if (!sfa) + return ERR_PTR(-ENOMEM); + + sfa->actions_len = 0; + return sfa; +} + +/* RCU callback used by ovs_nla_free_flow_actions. */ +static void rcu_free_acts_callback(struct rcu_head *rcu) +{ + struct sw_flow_actions *sf_acts = container_of(rcu, + struct sw_flow_actions, rcu); + kfree(sf_acts); +} + +/* Schedules 'sf_acts' to be freed after the next RCU grace period. + * The caller must hold rcu_read_lock for this to be sensible. 
*/ +void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) +{ + call_rcu(&sf_acts->rcu, rcu_free_acts_callback); +} + +static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, + int attr_len) +{ + + struct sw_flow_actions *acts; + int new_acts_size; + int req_size = NLA_ALIGN(attr_len); + int next_offset = offsetof(struct sw_flow_actions, actions) + + (*sfa)->actions_len; + + if (req_size <= (ksize(*sfa) - next_offset)) + goto out; + + new_acts_size = ksize(*sfa) * 2; + + if (new_acts_size > MAX_ACTIONS_BUFSIZE) { + if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) + return ERR_PTR(-EMSGSIZE); + new_acts_size = MAX_ACTIONS_BUFSIZE; + } + + acts = ovs_nla_alloc_flow_actions(new_acts_size); + if (IS_ERR(acts)) + return (void *)acts; + + memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len); + acts->actions_len = (*sfa)->actions_len; + kfree(*sfa); + *sfa = acts; + +out: + (*sfa)->actions_len += req_size; + return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset); +} + +static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len) +{ + struct nlattr *a; + + a = reserve_sfa_size(sfa, nla_attr_size(len)); + if (IS_ERR(a)) + return PTR_ERR(a); + + a->nla_type = attrtype; + a->nla_len = nla_attr_size(len); + + if (data) + memcpy(nla_data(a), data, len); + memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len)); + + return 0; +} + +static inline int add_nested_action_start(struct sw_flow_actions **sfa, + int attrtype) +{ + int used = (*sfa)->actions_len; + int err; + + err = add_action(sfa, attrtype, NULL, 0); + if (err) + return err; + + return used; +} + +static inline void add_nested_action_end(struct sw_flow_actions *sfa, + int st_offset) +{ + struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + + st_offset); + + a->nla_len = sfa->actions_len - st_offset; +} + +static int validate_and_copy_sample(const struct nlattr *attr, + const struct sw_flow_key *key, int depth, + struct 
sw_flow_actions **sfa) +{ + const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; + const struct nlattr *probability, *actions; + const struct nlattr *a; + int rem, start, err, st_acts; + + memset(attrs, 0, sizeof(attrs)); + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type]) + return -EINVAL; + attrs[type] = a; + } + if (rem) + return -EINVAL; + + probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY]; + if (!probability || nla_len(probability) != sizeof(u32)) + return -EINVAL; + + actions = attrs[OVS_SAMPLE_ATTR_ACTIONS]; + if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) + return -EINVAL; + + /* validation done, copy sample action. */ + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE); + if (start < 0) + return start; + err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, + nla_data(probability), sizeof(u32)); + if (err) + return err; + st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS); + if (st_acts < 0) + return st_acts; + + err = ovs_nla_copy_actions(actions, key, depth + 1, sfa); + if (err) + return err; + + add_nested_action_end(*sfa, st_acts); + add_nested_action_end(*sfa, start); + + return 0; +} + +static int validate_tp_port(const struct sw_flow_key *flow_key) +{ + if (flow_key->eth.type == htons(ETH_P_IP)) { + if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst) + return 0; + } else if (flow_key->eth.type == htons(ETH_P_IPV6)) { + if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst) + return 0; + } + + return -EINVAL; +} + +void ovs_match_init(struct sw_flow_match *match, + struct sw_flow_key *key, + struct sw_flow_mask *mask) +{ + memset(match, 0, sizeof(*match)); + match->key = key; + match->mask = mask; + + memset(key, 0, sizeof(*key)); + + if (mask) { + memset(&mask->key, 0, sizeof(mask->key)); + mask->range.start = mask->range.end = 0; + } +} + +static int validate_and_copy_set_tun(const struct nlattr *attr, + struct sw_flow_actions **sfa) +{ + 
struct sw_flow_match match; + struct sw_flow_key key; + int err, start; + + ovs_match_init(&match, &key, NULL); + err = ipv4_tun_from_nlattr(nla_data(attr), &match, false); + if (err) + return err; + + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET); + if (start < 0) + return start; + + err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key, + sizeof(match.key->tun_key)); + add_nested_action_end(*sfa, start); + + return err; +} + +static int validate_set(const struct nlattr *a, + const struct sw_flow_key *flow_key, + struct sw_flow_actions **sfa, + bool *set_tun) +{ + const struct nlattr *ovs_key = nla_data(a); + int key_type = nla_type(ovs_key); + + /* There can be only one key in a action */ + if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) + return -EINVAL; + + if (key_type > OVS_KEY_ATTR_MAX || + (ovs_key_lens[key_type] != nla_len(ovs_key) && + ovs_key_lens[key_type] != -1)) + return -EINVAL; + + switch (key_type) { + const struct ovs_key_ipv4 *ipv4_key; + const struct ovs_key_ipv6 *ipv6_key; + int err; + + case OVS_KEY_ATTR_PRIORITY: + case OVS_KEY_ATTR_SKB_MARK: + case OVS_KEY_ATTR_ETHERNET: + break; + + case OVS_KEY_ATTR_TUNNEL: + *set_tun = true; + err = validate_and_copy_set_tun(a, sfa); + if (err) + return err; + break; + + case OVS_KEY_ATTR_IPV4: + if (flow_key->eth.type != htons(ETH_P_IP)) + return -EINVAL; + + if (!flow_key->ip.proto) + return -EINVAL; + + ipv4_key = nla_data(ovs_key); + if (ipv4_key->ipv4_proto != flow_key->ip.proto) + return -EINVAL; + + if (ipv4_key->ipv4_frag != flow_key->ip.frag) + return -EINVAL; + + break; + + case OVS_KEY_ATTR_IPV6: + if (flow_key->eth.type != htons(ETH_P_IPV6)) + return -EINVAL; + + if (!flow_key->ip.proto) + return -EINVAL; + + ipv6_key = nla_data(ovs_key); + if (ipv6_key->ipv6_proto != flow_key->ip.proto) + return -EINVAL; + + if (ipv6_key->ipv6_frag != flow_key->ip.frag) + return -EINVAL; + + if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) + return -EINVAL; + + break; + + case 
OVS_KEY_ATTR_TCP: + if (flow_key->ip.proto != IPPROTO_TCP) + return -EINVAL; + + return validate_tp_port(flow_key); + + case OVS_KEY_ATTR_UDP: + if (flow_key->ip.proto != IPPROTO_UDP) + return -EINVAL; + + return validate_tp_port(flow_key); + + case OVS_KEY_ATTR_SCTP: + if (flow_key->ip.proto != IPPROTO_SCTP) + return -EINVAL; + + return validate_tp_port(flow_key); + + default: + return -EINVAL; + } + + return 0; +} + +static int validate_userspace(const struct nlattr *attr) +{ + static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { + [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, + [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC }, + }; + struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; + int error; + + error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, + attr, userspace_policy); + if (error) + return error; + + if (!a[OVS_USERSPACE_ATTR_PID] || + !nla_get_u32(a[OVS_USERSPACE_ATTR_PID])) + return -EINVAL; + + return 0; +} + +static int copy_action(const struct nlattr *from, + struct sw_flow_actions **sfa) +{ + int totlen = NLA_ALIGN(from->nla_len); + struct nlattr *to; + + to = reserve_sfa_size(sfa, from->nla_len); + if (IS_ERR(to)) + return PTR_ERR(to); + + memcpy(to, from, totlen); + return 0; +} + +int ovs_nla_copy_actions(const struct nlattr *attr, + const struct sw_flow_key *key, + int depth, + struct sw_flow_actions **sfa) +{ + const struct nlattr *a; + int rem, err; + + if (depth >= SAMPLE_ACTION_DEPTH) + return -EOVERFLOW; + + nla_for_each_nested(a, attr, rem) { + /* Expected argument lengths, (u32)-1 for variable length. 
*/ + static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { + [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), + [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, + [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), + [OVS_ACTION_ATTR_POP_VLAN] = 0, + [OVS_ACTION_ATTR_SET] = (u32)-1, + [OVS_ACTION_ATTR_SAMPLE] = (u32)-1 + }; + const struct ovs_action_push_vlan *vlan; + int type = nla_type(a); + bool skip_copy; + + if (type > OVS_ACTION_ATTR_MAX || + (action_lens[type] != nla_len(a) && + action_lens[type] != (u32)-1)) + return -EINVAL; + + skip_copy = false; + switch (type) { + case OVS_ACTION_ATTR_UNSPEC: + return -EINVAL; + + case OVS_ACTION_ATTR_USERSPACE: + err = validate_userspace(a); + if (err) + return err; + break; + + case OVS_ACTION_ATTR_OUTPUT: + if (nla_get_u32(a) >= DP_MAX_PORTS) + return -EINVAL; + break; + + + case OVS_ACTION_ATTR_POP_VLAN: + break; + + case OVS_ACTION_ATTR_PUSH_VLAN: + vlan = nla_data(a); + if (vlan->vlan_tpid != htons(ETH_P_8021Q)) + return -EINVAL; + if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) + return -EINVAL; + break; + + case OVS_ACTION_ATTR_SET: + err = validate_set(a, key, sfa, &skip_copy); + if (err) + return err; + break; + + case OVS_ACTION_ATTR_SAMPLE: + err = validate_and_copy_sample(a, key, depth, sfa); + if (err) + return err; + skip_copy = true; + break; + + default: + return -EINVAL; + } + if (!skip_copy) { + err = copy_action(a, sfa); + if (err) + return err; + } + } + + if (rem > 0) + return -EINVAL; + + return 0; +} + +static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) +{ + const struct nlattr *a; + struct nlattr *start; + int err = 0, rem; + + start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE); + if (!start) + return -EMSGSIZE; + + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + struct nlattr *st_sample; + + switch (type) { + case OVS_SAMPLE_ATTR_PROBABILITY: + if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, + sizeof(u32), nla_data(a))) + return -EMSGSIZE; + break; 
+ case OVS_SAMPLE_ATTR_ACTIONS: + st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); + if (!st_sample) + return -EMSGSIZE; + err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb); + if (err) + return err; + nla_nest_end(skb, st_sample); + break; + } + } + + nla_nest_end(skb, start); + return err; +} + +static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) +{ + const struct nlattr *ovs_key = nla_data(a); + int key_type = nla_type(ovs_key); + struct nlattr *start; + int err; + + switch (key_type) { + case OVS_KEY_ATTR_IPV4_TUNNEL: + start = nla_nest_start(skb, OVS_ACTION_ATTR_SET); + if (!start) + return -EMSGSIZE; + + err = ipv4_tun_to_nlattr(skb, nla_data(ovs_key), + nla_data(ovs_key)); + if (err) + return err; + nla_nest_end(skb, start); + break; + default: + if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key)) + return -EMSGSIZE; + break; + } + + return 0; +} + +int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) +{ + const struct nlattr *a; + int rem, err; + + nla_for_each_attr(a, attr, len, rem) { + int type = nla_type(a); + + switch (type) { + case OVS_ACTION_ATTR_SET: + err = set_action_to_attr(a, skb); + if (err) + return err; + break; + + case OVS_ACTION_ATTR_SAMPLE: + err = sample_action_to_attr(a, skb); + if (err) + return err; + break; + default: + if (nla_put(skb, type, nla_len(a), nla_data(a))) + return -EMSGSIZE; + break; + } + } + + return 0; +} diff --git a/datapath/flow_netlink.h b/datapath/flow_netlink.h new file mode 100644 index 000000000..440151045 --- /dev/null +++ b/datapath/flow_netlink.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2007-2013 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + + +#ifndef FLOW_NETLINK_H +#define FLOW_NETLINK_H 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "flow.h" + +void ovs_match_init(struct sw_flow_match *match, + struct sw_flow_key *key, struct sw_flow_mask *mask); + +int ovs_nla_put_flow(const struct sw_flow_key *, + const struct sw_flow_key *, struct sk_buff *); +int ovs_nla_get_flow_metadata(struct sw_flow *flow, + const struct nlattr *attr); +int ovs_nla_get_match(struct sw_flow_match *match, + const struct nlattr *, + const struct nlattr *); + +int ovs_nla_copy_actions(const struct nlattr *attr, + const struct sw_flow_key *key, int depth, + struct sw_flow_actions **sfa); +int ovs_nla_put_actions(const struct nlattr *attr, + int len, struct sk_buff *skb); + +struct sw_flow_actions *ovs_nla_alloc_flow_actions(int actions_len); +void ovs_nla_free_flow_actions(struct sw_flow_actions *); + +#endif /* flow_netlink.h */ diff --git a/datapath/flow_table.c b/datapath/flow_table.c new file mode 100644 index 000000000..98eb809d1 --- /dev/null +++ b/datapath/flow_table.c @@ -0,0 +1,579 @@ +/* + * Copyright (c) 2007-2013 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#include "flow.h" +#include "datapath.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "datapath.h" +#include "vlan.h" + +#define TBL_MIN_BUCKETS 1024 +#define REHASH_INTERVAL (10 * 60 * HZ) + +static struct kmem_cache *flow_cache; + +static u16 range_n_bytes(const struct sw_flow_key_range *range) +{ + return range->end - range->start; +} + +void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, + const struct sw_flow_mask *mask) +{ + const long *m = (long *)((u8 *)&mask->key + mask->range.start); + const long *s = (long *)((u8 *)src + mask->range.start); + long *d = (long *)((u8 *)dst + mask->range.start); + int i; + + /* The memory outside of the 'mask->range' are not set since + * further operations on 'dst' only uses contents within + * 'mask->range'. 
+ */ + for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long)) + *d++ = *s++ & *m++; +} + +struct sw_flow *ovs_flow_alloc(void) +{ + struct sw_flow *flow; + + flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); + if (!flow) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&flow->lock); + flow->sf_acts = NULL; + flow->mask = NULL; + + return flow; +} + +int ovs_flow_tbl_count(struct flow_table *table) +{ + return table->count; +} + +static struct flex_array *alloc_buckets(unsigned int n_buckets) +{ + struct flex_array *buckets; + int i, err; + + buckets = flex_array_alloc(sizeof(struct hlist_head), + n_buckets, GFP_KERNEL); + if (!buckets) + return NULL; + + err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL); + if (err) { + flex_array_free(buckets); + return NULL; + } + + for (i = 0; i < n_buckets; i++) + INIT_HLIST_HEAD((struct hlist_head *) + flex_array_get(buckets, i)); + + return buckets; +} + +static void flow_free(struct sw_flow *flow) +{ + kfree((struct sf_flow_acts __force *)flow->sf_acts); + kmem_cache_free(flow_cache, flow); +} + +static void rcu_free_flow_callback(struct rcu_head *rcu) +{ + struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); + + flow_free(flow); +} + +static void rcu_free_sw_flow_mask_cb(struct rcu_head *rcu) +{ + struct sw_flow_mask *mask = container_of(rcu, struct sw_flow_mask, rcu); + + kfree(mask); +} + +static void flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred) +{ + if (!mask) + return; + + BUG_ON(!mask->ref_count); + mask->ref_count--; + + if (!mask->ref_count) { + list_del_rcu(&mask->list); + if (deferred) + call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb); + else + kfree(mask); + } +} + +void ovs_flow_free(struct sw_flow *flow, bool deferred) +{ + if (!flow) + return; + + flow_mask_del_ref(flow->mask, deferred); + + if (deferred) + call_rcu(&flow->rcu, rcu_free_flow_callback); + else + flow_free(flow); +} + +static void free_buckets(struct flex_array *buckets) +{ + flex_array_free(buckets); +} + 
+static void __table_instance_destroy(struct table_instance *ti) +{ + int i; + + if (ti->keep_flows) + goto skip_flows; + + for (i = 0; i < ti->n_buckets; i++) { + struct sw_flow *flow; + struct hlist_head *head = flex_array_get(ti->buckets, i); + struct hlist_node *n; + int ver = ti->node_ver; + + hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { + hlist_del(&flow->hash_node[ver]); + ovs_flow_free(flow, false); + } + } + +skip_flows: + free_buckets(ti->buckets); + kfree(ti); +} + +static struct table_instance *table_instance_alloc(int new_size) +{ + struct table_instance *ti = kmalloc(sizeof(*ti), GFP_KERNEL); + + if (!ti) + return NULL; + + ti->buckets = alloc_buckets(new_size); + + if (!ti->buckets) { + kfree(ti); + return NULL; + } + ti->n_buckets = new_size; + ti->node_ver = 0; + ti->keep_flows = false; + get_random_bytes(&ti->hash_seed, sizeof(u32)); + + return ti; +} + +int ovs_flow_tbl_init(struct flow_table *table) +{ + struct table_instance *ti; + + ti = table_instance_alloc(TBL_MIN_BUCKETS); + + if (!ti) + return -ENOMEM; + + rcu_assign_pointer(table->ti, ti); + INIT_LIST_HEAD(&table->mask_list); + table->last_rehash = jiffies; + table->count = 0; + return 0; +} + +static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) +{ + struct table_instance *ti = container_of(rcu, struct table_instance, rcu); + + __table_instance_destroy(ti); +} + +static void table_instance_destroy(struct table_instance *ti, bool deferred) +{ + if (!ti) + return; + + if (deferred) + call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); + else + __table_instance_destroy(ti); +} + +void ovs_flow_tbl_destroy(struct flow_table *table) +{ + struct table_instance *ti = ovsl_dereference(table->ti); + + table_instance_destroy(ti, false); +} + +struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, + u32 *bucket, u32 *last) +{ + struct sw_flow *flow; + struct hlist_head *head; + int ver; + int i; + + ver = ti->node_ver; + while (*bucket < ti->n_buckets) { + i = 0; + head = 
flex_array_get(ti->buckets, *bucket); + hlist_for_each_entry_rcu(flow, head, hash_node[ver]) { + if (i < *last) { + i++; + continue; + } + *last = i + 1; + return flow; + } + (*bucket)++; + *last = 0; + } + + return NULL; +} + +static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash) +{ + hash = jhash_1word(hash, ti->hash_seed); + return flex_array_get(ti->buckets, + (hash & (ti->n_buckets - 1))); +} + +static void table_instance_insert(struct table_instance *ti, struct sw_flow *flow) +{ + struct hlist_head *head; + + head = find_bucket(ti, flow->hash); + hlist_add_head_rcu(&flow->hash_node[ti->node_ver], head); +} + +static void flow_table_copy_flows(struct table_instance *old, + struct table_instance *new) +{ + int old_ver; + int i; + + old_ver = old->node_ver; + new->node_ver = !old_ver; + + /* Insert in new table. */ + for (i = 0; i < old->n_buckets; i++) { + struct sw_flow *flow; + struct hlist_head *head; + + head = flex_array_get(old->buckets, i); + + hlist_for_each_entry(flow, head, hash_node[old_ver]) + table_instance_insert(new, flow); + } + + old->keep_flows = true; +} + +static struct table_instance *table_instance_rehash(struct table_instance *ti, + int n_buckets) +{ + struct table_instance *new_ti; + + new_ti = table_instance_alloc(n_buckets); + if (!new_ti) + return NULL; + + flow_table_copy_flows(ti, new_ti); + + return new_ti; +} + +int ovs_flow_tbl_flush(struct flow_table *flow_table) +{ + struct table_instance *old_ti; + struct table_instance *new_ti; + + old_ti = ovsl_dereference(flow_table->ti); + new_ti = table_instance_alloc(TBL_MIN_BUCKETS); + if (!new_ti) + return -ENOMEM; + + rcu_assign_pointer(flow_table->ti, new_ti); + flow_table->last_rehash = jiffies; + flow_table->count = 0; + + table_instance_destroy(old_ti, true); + return 0; +} + +static u32 flow_hash(const struct sw_flow_key *key, int key_start, + int key_end) +{ + u32 *hash_key = (u32 *)((u8 *)key + key_start); + int hash_u32s = (key_end - key_start) >> 2; + + 
/* Make sure number of hash bytes are multiple of u32. */ + BUILD_BUG_ON(sizeof(long) % sizeof(u32)); + + return jhash2(hash_key, hash_u32s, 0); +} + +static int flow_key_start(const struct sw_flow_key *key) +{ + if (key->tun_key.ipv4_dst) + return 0; + else + return rounddown(offsetof(struct sw_flow_key, phy), + sizeof(long)); +} + +static bool cmp_key(const struct sw_flow_key *key1, + const struct sw_flow_key *key2, + int key_start, int key_end) +{ + const long *cp1 = (long *)((u8 *)key1 + key_start); + const long *cp2 = (long *)((u8 *)key2 + key_start); + long diffs = 0; + int i; + + for (i = key_start; i < key_end; i += sizeof(long)) + diffs |= *cp1++ ^ *cp2++; + + return diffs == 0; +} + +static bool flow_cmp_masked_key(const struct sw_flow *flow, + const struct sw_flow_key *key, + int key_start, int key_end) +{ + return cmp_key(&flow->key, key, key_start, key_end); +} + +bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, + struct sw_flow_match *match) +{ + struct sw_flow_key *key = match->key; + int key_start = flow_key_start(key); + int key_end = match->range.end; + + return cmp_key(&flow->unmasked_key, key, key_start, key_end); +} + +static struct sw_flow *masked_flow_lookup(struct table_instance *ti, + const struct sw_flow_key *unmasked, + struct sw_flow_mask *mask) +{ + struct sw_flow *flow; + struct hlist_head *head; + int key_start = mask->range.start; + int key_end = mask->range.end; + u32 hash; + struct sw_flow_key masked_key; + + ovs_flow_mask_key(&masked_key, unmasked, mask); + hash = flow_hash(&masked_key, key_start, key_end); + head = find_bucket(ti, hash); + hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) { + if (flow->mask == mask && + flow_cmp_masked_key(flow, &masked_key, + key_start, key_end)) + return flow; + } + return NULL; +} + +struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, + const struct sw_flow_key *key) +{ + struct table_instance *ti = rcu_dereference(tbl->ti); + struct sw_flow_mask *mask; + struct 
sw_flow *flow; + + list_for_each_entry_rcu(mask, &tbl->mask_list, list) { + flow = masked_flow_lookup(ti, key, mask); + if (flow) /* Found */ + return flow; + } + return NULL; +} + +static struct table_instance *table_instance_expand(struct table_instance *ti) +{ + return table_instance_rehash(ti, ti->n_buckets * 2); +} + +void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) +{ + struct table_instance *ti = ovsl_dereference(table->ti); + + BUG_ON(table->count == 0); + hlist_del_rcu(&flow->hash_node[ti->node_ver]); + table->count--; +} + +static struct sw_flow_mask *mask_alloc(void) +{ + struct sw_flow_mask *mask; + + mask = kmalloc(sizeof(*mask), GFP_KERNEL); + if (mask) + mask->ref_count = 0; + + return mask; +} + +static void mask_add_ref(struct sw_flow_mask *mask) +{ + mask->ref_count++; +} + +static bool mask_equal(const struct sw_flow_mask *a, + const struct sw_flow_mask *b) +{ + u8 *a_ = (u8 *)&a->key + a->range.start; + u8 *b_ = (u8 *)&b->key + b->range.start; + + return (a->range.end == b->range.end) + && (a->range.start == b->range.start) + && (memcmp(a_, b_, range_n_bytes(&a->range)) == 0); +} + +static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl, + const struct sw_flow_mask *mask) +{ + struct list_head *ml; + + list_for_each(ml, &tbl->mask_list) { + struct sw_flow_mask *m; + m = container_of(ml, struct sw_flow_mask, list); + if (mask_equal(mask, m)) + return m; + } + + return NULL; +} + +/** + * add a new mask into the mask list. + * The caller needs to make sure that 'mask' is not the same + * as any masks that are already on the list. + */ +static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, + struct sw_flow_mask *new) +{ + struct sw_flow_mask *mask; + mask = flow_mask_find(tbl, new); + if (!mask) { + /* Allocate a new mask if none exists. 
*/ + mask = mask_alloc(); + if (!mask) + return -ENOMEM; + mask->key = new->key; + mask->range = new->range; + list_add_rcu(&mask->list, &tbl->mask_list); + } + + mask_add_ref(mask); + flow->mask = mask; + return 0; +} + +int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, + struct sw_flow_mask *mask) +{ + struct table_instance *new_ti = NULL; + struct table_instance *ti; + int err; + + err = flow_mask_insert(table, flow, mask); + if (err) + return err; + + flow->hash = flow_hash(&flow->key, flow->mask->range.start, + flow->mask->range.end); + ti = ovsl_dereference(table->ti); + table_instance_insert(ti, flow); + table->count++; + + /* Expand table, if necessary, to make room. */ + if (table->count > ti->n_buckets) + new_ti = table_instance_expand(ti); + else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL)) + new_ti = table_instance_rehash(ti, ti->n_buckets); + + if (new_ti) { + rcu_assign_pointer(table->ti, new_ti); + table_instance_destroy(ti, true); + table->last_rehash = jiffies; + } + return 0; +} + +/* Initializes the flow module. + * Returns zero if successful or a negative error code. */ +int ovs_flow_init(void) +{ + BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long)); + BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); + + flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, + 0, NULL); + if (flow_cache == NULL) + return -ENOMEM; + + return 0; +} + +/* Uninitializes the flow module. */ +void ovs_flow_exit(void) +{ + kmem_cache_destroy(flow_cache); +} diff --git a/datapath/flow_table.h b/datapath/flow_table.h new file mode 100644 index 000000000..4db5f78b6 --- /dev/null +++ b/datapath/flow_table.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2007-2013 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#ifndef FLOW_TABLE_H +#define FLOW_TABLE_H 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "flow.h" + +struct table_instance { + struct flex_array *buckets; + unsigned int n_buckets; + struct rcu_head rcu; + int node_ver; + u32 hash_seed; + bool keep_flows; +}; + +struct flow_table { + struct table_instance __rcu *ti; + struct list_head mask_list; + unsigned long last_rehash; + unsigned int count; +}; + +int ovs_flow_init(void); +void ovs_flow_exit(void); + +struct sw_flow *ovs_flow_alloc(void); +void ovs_flow_free(struct sw_flow *, bool deferred); + +int ovs_flow_tbl_init(struct flow_table *); +int ovs_flow_tbl_count(struct flow_table *table); +void ovs_flow_tbl_destroy(struct flow_table *table); +int ovs_flow_tbl_flush(struct flow_table *flow_table); + +int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, + struct sw_flow_mask *mask); +void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow); +struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table, + u32 *bucket, u32 *idx); +struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, + const struct sw_flow_key *); + +bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, + struct sw_flow_match *match); + +void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, + const struct sw_flow_mask *mask); +#endif /* flow_table.h */ diff --git a/datapath/linux/.gitignore 
b/datapath/linux/.gitignore index d74ad3c63..32b1770b4 100644 --- a/datapath/linux/.gitignore +++ b/datapath/linux/.gitignore @@ -15,6 +15,8 @@ /flex_array.c /flow.c /flow_dissector.c +/flow_netlink.c +/flow_table.c /genetlink-openvswitch.c /genl_exec.c /gre.c diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c index 4f7671bcd..09d0fd741 100644 --- a/datapath/linux/compat/vxlan.c +++ b/datapath/linux/compat/vxlan.c @@ -219,8 +219,6 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, int min_headroom; int err; - skb_reset_inner_headers(skb); - min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len + VXLAN_HLEN + sizeof(struct iphdr) + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); @@ -239,6 +237,8 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, vlan_set_tci(skb, 0); } + skb_reset_inner_headers(skb); + vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); vxh->vx_flags = htonl(VXLAN_FLAGS); vxh->vx_vni = vni; diff --git a/include/openflow/nicira-ext.h b/include/openflow/nicira-ext.h index de5ff6ae2..ca272fdf3 100644 --- a/include/openflow/nicira-ext.h +++ b/include/openflow/nicira-ext.h @@ -1717,8 +1717,9 @@ OFP_ASSERT(sizeof(struct nx_action_output_reg) == 24); * * Format: 20-bit IPv6 flow label in least-significant bits. * - * Masking: Not maskable. */ -#define NXM_NX_IPV6_LABEL NXM_HEADER (0x0001, 27, 4) + * Masking: Fully maskable. */ +#define NXM_NX_IPV6_LABEL NXM_HEADER (0x0001, 27, 4) +#define NXM_NX_IPV6_LABEL_W NXM_HEADER_W(0x0001, 27, 4) /* The ECN of the IP header. * diff --git a/include/openflow/openflow-common.h b/include/openflow/openflow-common.h index 5018f8500..45d03ef03 100644 --- a/include/openflow/openflow-common.h +++ b/include/openflow/openflow-common.h @@ -104,8 +104,8 @@ enum ofp_version { #define OFP_MAX_TABLE_NAME_LEN 32 #define OFP_MAX_PORT_NAME_LEN 16 -#define OFP_TCP_PORT 6633 -#define OFP_SSL_PORT 6633 +#define OFP_OLD_PORT 6633 +#define OFP_PORT 6653 #define OFP_ETH_ALEN 6 /* Bytes in an Ethernet address. 
*/ diff --git a/lib/aes128.c b/lib/aes128.c index 9d8d2fc41..98447d14b 100644 --- a/lib/aes128.c +++ b/lib/aes128.c @@ -363,341 +363,6 @@ static const uint32_t Te4[256] = { 0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U, }; -static const uint32_t Td0[256] = { - 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U, - 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U, - 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U, - 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU, - 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U, - 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U, - 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU, - 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U, - 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU, - 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U, - 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U, - 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U, - 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U, - 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU, - 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U, - 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU, - 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U, - 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU, - 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U, - 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U, - 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U, - 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU, - 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U, - 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU, - 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U, - 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU, - 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U, - 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU, - 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU, - 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U, - 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU, - 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U, - 
0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU, - 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U, - 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U, - 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U, - 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU, - 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U, - 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U, - 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU, - 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U, - 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U, - 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U, - 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U, - 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U, - 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU, - 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U, - 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U, - 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U, - 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U, - 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U, - 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU, - 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU, - 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU, - 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU, - 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U, - 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U, - 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU, - 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU, - 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U, - 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU, - 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U, - 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U, - 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U, -}; - -static const uint32_t Td1[256] = { - 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU, - 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U, - 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU, - 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U, - 0x49deb15aU, 
0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U, - 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U, - 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U, - 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U, - 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U, - 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU, - 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU, - 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU, - 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U, - 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU, - 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U, - 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U, - 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U, - 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU, - 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU, - 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U, - 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU, - 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U, - 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU, - 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU, - 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U, - 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U, - 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U, - 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU, - 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U, - 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU, - 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U, - 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U, - 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U, - 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU, - 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U, - 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U, - 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U, - 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U, - 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U, - 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U, - 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU, - 0xcfa6f581U, 
0x28a57adeU, 0x26dab78eU, 0xa43fadbfU, - 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U, - 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU, - 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U, - 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU, - 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU, - 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U, - 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU, - 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U, - 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U, - 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U, - 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U, - 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U, - 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U, - 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U, - 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU, - 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U, - 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U, - 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU, - 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U, - 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U, - 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U, - 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U, -}; - -static const uint32_t Td2[256] = { - 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U, - 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U, - 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U, - 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U, - 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU, - 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U, - 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U, - 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U, - 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U, - 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU, - 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U, - 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U, - 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU, - 0xd323ab73U, 0x02e2724bU, 
0x8f57e31fU, 0xab2a6655U, - 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U, - 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U, - 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U, - 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U, - 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U, - 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU, - 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U, - 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U, - 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U, - 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U, - 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U, - 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU, - 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU, - 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U, - 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU, - 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U, - 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU, - 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU, - 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU, - 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU, - 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U, - 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U, - 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U, - 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U, - 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U, - 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U, - 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U, - 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU, - 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU, - 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U, - 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U, - 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU, - 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU, - 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U, - 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U, - 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U, - 0xbc37744eU, 0xcaa6fc82U, 
0xd0b0e090U, 0xd81533a7U, - 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U, - 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U, - 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U, - 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU, - 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U, - 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U, - 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U, - 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U, - 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U, - 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U, - 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU, - 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U, - 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U, -}; - -static const uint32_t Td3[256] = { - 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU, - 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU, - 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U, - 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U, - 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU, - 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU, - 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U, - 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU, - 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U, - 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU, - 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U, - 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U, - 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U, - 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U, - 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U, - 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU, - 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU, - 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U, - 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U, - 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU, - 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU, - 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U, - 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 
0xd99e7767U, - 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U, - 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U, - 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU, - 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U, - 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U, - 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU, - 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU, - 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U, - 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U, - 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U, - 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU, - 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U, - 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U, - 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U, - 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U, - 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U, - 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U, - 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U, - 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU, - 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U, - 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U, - 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU, - 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU, - 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U, - 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU, - 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U, - 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U, - 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U, - 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U, - 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U, - 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U, - 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU, - 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU, - 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU, - 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU, - 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U, - 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 
0x44db8678U, - 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U, - 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU, - 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U, - 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U, -}; - -static const uint32_t Td4[256] = { - 0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U, - 0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U, - 0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU, - 0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU, - 0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U, - 0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U, - 0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U, - 0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU, - 0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U, - 0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU, - 0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU, - 0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU, - 0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U, - 0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U, - 0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U, - 0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U, - 0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U, - 0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U, - 0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU, - 0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U, - 0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U, - 0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU, - 0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U, - 0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U, - 0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U, - 0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU, - 0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U, - 0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U, - 0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU, - 0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U, - 0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U, - 0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU, 
- 0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U, - 0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU, - 0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU, - 0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U, - 0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U, - 0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U, - 0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U, - 0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU, - 0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U, - 0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U, - 0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU, - 0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU, - 0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU, - 0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U, - 0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU, - 0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U, - 0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U, - 0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U, - 0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U, - 0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU, - 0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U, - 0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU, - 0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU, - 0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU, - 0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU, - 0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U, - 0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU, - 0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U, - 0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU, - 0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U, - 0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U, - 0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU, -}; - static const uint32_t rcon[] = { 0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, 0x40000000, 0x80000000, diff --git a/lib/automake.mk b/lib/automake.mk index b2d6dc24a..ffaf89ab7 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -201,6 
+201,8 @@ lib_libopenvswitch_a_SOURCES = \ lib/svec.h \ lib/table.c \ lib/table.h \ + lib/tag.c \ + lib/tag.h \ lib/timer.c \ lib/timer.h \ lib/timeval.c \ diff --git a/lib/classifier.c b/lib/classifier.c index 36eb1f0bf..53487a45a 100644 --- a/lib/classifier.c +++ b/lib/classifier.c @@ -154,6 +154,7 @@ classifier_init(struct classifier *cls) cls->n_rules = 0; hmap_init(&cls->tables); list_init(&cls->tables_priority); + hmap_init(&cls->partitions); ovs_rwlock_init(&cls->rwlock); } @@ -163,12 +164,20 @@ void classifier_destroy(struct classifier *cls) { if (cls) { + struct cls_table *partition, *next_partition; struct cls_table *table, *next_table; HMAP_FOR_EACH_SAFE (table, next_table, hmap_node, &cls->tables) { destroy_table(cls, table); } hmap_destroy(&cls->tables); + + HMAP_FOR_EACH_SAFE (partition, next_partition, hmap_node, + &cls->partitions) { + hmap_remove(&cls->partitions, &partition->hmap_node); + free(partition); + } + hmap_destroy(&cls->partitions); ovs_rwlock_destroy(&cls->rwlock); } } @@ -187,6 +196,44 @@ classifier_count(const struct classifier *cls) return cls->n_rules; } +static uint32_t +hash_metadata(ovs_be64 metadata_) +{ + uint64_t metadata = (OVS_FORCE uint64_t) metadata_; + return hash_2words(metadata, metadata >> 32); +} + +static struct cls_partition * +find_partition(const struct classifier *cls, ovs_be64 metadata, uint32_t hash) +{ + struct cls_partition *partition; + + HMAP_FOR_EACH_IN_BUCKET (partition, hmap_node, hash, &cls->partitions) { + if (partition->metadata == metadata) { + return partition; + } + } + + return NULL; +} + +static struct cls_partition * +create_partition(struct classifier *cls, struct cls_table *table, + ovs_be64 metadata) +{ + uint32_t hash = hash_metadata(metadata); + struct cls_partition *partition = find_partition(cls, metadata, hash); + if (!partition) { + partition = xmalloc(sizeof *partition); + partition->metadata = metadata; + partition->tags = 0; + tag_tracker_init(&partition->tracker); + 
hmap_insert(&cls->partitions, &partition->hmap_node, hash); + } + tag_tracker_add(&partition->tracker, &partition->tags, table->tag); + return partition; +} + /* Inserts 'rule' into 'cls'. Until 'rule' is removed from 'cls', the caller * must not modify or free it. * @@ -213,8 +260,17 @@ classifier_replace(struct classifier *cls, struct cls_rule *rule) old_rule = insert_rule(cls, table, rule); if (!old_rule) { + if (minimask_get_metadata_mask(&rule->match.mask) == OVS_BE64_MAX) { + ovs_be64 metadata = miniflow_get_metadata(&rule->match.flow); + rule->partition = create_partition(cls, table, metadata); + } else { + rule->partition = NULL; + } + table->n_table_rules++; cls->n_rules++; + } else { + rule->partition = old_rule->partition; } return old_rule; } @@ -238,6 +294,7 @@ classifier_insert(struct classifier *cls, struct cls_rule *rule) void classifier_remove(struct classifier *cls, struct cls_rule *rule) { + struct cls_partition *partition; struct cls_rule *head; struct cls_table *table; @@ -255,6 +312,16 @@ classifier_remove(struct classifier *cls, struct cls_rule *rule) hmap_replace(&table->rules, &rule->hmap_node, &next->hmap_node); } + partition = rule->partition; + if (partition) { + tag_tracker_subtract(&partition->tracker, &partition->tags, + table->tag); + if (!partition->tags) { + hmap_remove(&cls->partitions, &partition->hmap_node); + free(partition); + } + } + if (--table->n_table_rules == 0) { destroy_table(cls, table); } else { @@ -275,13 +342,44 @@ struct cls_rule * classifier_lookup(const struct classifier *cls, const struct flow *flow, struct flow_wildcards *wc) { + const struct cls_partition *partition; struct cls_table *table; struct cls_rule *best; + tag_type tags; + + /* Determine 'tags' such that, if 'table->tag' doesn't intersect them, then + * 'flow' cannot possibly match in 'table': + * + * - If flow->metadata maps to a given 'partition', then we can use + * 'tags' for 'partition->tags'. 
+ * + * - If flow->metadata has no partition, then no rule in 'cls' has an + * exact-match for flow->metadata. That means that we don't need to + * search any table that includes flow->metadata in its mask. + * + * In either case, we always need to search any cls_tables that do not + * include flow->metadata in their masks. One way to do that would be to + * check the "cls_table"s explicitly for that, but that would require an + * extra branch per table. Instead, we mark such a cls_table's 'tag' as + * TAG_ALL and make sure that 'tags' is never empty. This means that + * 'tags' always intersects such a cls_table's 'tag', so we don't need a + * special case. + */ + partition = (hmap_is_empty(&cls->partitions) + ? NULL + : find_partition(cls, flow->metadata, + hash_metadata(flow->metadata))); + tags = partition ? partition->tags : TAG_ARBITRARY; best = NULL; LIST_FOR_EACH (table, list_node, &cls->tables_priority) { - struct cls_rule *rule = find_match(table, flow); + struct cls_rule *rule; + if (!tag_intersects(tags, table->tag)) { + continue; + } + + rule = find_match(table, flow); if (wc) { flow_wildcards_fold_minimask(wc, &table->mask); } @@ -293,6 +391,10 @@ classifier_lookup(const struct classifier *cls, const struct flow *flow, * can not find anything better. 
*/ return best; } + if (!tag_intersects(tags, table->tag)) { + continue; + } + rule = find_match(table, flow); if (wc) { flow_wildcards_fold_minimask(wc, &table->mask); @@ -550,6 +652,7 @@ find_table(const struct classifier *cls, const struct minimask *mask) static struct cls_table * insert_table(struct classifier *cls, const struct minimask *mask) { + uint32_t hash = minimask_hash(mask, 0); struct cls_table *table; table = xzalloc(sizeof *table); @@ -557,6 +660,9 @@ insert_table(struct classifier *cls, const struct minimask *mask) minimask_clone(&table->mask, mask); hmap_insert(&cls->tables, &table->hmap_node, minimask_hash(mask, 0)); list_push_back(&cls->tables_priority, &table->list_node); + table->tag = (minimask_get_metadata_mask(mask) == OVS_BE64_MAX + ? tag_create_deterministic(hash) + : TAG_ALL); return table; } @@ -668,8 +774,7 @@ find_match(const struct cls_table *table, const struct flow *flow) struct cls_rule *rule; HMAP_FOR_EACH_WITH_HASH (rule, hmap_node, hash, &table->rules) { - if (miniflow_equal_flow_in_minimask(&rule->match.flow, flow, - &table->mask)) { + if (minimatch_matches_flow(&rule->match, flow)) { return rule; } } diff --git a/lib/classifier.h b/lib/classifier.h index a795b4a18..0e39012b8 100644 --- a/lib/classifier.h +++ b/lib/classifier.h @@ -19,11 +19,80 @@ /* Flow classifier. * - * A classifier is a "struct classifier", - * a hash map from a set of wildcards to a "struct cls_table", - * a hash map from fixed field values to "struct cls_rule", - * which can contain a list of otherwise identical rules - * with lower priorities. + * + * What? + * ===== + * + * A flow classifier holds any number of "rules", each of which specifies + * values to match for some fields or subfields and a priority. The primary + * design goal for the classifier is that, given a packet, it can as quickly as + * possible find the highest-priority rule that matches the packet. + * + * Each OpenFlow table is implemented as a flow classifier. 
+ * + * + * Basic Design + * ============ + * + * Suppose that all the rules in a classifier had the same form. For example, + * suppose that they all matched on the source and destination Ethernet address + * and wildcarded all the other fields. Then the obvious way to implement a + * classifier would be a hash table on the source and destination Ethernet + * addresses. If new classification rules came along with a different form, + * you could add a second hash table that hashed on the fields matched in those + * rules. With two hash tables, you look up a given flow in each hash table. + * If there are no matches, the classifier didn't contain a match; if you find + * a match in one of them, that's the result; if you find a match in both of + * them, then the result is the rule with the higher priority. + * + * This is how the classifier works. In a "struct classifier", each form of + * "struct cls_rule" present (based on its ->match.mask) goes into a separate + * "struct cls_table". A lookup does a hash lookup in every "struct cls_table" + * in the classifier and tracks the highest-priority match that it finds. The + * tables are kept in a descending priority order according to the highest + * priority rule in each table, which allows lookup to skip over tables that + * can't possibly have a higher-priority match than already found. + * + * One detail: a classifier can contain multiple rules that are identical other + * than their priority. When this happens, only the highest priority rule out + * of a group of otherwise identical rules is stored directly in the "struct + * cls_table", with the other almost-identical rules chained off a linked list + * inside that highest-priority rule. + * + * + * Partitioning + * ============ + * + * Suppose that a given classifier is being used to handle multiple stages in a + * pipeline using "resubmit", with metadata (that is, the OpenFlow 1.1+ field + * named "metadata") distinguishing between the different stages. 
For example, + * metadata value 1 might identify ingress rules, metadata value 2 might + * identify ACLs, and metadata value 3 might identify egress rules. Such a + * classifier is essentially partitioned into multiple sub-classifiers on the + * basis of the metadata value. + * + * The classifier has a special optimization to speed up matching in this + * scenario: + * + * - Each cls_table that matches on metadata gets a tag derived from the + * table's mask, so that it is likely that each table has a unique tag. + * (Duplicate tags have a performance cost but do not affect + * correctness.) + * + * - For each metadata value matched by any cls_rule, the classifier + * constructs a "struct cls_partition" indexed by the metadata value. + * The cls_partition has a 'tags' member whose value is the bitwise-OR of + * the tags of each cls_table that contains any rule that matches on the + * cls_partition's metadata value. In other words, struct cls_partition + * associates metadata values with tables that need to be checked with + * flows with that specific metadata value. + * + * Thus, a flow lookup can start by looking up the partition associated with + * the flow's metadata, and then skip over any cls_table whose 'tag' does not + * intersect the partition's 'tags'. (The flow must also be looked up in any + * cls_table that doesn't match on metadata. We handle that by giving any such + * cls_table TAG_ALL as its 'tags' so that it matches any tag.) + * * * Thread-safety * ============= @@ -37,6 +106,7 @@ #include "hmap.h" #include "list.h" #include "match.h" +#include "tag.h" #include "openflow/nicira-ext.h" #include "openflow/openflow.h" #include "ovs-thread.h" @@ -54,6 +124,7 @@ struct classifier { int n_rules; /* Total number of rules. */ struct hmap tables; /* Contains "struct cls_table"s. */ struct list tables_priority; /* Tables in descending priority order */ + struct hmap partitions; /* Contains "struct cls_partition"s. 
*/ struct ovs_rwlock rwlock OVS_ACQ_AFTER(ofproto_mutex); }; @@ -66,6 +137,7 @@ struct cls_table { int n_table_rules; /* Number of rules, including duplicates. */ unsigned int max_priority; /* Max priority of any rule in the table. */ unsigned int max_count; /* Count of max_priority rules. */ + tag_type tag; /* Tag generated from mask for partitioning. */ }; /* Returns true if 'table' is a "catch-all" table that will match every @@ -82,6 +154,17 @@ struct cls_rule { struct list list; /* List of identical, lower-priority rules. */ struct minimatch match; /* Matching rule. */ unsigned int priority; /* Larger numbers are higher priorities. */ + struct cls_partition *partition; +}; + +/* Associates a metadata value (that is, a value of the OpenFlow 1.1+ metadata + * field) with tags for the "cls_table"s that contain rules that match that + * metadata value. */ +struct cls_partition { + struct hmap_node hmap_node; /* In struct classifier's 'partitions' hmap. */ + ovs_be64 metadata; /* metadata value for this partition. */ + tag_type tags; /* OR of each included flow's cls_table tag. */ + struct tag_tracker tracker; /* Tracks the bits in 'tags'. */ }; void cls_rule_init(struct cls_rule *, const struct match *, diff --git a/lib/coverage-unixctl.man b/lib/coverage-unixctl.man index 971889419..8e5df818e 100644 --- a/lib/coverage-unixctl.man +++ b/lib/coverage-unixctl.man @@ -8,4 +8,6 @@ main loop takes unusually long to run. Coverage counters are useful mainly for performance analysis and debugging. .IP "\fBcoverage/show\fR" -Displays the values of all of the coverage counters. +Displays the averaged per-second rates for the last few seconds, the +last minute and the last hour, and the total counts of all of the +coverage counters. 
diff --git a/lib/coverage.c b/lib/coverage.c index 23e29973d..43647344b 100644 --- a/lib/coverage.c +++ b/lib/coverage.c @@ -63,7 +63,14 @@ struct coverage_counter *coverage_counters[] = { static struct ovs_mutex coverage_mutex = OVS_MUTEX_INITIALIZER; +static long long int coverage_run_time = LLONG_MIN; + +/* Index counter used to compute the moving average array's index. */ +static unsigned int idx_count = 0; + static void coverage_read(struct svec *); +static unsigned int coverage_array_sum(const unsigned int *arr, + const unsigned int len); static void coverage_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED, @@ -206,6 +213,7 @@ coverage_log(void) static void coverage_read(struct svec *lines) { + struct coverage_counter **c = coverage_counters; unsigned long long int *totals; size_t n_never_hit; uint32_t hash; @@ -215,24 +223,37 @@ coverage_read(struct svec *lines) n_never_hit = 0; svec_add_nocopy(lines, - xasprintf("Event coverage, hash=%08"PRIx32":", hash)); + xasprintf("Event coverage, avg rate over last: %d " + "seconds, last minute, last hour, " + "hash=%08"PRIx32":", + COVERAGE_RUN_INTERVAL/1000, hash)); totals = xmalloc(n_coverage_counters * sizeof *totals); ovs_mutex_lock(&coverage_mutex); for (i = 0; i < n_coverage_counters; i++) { - totals[i] = coverage_counters[i]->total; + totals[i] = c[i]->total; } ovs_mutex_unlock(&coverage_mutex); for (i = 0; i < n_coverage_counters; i++) { if (totals[i]) { - svec_add_nocopy(lines, xasprintf("%-24s %9llu", - coverage_counters[i]->name, - totals[i])); + /* Shows the averaged per-second rates for the last + * COVERAGE_RUN_INTERVAL interval, the last minute and + * the last hour. 
*/ + svec_add_nocopy(lines, + xasprintf("%-24s %5.1f/sec %9.3f/sec " + "%13.4f/sec total: %llu", + c[i]->name, + (c[i]->min[(idx_count - 1) % MIN_AVG_LEN] + * 1000.0 / COVERAGE_RUN_INTERVAL), + coverage_array_sum(c[i]->min, MIN_AVG_LEN) / 60.0, + coverage_array_sum(c[i]->hr, HR_AVG_LEN) / 3600.0, + totals[i])); } else { n_never_hit++; } } + svec_add_nocopy(lines, xasprintf("%zu events never hit", n_never_hit)); free(totals); } @@ -249,3 +270,85 @@ coverage_clear(void) } ovs_mutex_unlock(&coverage_mutex); } + +/* Runs approximately every COVERAGE_RUN_INTERVAL amount of time to update the + * coverage counters' 'min' and 'hr' array. 'min' array is for cumulating + * per second counts into per minute count. 'hr' array is for cumulating per + * minute counts into per hour count. Every thread may call this function. */ +void +coverage_run(void) +{ + /* Defines the moving average array index variables. */ + static unsigned int min_idx, hr_idx; + struct coverage_counter **c = coverage_counters; + long long int now; + + ovs_mutex_lock(&coverage_mutex); + now = time_msec(); + /* Initialize the coverage_run_time. */ + if (coverage_run_time == LLONG_MIN) { + coverage_run_time = now + COVERAGE_RUN_INTERVAL; + } + + if (now >= coverage_run_time) { + size_t i, j; + /* Computes the number of COVERAGE_RUN_INTERVAL slots, since + * it is possible that the actual run interval is multiple of + * COVERAGE_RUN_INTERVAL. */ + int slots = (now - coverage_run_time) / COVERAGE_RUN_INTERVAL + 1; + + for (i = 0; i < n_coverage_counters; i++) { + unsigned int count, portion; + unsigned int m_idx = min_idx; + unsigned int h_idx = hr_idx; + unsigned int idx = idx_count; + + /* Computes the differences between the current total and the one + * recorded in last invocation of coverage_run(). */ + count = c[i]->total - c[i]->last_total; + c[i]->last_total = c[i]->total; + /* The count over the time interval is evenly distributed + * among slots by calculating the portion. 
*/ + portion = count / slots; + + for (j = 0; j < slots; j++) { + /* Updates the index variables. */ + /* The m_idx is increased from 0 to MIN_AVG_LEN - 1. Every + * time the m_idx finishes a cycle (a cycle is one minute), + * the h_idx is incremented by 1. */ + m_idx = idx % MIN_AVG_LEN; + h_idx = idx / MIN_AVG_LEN; + + c[i]->min[m_idx] = portion + (j == (slots - 1) + ? count % slots : 0); + c[i]->hr[h_idx] = m_idx == 0 + ? c[i]->min[m_idx] + : (c[i]->hr[h_idx] + c[i]->min[m_idx]); + /* This is to guarantee that h_idx ranges from 0 to 59. */ + idx = (idx + 1) % (MIN_AVG_LEN * HR_AVG_LEN); + } + } + + /* Updates the global index variables. */ + idx_count = (idx_count + slots) % (MIN_AVG_LEN * HR_AVG_LEN); + min_idx = idx_count % MIN_AVG_LEN; + hr_idx = idx_count / MIN_AVG_LEN; + /* Updates the run time. */ + coverage_run_time = now + COVERAGE_RUN_INTERVAL; + } + ovs_mutex_unlock(&coverage_mutex); +} + +static unsigned int +coverage_array_sum(const unsigned int *arr, const unsigned int len) +{ + unsigned int sum = 0; + size_t i; + + ovs_mutex_lock(&coverage_mutex); + for (i = 0; i < len; i++) { + sum += arr[i]; + } + ovs_mutex_unlock(&coverage_mutex); + return sum; +} diff --git a/lib/coverage.h b/lib/coverage.h index 3d1a115d4..163728eba 100644 --- a/lib/coverage.h +++ b/lib/coverage.h @@ -30,11 +30,25 @@ #include "ovs-thread.h" #include "vlog.h" +/* Makes coverage_run run every 5000 ms (5 seconds). + * If this value is redefined, the new value must + * divide 60000 (1 minute). */ +#define COVERAGE_RUN_INTERVAL 5000 +BUILD_ASSERT_DECL(60000 % COVERAGE_RUN_INTERVAL == 0); + +/* Defines the moving average array length. */ +#define MIN_AVG_LEN (60000/COVERAGE_RUN_INTERVAL) +#define HR_AVG_LEN 60 + /* A coverage counter. */ struct coverage_counter { const char *const name; /* Textual name. */ unsigned int (*const count)(void); /* Gets, zeros this thread's count. */ unsigned long long int total; /* Total count. 
*/ + unsigned long long int last_total; + /* The moving average arrays. */ + unsigned int min[MIN_AVG_LEN]; + unsigned int hr[HR_AVG_LEN]; }; /* Defines COUNTER. There must be exactly one such definition at file scope @@ -56,7 +70,7 @@ struct coverage_counter { } \ extern struct coverage_counter counter_##COUNTER; \ struct coverage_counter counter_##COUNTER \ - = { #COUNTER, COUNTER##_count, 0 }; \ + = { #COUNTER, COUNTER##_count, 0, 0, {0}, {0} }; \ extern struct coverage_counter *counter_ptr_##COUNTER; \ struct coverage_counter *counter_ptr_##COUNTER \ __attribute__((section("coverage"))) = &counter_##COUNTER @@ -80,6 +94,7 @@ struct coverage_counter { void coverage_init(void); void coverage_log(void); void coverage_clear(void); +void coverage_run(void); /* Implementation detail. */ #define COVERAGE_DEFINE__(COUNTER) \ diff --git a/lib/dpif.c b/lib/dpif.c index bb95502c6..16819113f 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -51,8 +51,6 @@ COVERAGE_DEFINE(dpif_flow_flush); COVERAGE_DEFINE(dpif_flow_get); COVERAGE_DEFINE(dpif_flow_put); COVERAGE_DEFINE(dpif_flow_del); -COVERAGE_DEFINE(dpif_flow_query_list); -COVERAGE_DEFINE(dpif_flow_query_list_n); COVERAGE_DEFINE(dpif_execute); COVERAGE_DEFINE(dpif_purge); @@ -1352,7 +1350,7 @@ log_flow_message(const struct dpif *dpif, int error, const char *operation, if (error) { ds_put_format(&ds, "(%s) ", ovs_strerror(error)); } - odp_flow_format(key, key_len, mask, mask_len, &ds, true); + odp_flow_format(key, key_len, mask, mask_len, NULL, &ds, true); if (stats) { ds_put_cstr(&ds, ", "); dpif_flow_stats_format(stats, &ds); diff --git a/lib/flow.c b/lib/flow.c index 9ab19617a..0678c6fde 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -36,9 +36,6 @@ #include "openflow/openflow.h" #include "packets.h" #include "unaligned.h" -#include "vlog.h" - -VLOG_DEFINE_THIS_MODULE(flow); COVERAGE_DEFINE(flow_extract); COVERAGE_DEFINE(miniflow_malloc); @@ -103,9 +100,11 @@ static void parse_mpls(struct ofpbuf *b, struct flow *flow) { struct 
mpls_hdr *mh; + bool top = true; while ((mh = ofpbuf_try_pull(b, sizeof *mh))) { - if (flow->mpls_depth++ == 0) { + if (top) { + top = false; flow->mpls_lse = mh->mpls_lse; } if (mh->mpls_lse & htonl(MPLS_BOS_MASK)) { @@ -514,7 +513,7 @@ flow_zero_wildcards(struct flow *flow, const struct flow_wildcards *wildcards) void flow_get_metadata(const struct flow *flow, struct flow_metadata *fmd) { - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 20); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 21); fmd->tun_id = flow->tunnel.tun_id; fmd->tun_src = flow->tunnel.ip_src; @@ -609,7 +608,6 @@ void flow_wildcards_init_exact(struct flow_wildcards *wc) { memset(&wc->masks, 0xff, sizeof wc->masks); - memset(wc->masks.zeros, 0, sizeof wc->masks.zeros); } /* Returns true if 'wc' matches every packet, false if 'wc' fixes any bits or @@ -1093,13 +1091,38 @@ miniflow_alloc_values(struct miniflow *flow, int n) } } +/* Completes an initialization of 'dst' as a miniflow copy of 'src' begun by + * the caller. The caller must have already initialized 'dst->map' properly + * to indicate the nonzero uint32_t elements of 'src'. 'n' must be the number + * of 1-bits in 'dst->map'. + * + * This function initializes 'dst->values' (either inline if possible or with + * malloc() otherwise) and copies the nonzero uint32_t elements of 'src' into + * it. */ +static void +miniflow_init__(struct miniflow *dst, const struct flow *src, int n) +{ + const uint32_t *src_u32 = (const uint32_t *) src; + unsigned int ofs; + int i; + + dst->values = miniflow_alloc_values(dst, n); + ofs = 0; + for (i = 0; i < MINI_N_MAPS; i++) { + uint32_t map; + + for (map = dst->map[i]; map; map = zero_rightmost_1bit(map)) { + dst->values[ofs++] = src_u32[raw_ctz(map) + i * 32]; + } + } +} + /* Initializes 'dst' as a copy of 'src'. The caller must eventually free 'dst' * with miniflow_destroy(). 
*/ void miniflow_init(struct miniflow *dst, const struct flow *src) { const uint32_t *src_u32 = (const uint32_t *) src; - unsigned int ofs; unsigned int i; int n; @@ -1113,16 +1136,17 @@ miniflow_init(struct miniflow *dst, const struct flow *src) } } - /* Initialize dst->values. */ - dst->values = miniflow_alloc_values(dst, n); - ofs = 0; - for (i = 0; i < MINI_N_MAPS; i++) { - uint32_t map; + miniflow_init__(dst, src, n); +} - for (map = dst->map[i]; map; map = zero_rightmost_1bit(map)) { - dst->values[ofs++] = src_u32[raw_ctz(map) + i * 32]; - } - } +/* Initializes 'dst' as a copy of 'src', using 'mask->map' as 'dst''s map. The + * caller must eventually free 'dst' with miniflow_destroy(). */ +void +miniflow_init_with_minimask(struct miniflow *dst, const struct flow *src, + const struct minimask *mask) +{ + memcpy(dst->map, mask->masks.map, sizeof dst->map); + miniflow_init__(dst, src, miniflow_n_values(dst)); } /* Initializes 'dst' as a copy of 'src'. The caller must eventually free 'dst' @@ -1220,16 +1244,35 @@ miniflow_get_vid(const struct miniflow *flow) bool miniflow_equal(const struct miniflow *a, const struct miniflow *b) { + const uint32_t *ap = a->values; + const uint32_t *bp = b->values; int i; for (i = 0; i < MINI_N_MAPS; i++) { - if (a->map[i] != b->map[i]) { - return false; + const uint32_t a_map = a->map[i]; + const uint32_t b_map = b->map[i]; + uint32_t map; + + if (a_map == b_map) { + for (map = a_map; map; map = zero_rightmost_1bit(map)) { + if (*ap++ != *bp++) { + return false; + } + } + } else { + for (map = a_map | b_map; map; map = zero_rightmost_1bit(map)) { + uint32_t bit = rightmost_1bit(map); + uint32_t a_value = a_map & bit ? *ap++ : 0; + uint32_t b_value = b_map & bit ? 
*bp++ : 0; + + if (a_value != b_value) { + return false; + } + } } } - return !memcmp(a->values, b->values, - miniflow_n_values(a) * sizeof *a->values); + return true; } /* Returns true if 'a' and 'b' are equal at the places where there are 1-bits @@ -1289,10 +1332,24 @@ miniflow_equal_flow_in_minimask(const struct miniflow *a, const struct flow *b, uint32_t miniflow_hash(const struct miniflow *flow, uint32_t basis) { - BUILD_ASSERT_DECL(MINI_N_MAPS == 2); - return hash_3words(flow->map[0], flow->map[1], - hash_words(flow->values, miniflow_n_values(flow), - basis)); + const uint32_t *p = flow->values; + uint32_t hash = basis; + int i; + + for (i = 0; i < MINI_N_MAPS; i++) { + uint32_t hash_map = 0; + uint32_t map; + + for (map = flow->map[i]; map; map = zero_rightmost_1bit(map)) { + if (*p) { + hash = mhash_add(hash, *p); + hash_map |= rightmost_1bit(map); + } + p++; + } + hash = mhash_add(hash, hash_map); + } + return mhash_finish(hash, p - flow->values); } /* Returns a hash value for the bits of 'flow' where there are 1-bits in @@ -1313,9 +1370,10 @@ miniflow_hash_in_minimask(const struct miniflow *flow, uint32_t map; for (map = mask->masks.map[i]; map; map = zero_rightmost_1bit(map)) { - int ofs = raw_ctz(map) + i * 32; - - hash = mhash_add(hash, miniflow_get(flow, ofs) & *p); + if (*p) { + int ofs = raw_ctz(map) + i * 32; + hash = mhash_add(hash, miniflow_get(flow, ofs) & *p); + } p++; } } @@ -1332,21 +1390,23 @@ uint32_t flow_hash_in_minimask(const struct flow *flow, const struct minimask *mask, uint32_t basis) { - const uint32_t *flow_u32 = (const uint32_t *) flow; + const uint32_t *flow_u32; const uint32_t *p = mask->masks.values; uint32_t hash; int i; hash = basis; + flow_u32 = (const uint32_t *) flow; for (i = 0; i < MINI_N_MAPS; i++) { uint32_t map; for (map = mask->masks.map[i]; map; map = zero_rightmost_1bit(map)) { - int ofs = raw_ctz(map) + i * 32; - - hash = mhash_add(hash, flow_u32[ofs] & *p); + if (*p) { + hash = mhash_add(hash, 
flow_u32[raw_ctz(map)] & *p); + } p++; } + flow_u32 += 32; } return mhash_finish(hash, (p - mask->masks.values) * 4); @@ -1487,7 +1547,17 @@ bool minimask_is_catchall(const struct minimask *mask_) { const struct miniflow *mask = &mask_->masks; + const uint32_t *p = mask->values; + int i; - BUILD_ASSERT(MINI_N_MAPS == 2); - return !(mask->map[0] | mask->map[1]); + for (i = 0; i < MINI_N_MAPS; i++) { + uint32_t map; + + for (map = mask->map[i]; map; map = zero_rightmost_1bit(map)) { + if (*p++) { + return false; + } + } + } + return true; } diff --git a/lib/flow.h b/lib/flow.h index 75d95e8ec..4bd1504ee 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -21,6 +21,7 @@ #include #include #include +#include "byte-order.h" #include "openflow/nicira-ext.h" #include "openflow/openflow.h" #include "hash.h" @@ -36,7 +37,7 @@ struct ofpbuf; /* This sequence number should be incremented whenever anything involving flows * or the wildcarding of flows changes. This will cause build assertion * failures in places which likely need to be updated. */ -#define FLOW_WC_SEQ 20 +#define FLOW_WC_SEQ 21 #define FLOW_N_REGS 8 BUILD_ASSERT_DECL(FLOW_N_REGS <= NXM_NX_MAX_REGS); @@ -98,7 +99,6 @@ struct flow { union flow_in_port in_port; /* Input port.*/ uint32_t pkt_mark; /* Packet mark. */ ovs_be32 mpls_lse; /* MPLS label stack entry. */ - uint16_t mpls_depth; /* Depth of MPLS stack. */ ovs_be16 vlan_tci; /* If 802.1Q, TCI | VLAN_CFI; otherwise 0. */ ovs_be16 dl_type; /* Ethernet frame type. */ ovs_be16 tp_src; /* TCP/UDP/SCTP source port. */ @@ -111,15 +111,14 @@ struct flow { uint8_t arp_tha[6]; /* ARP/ND target hardware address. */ uint8_t nw_ttl; /* IP TTL/Hop Limit. */ uint8_t nw_frag; /* FLOW_FRAG_* flags. */ - uint8_t zeros[6]; }; BUILD_ASSERT_DECL(sizeof(struct flow) % 4 == 0); #define FLOW_U32S (sizeof(struct flow) / 4) /* Remember to update FLOW_WC_SEQ when changing 'struct flow'. 
*/ -BUILD_ASSERT_DECL(sizeof(struct flow) == sizeof(struct flow_tnl) + 160 && - FLOW_WC_SEQ == 20); +BUILD_ASSERT_DECL(sizeof(struct flow) == sizeof(struct flow_tnl) + 152 && + FLOW_WC_SEQ == 21); /* Represents the metadata fields of struct flow. */ struct flow_metadata { @@ -291,7 +290,7 @@ bool flow_equal_except(const struct flow *a, const struct flow *b, * * The 'map' member holds one bit for each uint32_t in a "struct flow". Each * 0-bit indicates that the corresponding uint32_t is zero, each 1-bit that it - * is nonzero. + * *may* be nonzero. * * 'values' points to the start of an array that has one element for each 1-bit * in 'map'. The least-numbered 1-bit is in values[0], the next 1-bit is in @@ -309,9 +308,9 @@ bool flow_equal_except(const struct flow *a, const struct flow *b, * that makes sense. So far that's only proved useful for * minimask_combine(), but the principle works elsewhere. * - * The implementation maintains and depends on the invariant that every element - * in 'values' is nonzero; that is, wherever a 1-bit appears in 'map', the - * corresponding element of 'values' must be nonzero. + * Elements in 'values' are allowed to be zero. This is useful for "struct + * minimatch", for which ensuring that the miniflow and minimask members have + * the same 'map' allows optimization.
*/ struct miniflow { uint32_t *values; @@ -320,6 +319,8 @@ struct miniflow { }; void miniflow_init(struct miniflow *, const struct flow *); +void miniflow_init_with_minimask(struct miniflow *, const struct flow *, + const struct minimask *); void miniflow_clone(struct miniflow *, const struct miniflow *); void miniflow_move(struct miniflow *dst, struct miniflow *); void miniflow_destroy(struct miniflow *); @@ -328,6 +329,7 @@ void miniflow_expand(const struct miniflow *, struct flow *); uint32_t miniflow_get(const struct miniflow *, unsigned int u32_ofs); uint16_t miniflow_get_vid(const struct miniflow *); +static inline ovs_be64 miniflow_get_metadata(const struct miniflow *); bool miniflow_equal(const struct miniflow *a, const struct miniflow *b); bool miniflow_equal_in_minimask(const struct miniflow *a, @@ -361,11 +363,36 @@ void minimask_expand(const struct minimask *, struct flow_wildcards *); uint32_t minimask_get(const struct minimask *, unsigned int u32_ofs); uint16_t minimask_get_vid_mask(const struct minimask *); +static inline ovs_be64 minimask_get_metadata_mask(const struct minimask *); bool minimask_equal(const struct minimask *a, const struct minimask *b); uint32_t minimask_hash(const struct minimask *, uint32_t basis); bool minimask_has_extra(const struct minimask *, const struct minimask *); bool minimask_is_catchall(const struct minimask *); + +/* Returns the value of the OpenFlow 1.1+ "metadata" field in 'flow'. */ +static inline ovs_be64 +miniflow_get_metadata(const struct miniflow *flow) +{ + enum { MD_OFS = offsetof(struct flow, metadata) }; + BUILD_ASSERT_DECL(MD_OFS % sizeof(uint32_t) == 0); + ovs_be32 hi = (OVS_FORCE ovs_be32) miniflow_get(flow, MD_OFS / 4); + ovs_be32 lo = (OVS_FORCE ovs_be32) miniflow_get(flow, MD_OFS / 4 + 1); + + return htonll(((uint64_t) ntohl(hi) << 32) | ntohl(lo)); +} + +/* Returns the mask for the OpenFlow 1.1+ "metadata" field in 'mask'. 
+ * + * The return value is all-1-bits if 'mask' matches on the whole value of the + * metadata field, all-0-bits if 'mask' entirely wildcards the metadata field, + * or some other value if the metadata field is partially matched, partially + * wildcarded. */ +static inline ovs_be64 +minimask_get_metadata_mask(const struct minimask *mask) +{ + return miniflow_get_metadata(&mask->masks); +} #endif /* flow.h */ diff --git a/lib/jsonrpc.c b/lib/jsonrpc.c index e02f03579..cef96a9cc 100644 --- a/lib/jsonrpc.c +++ b/lib/jsonrpc.c @@ -59,22 +59,21 @@ static void jsonrpc_cleanup(struct jsonrpc *); static void jsonrpc_error(struct jsonrpc *, int error); /* This is just the same as stream_open() except that it uses the default - * JSONRPC ports if none is specified. */ + * JSONRPC port if none is specified. */ int jsonrpc_stream_open(const char *name, struct stream **streamp, uint8_t dscp) { - return stream_open_with_default_ports(name, JSONRPC_TCP_PORT, - JSONRPC_SSL_PORT, streamp, - dscp); + return stream_open_with_default_port(name, OVSDB_OLD_PORT, + streamp, dscp); } /* This is just the same as pstream_open() except that it uses the default - * JSONRPC ports if none is specified. */ + * JSONRPC port if none is specified. */ int jsonrpc_pstream_open(const char *name, struct pstream **pstreamp, uint8_t dscp) { - return pstream_open_with_default_ports(name, JSONRPC_TCP_PORT, - JSONRPC_SSL_PORT, pstreamp, dscp); + return pstream_open_with_default_port(name, OVSDB_OLD_PORT, + pstreamp, dscp); } /* Returns a new JSON-RPC stream that uses 'stream' for input and output. The diff --git a/lib/jsonrpc.h b/lib/jsonrpc.h index 0ae205d32..539720027 100644 --- a/lib/jsonrpc.h +++ b/lib/jsonrpc.h @@ -34,10 +34,12 @@ struct stream; /* Default port numbers. * - * There is nothing standard about these port numbers. They are simply what - * we have chosen. */ -#define JSONRPC_TCP_PORT 6632 -#define JSONRPC_SSL_PORT 6632 + * OVSDB_OLD_PORT defines the original port number used by OVS. 
+ * OVSDB_PORT defines the official port number assigned by IANA. By + * default, we still use OVSDB_OLD_PORT, but we present a warning that + * this will change. */ +#define OVSDB_OLD_PORT 6632 +#define OVSDB_PORT 6640 int jsonrpc_stream_open(const char *name, struct stream **, uint8_t dscp); int jsonrpc_pstream_open(const char *name, struct pstream **, uint8_t dscp); diff --git a/lib/lockfile.c b/lib/lockfile.c index 43e55927e..d8f3952fb 100644 --- a/lib/lockfile.c +++ b/lib/lockfile.c @@ -35,7 +35,6 @@ VLOG_DEFINE_THIS_MODULE(lockfile); COVERAGE_DEFINE(lockfile_lock); -COVERAGE_DEFINE(lockfile_timeout); COVERAGE_DEFINE(lockfile_error); COVERAGE_DEFINE(lockfile_unlock); diff --git a/lib/mac-learning.c b/lib/mac-learning.c index 80dac6982..fe0674404 100644 --- a/lib/mac-learning.c +++ b/lib/mac-learning.c @@ -29,9 +29,6 @@ #include "unaligned.h" #include "util.h" #include "vlan-bitmap.h" -#include "vlog.h" - -VLOG_DEFINE_THIS_MODULE(mac_learning); COVERAGE_DEFINE(mac_learning_learned); COVERAGE_DEFINE(mac_learning_expired); diff --git a/lib/match.c b/lib/match.c index 03413fa5c..93f61f98a 100644 --- a/lib/match.c +++ b/lib/match.c @@ -21,10 +21,6 @@ #include "dynamic-string.h" #include "ofp-util.h" #include "packets.h" -#include "vlog.h" - -VLOG_DEFINE_THIS_MODULE(match); - /* Converts the flow in 'flow' into a match in 'match', with the given * 'wildcards'. */ @@ -835,7 +831,7 @@ match_format(const struct match *match, struct ds *s, unsigned int priority) int i; - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 20); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 21); if (priority != OFP_DEFAULT_PRIORITY) { ds_put_format(s, "priority=%u,", priority); @@ -1091,8 +1087,8 @@ match_print(const struct match *match) void minimatch_init(struct minimatch *dst, const struct match *src) { - miniflow_init(&dst->flow, &src->flow); minimask_init(&dst->mask, &src->wc); + miniflow_init_with_minimask(&dst->flow, &src->flow, &dst->mask); } /* Initializes 'dst' as a copy of 'src'.
The caller must eventually free 'dst' @@ -1145,6 +1141,35 @@ minimatch_hash(const struct minimatch *match, uint32_t basis) return miniflow_hash(&match->flow, minimask_hash(&match->mask, basis)); } +/* Returns true if 'target' satisfies 'match', that is, if each bit for which + * 'match' specifies a particular value has the correct value in 'target'. + * + * This function is equivalent to miniflow_equal_flow_in_minimask(&match->flow, + * target, &match->mask) but it is faster because of the invariant that + * match->flow.map and match->mask.map are the same. */ +bool +minimatch_matches_flow(const struct minimatch *match, + const struct flow *target) +{ + const uint32_t *target_u32 = (const uint32_t *) target; + const uint32_t *flowp = match->flow.values; + const uint32_t *maskp = match->mask.masks.values; + int i; + + for (i = 0; i < MINI_N_MAPS; i++) { + uint32_t map; + + for (map = match->flow.map[i]; map; map = zero_rightmost_1bit(map)) { + if ((*flowp++ ^ target_u32[raw_ctz(map)]) & *maskp++) { + return false; + } + } + target_u32 += 32; + } + + return true; +} + /* Appends a string representation of 'match' to 's'. If 'priority' is * different from OFP_DEFAULT_PRIORITY, includes it in 's'. */ void diff --git a/lib/match.h b/lib/match.h index 7b104ee12..48c8aa244 100644 --- a/lib/match.h +++ b/lib/match.h @@ -132,13 +132,15 @@ void match_print(const struct match *); /* A sparse representation of a "struct match". * - * This has the same invariant as "struct match", that is, a 1-bit in the - * 'flow' must correspond to a 1-bit in 'mask'. + * There are two invariants: * - * The invariants for the underlying miniflow and minimask are also maintained, - * which means that 'flow' and 'mask' can have different 'map's. In - * particular, if the match checks that a given 32-bit field has value 0, then - * 'map' will have a 1-bit in 'mask' but a 0-bit in 'flow' for that field.
*/ + * - The same invariant as "struct match", that is, a 1-bit in the 'flow' + * must correspond to a 1-bit in 'mask'. + * + * - 'flow' and 'mask' have the same 'map'. This implies that 'flow' and + * 'mask' have the same part of "struct flow" at the same offset into + * 'values', which makes minimatch_matches_flow() faster. + */ struct minimatch { struct miniflow flow; struct minimask mask; @@ -154,6 +156,8 @@ void minimatch_expand(const struct minimatch *, struct match *); bool minimatch_equal(const struct minimatch *a, const struct minimatch *b); uint32_t minimatch_hash(const struct minimatch *, uint32_t basis); +bool minimatch_matches_flow(const struct minimatch *, const struct flow *); + void minimatch_format(const struct minimatch *, struct ds *, unsigned int priority); char *minimatch_to_string(const struct minimatch *, unsigned int priority); diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c index 23c05c5e8..34205037c 100644 --- a/lib/netlink-socket.c +++ b/lib/netlink-socket.c @@ -40,7 +40,6 @@ VLOG_DEFINE_THIS_MODULE(netlink_socket); COVERAGE_DEFINE(netlink_overflow); COVERAGE_DEFINE(netlink_received); COVERAGE_DEFINE(netlink_recv_jumbo); -COVERAGE_DEFINE(netlink_send); COVERAGE_DEFINE(netlink_sent); /* Linux header file confusion causes this to be undefined. 
*/ diff --git a/lib/netlink.c b/lib/netlink.c index 50444abd9..40477eaec 100644 --- a/lib/netlink.c +++ b/lib/netlink.c @@ -322,7 +322,7 @@ nl_msg_push_unspec_uninit(struct ofpbuf *msg, uint16_t type, size_t size) { size_t total_size = NLA_HDRLEN + size; struct nlattr* nla = nl_msg_push_uninit(msg, total_size); - ovs_assert(NLA_ALIGN(total_size) <= UINT16_MAX); + ovs_assert(!nl_attr_oversized(size)); nla->nla_len = total_size; nla->nla_type = type; return nla + 1; @@ -468,6 +468,16 @@ nl_msg_next(struct ofpbuf *buffer, struct ofpbuf *msg) msg->size = 0; return NULL; } + +/* Returns true if a Netlink attribute with a payload that is 'payload_size' + * bytes long would be oversized, that is, if it's not possible to create an + * nlattr of that size because its size wouldn't fit in the 16-bit nla_len + * field. */ +bool +nl_attr_oversized(size_t payload_size) +{ + return NL_ATTR_SIZE(payload_size) > UINT16_MAX; +} /* Attributes. */ diff --git a/lib/netlink.h b/lib/netlink.h index afe2277ec..21d49d38f 100644 --- a/lib/netlink.h +++ b/lib/netlink.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009, 2010, 2011 Nicira, Inc. + * Copyright (c) 2008, 2009, 2010, 2011, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -103,6 +103,8 @@ struct nlmsghdr *nl_msg_next(struct ofpbuf *buffer, struct ofpbuf *msg); #define NL_A_BE32_SIZE NL_ATTR_SIZE(sizeof(ovs_be32)) #define NL_A_BE64_SIZE NL_ATTR_SIZE(sizeof(ovs_be64)) #define NL_A_FLAG_SIZE NL_ATTR_SIZE(0) + +bool nl_attr_oversized(size_t payload_size); /* Netlink attribute types. */ enum nl_attr_type diff --git a/lib/nx-match.c b/lib/nx-match.c index 2d7ee347a..8444ab759 100644 --- a/lib/nx-match.c +++ b/lib/nx-match.c @@ -570,7 +570,7 @@ nx_put_raw(struct ofpbuf *b, bool oxm, const struct match *match, int match_len; int i; - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 20); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 21); /* Metadata. 
*/ if (match->wc.masks.in_port.ofp_port) { diff --git a/lib/odp-util.c b/lib/odp-util.c index aec4196a4..5c7ccfb63 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -51,7 +51,8 @@ static const char *delimiters = ", \t\r\n"; static int parse_odp_key_mask_attr(const char *, const struct simap *port_names, struct ofpbuf *, struct ofpbuf *); static void format_odp_key_attr(const struct nlattr *a, - const struct nlattr *ma, struct ds *ds, + const struct nlattr *ma, + const struct hmap *portno_names, struct ds *ds, bool verbose); /* Returns one the following for the action with the given OVS_ACTION_ATTR_* @@ -401,7 +402,7 @@ format_odp_action(struct ds *ds, const struct nlattr *a) break; case OVS_ACTION_ATTR_SET: ds_put_cstr(ds, "set("); - format_odp_key_attr(nl_attr_get(a), NULL, ds, true); + format_odp_key_attr(nl_attr_get(a), NULL, NULL, ds, true); ds_put_cstr(ds, ")"); break; case OVS_ACTION_ATTR_PUSH_VLAN: @@ -935,10 +936,49 @@ odp_mask_attr_is_exact(const struct nlattr *ma) return is_exact; } +void +odp_portno_names_set(struct hmap *portno_names, odp_port_t port_no, + char *port_name) +{ + struct odp_portno_names *odp_portno_names; + + odp_portno_names = xmalloc(sizeof *odp_portno_names); + odp_portno_names->port_no = port_no; + odp_portno_names->name = xstrdup(port_name); + hmap_insert(portno_names, &odp_portno_names->hmap_node, + hash_odp_port(port_no)); +} + +static char * +odp_portno_names_get(const struct hmap *portno_names, odp_port_t port_no) +{ + struct odp_portno_names *odp_portno_names; + + HMAP_FOR_EACH_IN_BUCKET (odp_portno_names, hmap_node, + hash_odp_port(port_no), portno_names) { + if (odp_portno_names->port_no == port_no) { + return odp_portno_names->name; + } + } + return NULL; +} + +void +odp_portno_names_destroy(struct hmap *portno_names) +{ + struct odp_portno_names *odp_portno_names, *odp_portno_names_next; + HMAP_FOR_EACH_SAFE (odp_portno_names, odp_portno_names_next, + hmap_node, portno_names) { + hmap_remove(portno_names, 
&odp_portno_names->hmap_node); + free(odp_portno_names->name); + free(odp_portno_names); + } +} static void format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma, - struct ds *ds, bool verbose) + const struct hmap *portno_names, struct ds *ds, + bool verbose) { struct flow_tnl tun_key; enum ovs_key_attr attr = nl_attr_type(a); @@ -981,10 +1021,11 @@ format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma, case OVS_KEY_ATTR_ENCAP: if (ma && nl_attr_get_size(ma) && nl_attr_get_size(a)) { odp_flow_format(nl_attr_get(a), nl_attr_get_size(a), - nl_attr_get(ma), nl_attr_get_size(ma), ds, verbose); - } else if (nl_attr_get_size(a)) { - odp_flow_format(nl_attr_get(a), nl_attr_get_size(a), NULL, 0, ds, + nl_attr_get(ma), nl_attr_get_size(ma), NULL, ds, verbose); + } else if (nl_attr_get_size(a)) { + odp_flow_format(nl_attr_get(a), nl_attr_get_size(a), NULL, 0, NULL, + ds, verbose); } break; @@ -1038,9 +1079,19 @@ format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma, break; case OVS_KEY_ATTR_IN_PORT: - ds_put_format(ds, "%"PRIu32, nl_attr_get_u32(a)); - if (!is_exact) { - ds_put_format(ds, "/%#"PRIx32, nl_attr_get_u32(ma)); + if (portno_names && verbose && is_exact) { + char *name = odp_portno_names_get(portno_names, + u32_to_odp(nl_attr_get_u32(a))); + if (name) { + ds_put_format(ds, "%s", name); + } else { + ds_put_format(ds, "%"PRIu32, nl_attr_get_u32(a)); + } + } else { + ds_put_format(ds, "%"PRIu32, nl_attr_get_u32(a)); + if (!is_exact) { + ds_put_format(ds, "/%#"PRIx32, nl_attr_get_u32(ma)); + } } break; @@ -1364,7 +1415,7 @@ generate_all_wildcard_mask(struct ofpbuf *ofp, const struct nlattr *key) void odp_flow_format(const struct nlattr *key, size_t key_len, const struct nlattr *mask, size_t mask_len, - struct ds *ds, bool verbose) + const struct hmap *portno_names, struct ds *ds, bool verbose) { if (key_len) { const struct nlattr *a; @@ -1398,7 +1449,7 @@ odp_flow_format(const struct nlattr *key, size_t key_len, if 
(!first_field) { ds_put_char(ds, ','); } - format_odp_key_attr(a, ma, ds, verbose); + format_odp_key_attr(a, ma, portno_names, ds, verbose); first_field = false; } ofpbuf_clear(&ofp); @@ -1435,7 +1486,7 @@ void odp_flow_key_format(const struct nlattr *key, size_t key_len, struct ds *ds) { - odp_flow_format(key, key_len, NULL, 0, ds, true); + odp_flow_format(key, key_len, NULL, 0, NULL, ds, true); } static void @@ -2503,9 +2554,7 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *data, arp_key->arp_op = htons(data->nw_proto); memcpy(arp_key->arp_sha, data->arp_sha, ETH_ADDR_LEN); memcpy(arp_key->arp_tha, data->arp_tha, ETH_ADDR_LEN); - } - - if (flow->mpls_depth) { + } else if (eth_type_mpls(flow->dl_type)) { struct ovs_key_mpls *mpls_key; mpls_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_MPLS, @@ -2798,7 +2847,6 @@ parse_l2_5_onward(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1], return ODP_FIT_TOO_LITTLE; } flow->mpls_lse = nl_attr_get_be32(attrs[OVS_KEY_ATTR_MPLS]); - flow->mpls_depth++; } else if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_MPLS)) { flow->mpls_lse = nl_attr_get_be32(attrs[OVS_KEY_ATTR_MPLS]); @@ -2806,10 +2854,6 @@ parse_l2_5_onward(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1], return ODP_FIT_ERROR; } expected_attrs |= (UINT64_C(1) << OVS_KEY_ATTR_MPLS); - if (flow->mpls_lse) { - /* XXX Is this needed? 
*/ - flow->mpls_depth = 0xffff; - } } goto done; } else if (src_flow->dl_type == htons(ETH_TYPE_IP)) { @@ -3325,10 +3369,10 @@ commit_set_ether_addr_action(const struct flow *flow, struct flow *base, } static void -commit_vlan_action(const struct flow *flow, struct flow *base, +commit_vlan_action(ovs_be16 vlan_tci, struct flow *base, struct ofpbuf *odp_actions, struct flow_wildcards *wc) { - if (base->vlan_tci == flow->vlan_tci) { + if (base->vlan_tci == vlan_tci) { return; } @@ -3338,61 +3382,57 @@ commit_vlan_action(const struct flow *flow, struct flow *base, nl_msg_put_flag(odp_actions, OVS_ACTION_ATTR_POP_VLAN); } - if (flow->vlan_tci & htons(VLAN_CFI)) { + if (vlan_tci & htons(VLAN_CFI)) { struct ovs_action_push_vlan vlan; vlan.vlan_tpid = htons(ETH_TYPE_VLAN); - vlan.vlan_tci = flow->vlan_tci; + vlan.vlan_tci = vlan_tci; nl_msg_put_unspec(odp_actions, OVS_ACTION_ATTR_PUSH_VLAN, &vlan, sizeof vlan); } - base->vlan_tci = flow->vlan_tci; + base->vlan_tci = vlan_tci; } static void commit_mpls_action(const struct flow *flow, struct flow *base, - struct ofpbuf *odp_actions, struct flow_wildcards *wc) + struct ofpbuf *odp_actions, struct flow_wildcards *wc, + int *mpls_depth_delta) { - if (flow->mpls_lse == base->mpls_lse && - flow->mpls_depth == base->mpls_depth) { + if (flow->mpls_lse == base->mpls_lse && !*mpls_depth_delta) { return; } memset(&wc->masks.mpls_lse, 0xff, sizeof wc->masks.mpls_lse); - if (flow->mpls_depth < base->mpls_depth) { - if (base->mpls_depth - flow->mpls_depth > 1) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10); - VLOG_WARN_RL(&rl, "Multiple mpls_pop actions reduced to " - " a single mpls_pop action"); - } - + switch (*mpls_depth_delta) { + case -1: nl_msg_put_be16(odp_actions, OVS_ACTION_ATTR_POP_MPLS, flow->dl_type); - } else if (flow->mpls_depth > base->mpls_depth) { + break; + case 1: { struct ovs_action_push_mpls *mpls; - if (flow->mpls_depth - base->mpls_depth > 1) { - static struct vlog_rate_limit rl = 
VLOG_RATE_LIMIT_INIT(10, 10); - VLOG_WARN_RL(&rl, "Multiple mpls_push actions reduced to " - " a single mpls_push action"); - } - mpls = nl_msg_put_unspec_uninit(odp_actions, OVS_ACTION_ATTR_PUSH_MPLS, sizeof *mpls); memset(mpls, 0, sizeof *mpls); mpls->mpls_ethertype = flow->dl_type; mpls->mpls_lse = flow->mpls_lse; - } else { + break; + } + case 0: { struct ovs_key_mpls mpls_key; mpls_key.mpls_lse = flow->mpls_lse; commit_set_action(odp_actions, OVS_KEY_ATTR_MPLS, &mpls_key, sizeof(mpls_key)); + break; + } + default: + NOT_REACHED(); } base->dl_type = flow->dl_type; base->mpls_lse = flow->mpls_lse; - base->mpls_depth = flow->mpls_depth; + *mpls_depth_delta = 0; } static void @@ -3563,17 +3603,18 @@ commit_set_pkt_mark_action(const struct flow *flow, struct flow *base, * used as part of the action. */ void commit_odp_actions(const struct flow *flow, struct flow *base, - struct ofpbuf *odp_actions, struct flow_wildcards *wc) + struct ofpbuf *odp_actions, struct flow_wildcards *wc, + int *mpls_depth_delta) { commit_set_ether_addr_action(flow, base, odp_actions, wc); - commit_vlan_action(flow, base, odp_actions, wc); + commit_vlan_action(flow->vlan_tci, base, odp_actions, wc); commit_set_nw_action(flow, base, odp_actions, wc); commit_set_port_action(flow, base, odp_actions, wc); /* Committing MPLS actions should occur after committing nw and port * actions. This is because committing MPLS actions may alter a packet so * that it is no longer IP and thus nw and port actions are no longer valid. 
*/ - commit_mpls_action(flow, base, odp_actions, wc); + commit_mpls_action(flow, base, odp_actions, wc, mpls_depth_delta); commit_set_priority_action(flow, base, odp_actions, wc); commit_set_pkt_mark_action(flow, base, odp_actions, wc); } diff --git a/lib/odp-util.h b/lib/odp-util.h index 192cfa016..2712cb007 100644 --- a/lib/odp-util.h +++ b/lib/odp-util.h @@ -23,6 +23,7 @@ #include #include #include "hash.h" +#include "hmap.h" #include "openflow/openflow.h" #include "util.h" @@ -42,6 +43,16 @@ void format_odp_actions(struct ds *, const struct nlattr *odp_actions, int odp_actions_from_string(const char *, const struct simap *port_names, struct ofpbuf *odp_actions); +/* A map from odp port number to its name. */ +struct odp_portno_names { + struct hmap_node hmap_node; /* A node in a port number to name hmap. */ + odp_port_t port_no; /* Port number in the datapath. */ + char *name; /* Name associated with the above 'port_no'. */ +}; + +void odp_portno_names_set(struct hmap *portno_names, odp_port_t port_no, + char *port_name); +void odp_portno_names_destroy(struct hmap *portno_names); /* The maximum number of bytes that odp_flow_key_from_flow() appends to a * buffer. This is the upper bound on the length of a nlattr-formatted flow * key that ovs-vswitchd fully understands. 
@@ -94,7 +105,8 @@ enum odp_key_fitness odp_tun_key_from_attr(const struct nlattr *, void odp_flow_format(const struct nlattr *key, size_t key_len, const struct nlattr *mask, size_t mask_len, - struct ds *, bool verbose); + const struct hmap *portno_names, struct ds *, + bool verbose); void odp_flow_key_format(const struct nlattr *, size_t, struct ds *); int odp_flow_from_string(const char *s, const struct simap *port_names, @@ -130,8 +142,8 @@ const char *odp_key_fitness_to_string(enum odp_key_fitness); void commit_odp_tunnel_action(const struct flow *, struct flow *base, struct ofpbuf *odp_actions); void commit_odp_actions(const struct flow *, struct flow *base, - struct ofpbuf *odp_actions, - struct flow_wildcards *wc); + struct ofpbuf *odp_actions, struct flow_wildcards *wc, + int *mpls_depth_delta); /* ofproto-dpif interface. * diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c index dcc82dbe9..65430f315 100644 --- a/lib/ofp-actions.c +++ b/lib/ofp-actions.c @@ -884,14 +884,14 @@ ofpacts_from_openflow11(const union ofp_action *in, size_t n_in, /* OpenFlow 1.1 instructions. 
*/ #define DEFINE_INST(ENUM, STRUCT, EXTENSIBLE, NAME) \ - static inline const struct STRUCT * \ + static inline const struct STRUCT * OVS_UNUSED \ instruction_get_##ENUM(const struct ofp11_instruction *inst)\ { \ ovs_assert(inst->type == htons(ENUM)); \ return ALIGNED_CAST(struct STRUCT *, inst); \ } \ \ - static inline void \ + static inline void OVS_UNUSED \ instruction_init_##ENUM(struct STRUCT *s) \ { \ memset(s, 0, sizeof *s); \ @@ -899,7 +899,7 @@ ofpacts_from_openflow11(const union ofp_action *in, size_t n_in, s->len = htons(sizeof *s); \ } \ \ - static inline struct STRUCT * \ + static inline struct STRUCT * OVS_UNUSED \ instruction_put_##ENUM(struct ofpbuf *buf) \ { \ struct STRUCT *s = ofpbuf_put_uninit(buf, sizeof *s); \ diff --git a/lib/ofp-parse.c b/lib/ofp-parse.c index 522bd95d1..7ca730512 100644 --- a/lib/ofp-parse.c +++ b/lib/ofp-parse.c @@ -38,9 +38,6 @@ #include "packets.h" #include "socket-util.h" #include "vconn.h" -#include "vlog.h" - -VLOG_DEFINE_THIS_MODULE(ofp_parse); /* Parses 'str' as an 8-bit unsigned integer into '*valuep'. * diff --git a/lib/ofp-util.c b/lib/ofp-util.c index 6a2bf5bf3..173b53479 100644 --- a/lib/ofp-util.c +++ b/lib/ofp-util.c @@ -84,7 +84,7 @@ ofputil_netmask_to_wcbits(ovs_be32 netmask) void ofputil_wildcard_from_ofpfw10(uint32_t ofpfw, struct flow_wildcards *wc) { - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 20); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 21); /* Initialize most of wc. */ flow_wildcards_init_catchall(wc); @@ -4905,7 +4905,6 @@ ofputil_normalize_match__(struct match *match, bool may_log) } if (!(may_match & MAY_MPLS)) { wc.masks.mpls_lse = htonl(0); - wc.masks.mpls_depth = 0; } /* Log any changes. 
*/ diff --git a/lib/ofp-version-opt.c b/lib/ofp-version-opt.c index 84e83d8e7..10784fc54 100644 --- a/lib/ofp-version-opt.c +++ b/lib/ofp-version-opt.c @@ -1,11 +1,8 @@ #include +#include "dynamic-string.h" #include "ofp-util.h" #include "ofp-version-opt.h" #include "ovs-thread.h" -#include "vlog.h" -#include "dynamic-string.h" - -VLOG_DEFINE_THIS_MODULE(ofp_version); static uint32_t allowed_versions = 0; diff --git a/lib/process.c b/lib/process.c index 143347ca5..5dd34b3a7 100644 --- a/lib/process.c +++ b/lib/process.c @@ -37,7 +37,6 @@ VLOG_DEFINE_THIS_MODULE(process); -COVERAGE_DEFINE(process_sigchld); COVERAGE_DEFINE(process_start); struct process { diff --git a/lib/signals.c b/lib/signals.c index f11ed0568..27da5d6f9 100644 --- a/lib/signals.c +++ b/lib/signals.c @@ -110,7 +110,7 @@ const char * signal_name(int signum, char *namebuf, size_t bufsize) { #if HAVE_DECL_SYS_SIGLIST - if (signum >= 0 && signum < ARRAY_SIZE(sys_siglist)) { + if (signum >= 0 && signum < N_SIGNALS) { const char *name = sys_siglist[signum]; if (name) { return name; diff --git a/lib/stream-ssl.c b/lib/stream-ssl.c index 3b9270f93..1e748c007 100644 --- a/lib/stream-ssl.c +++ b/lib/stream-ssl.c @@ -317,7 +317,7 @@ ssl_open(const char *name, char *suffix, struct stream **streamp, uint8_t dscp) return error; } - error = inet_open_active(SOCK_STREAM, suffix, OFP_SSL_PORT, &sin, &fd, + error = inet_open_active(SOCK_STREAM, suffix, OFP_OLD_PORT, &sin, &fd, dscp); if (fd >= 0) { int state = error ? 
STATE_TCP_CONNECTING : STATE_SSL_CONNECTING; @@ -797,7 +797,7 @@ pssl_open(const char *name OVS_UNUSED, char *suffix, struct pstream **pstreamp, return retval; } - fd = inet_open_passive(SOCK_STREAM, suffix, OFP_SSL_PORT, &sin, dscp); + fd = inet_open_passive(SOCK_STREAM, suffix, OFP_OLD_PORT, &sin, dscp); if (fd < 0) { return -fd; } @@ -846,7 +846,7 @@ pssl_accept(struct pstream *pstream, struct stream **new_streamp) } sprintf(name, "ssl:"IP_FMT, IP_ARGS(sin.sin_addr.s_addr)); - if (sin.sin_port != htons(OFP_SSL_PORT)) { + if (sin.sin_port != htons(OFP_OLD_PORT)) { sprintf(strchr(name, '\0'), ":%"PRIu16, ntohs(sin.sin_port)); } return new_ssl_stream(name, new_fd, SERVER, STATE_SSL_CONNECTING, &sin, diff --git a/lib/stream.c b/lib/stream.c index da089ae4b..0442d84e0 100644 --- a/lib/stream.c +++ b/lib/stream.c @@ -26,6 +26,7 @@ #include "dynamic-string.h" #include "fatal-signal.h" #include "flow.h" +#include "jsonrpc.h" #include "ofp-print.h" #include "ofpbuf.h" #include "openflow/nicira-ext.h" @@ -717,23 +718,29 @@ count_fields(const char *s_) return n; } -/* Like stream_open(), but for tcp streams the port defaults to - * 'default_tcp_port' if no port number is given and for SSL streams the port - * defaults to 'default_ssl_port' if no port number is given. */ +/* Like stream_open(), but the port defaults to 'default_port' if no port + * number is given. 
*/ int -stream_open_with_default_ports(const char *name_, - uint16_t default_tcp_port, - uint16_t default_ssl_port, - struct stream **streamp, - uint8_t dscp) +stream_open_with_default_port(const char *name_, + uint16_t default_port, + struct stream **streamp, + uint8_t dscp) { char *name; int error; - if (!strncmp(name_, "tcp:", 4) && count_fields(name_) < 3) { - name = xasprintf("%s:%d", name_, default_tcp_port); - } else if (!strncmp(name_, "ssl:", 4) && count_fields(name_) < 3) { - name = xasprintf("%s:%d", name_, default_ssl_port); + if ((!strncmp(name_, "tcp:", 4) || !strncmp(name_, "ssl:", 4)) + && count_fields(name_) < 3) { + if (default_port == OFP_OLD_PORT) { + VLOG_WARN_ONCE("The default OpenFlow port number will change " + "from %d to %d in a future release", + OFP_OLD_PORT, OFP_PORT); + } else if (default_port == OVSDB_OLD_PORT) { + VLOG_WARN_ONCE("The default OVSDB port number will change " + "from %d to %d in a future release", + OVSDB_OLD_PORT, OVSDB_PORT); + } + name = xasprintf("%s:%d", name_, default_port); } else { name = xstrdup(name_); } @@ -743,23 +750,20 @@ stream_open_with_default_ports(const char *name_, return error; } -/* Like pstream_open(), but for ptcp streams the port defaults to - * 'default_ptcp_port' if no port number is given and for passive SSL streams - * the port defaults to 'default_pssl_port' if no port number is given. */ +/* Like pstream_open(), but port defaults to 'default_port' if no port + * number is given. 
*/ int -pstream_open_with_default_ports(const char *name_, - uint16_t default_ptcp_port, - uint16_t default_pssl_port, - struct pstream **pstreamp, - uint8_t dscp) +pstream_open_with_default_port(const char *name_, + uint16_t default_port, + struct pstream **pstreamp, + uint8_t dscp) { char *name; int error; - if (!strncmp(name_, "ptcp:", 5) && count_fields(name_) < 2) { - name = xasprintf("%s%d", name_, default_ptcp_port); - } else if (!strncmp(name_, "pssl:", 5) && count_fields(name_) < 2) { - name = xasprintf("%s%d", name_, default_pssl_port); + if ((!strncmp(name_, "ptcp:", 5) || !strncmp(name_, "pssl:", 5)) + && count_fields(name_) < 2) { + name = xasprintf("%s%d", name_, default_port); } else { name = xstrdup(name_); } @@ -778,15 +782,12 @@ pstream_open_with_default_ports(const char *name_, * - On error, function returns false and *sin contains garbage. */ bool -stream_parse_target_with_default_ports(const char *target, - uint16_t default_tcp_port, - uint16_t default_ssl_port, - struct sockaddr_in *sin) -{ - return (!strncmp(target, "tcp:", 4) - && inet_parse_active(target + 4, default_tcp_port, sin)) || - (!strncmp(target, "ssl:", 4) - && inet_parse_active(target + 4, default_ssl_port, sin)); +stream_parse_target_with_default_port(const char *target, + uint16_t default_port, + struct sockaddr_in *sin) +{ + return ((!strncmp(target, "tcp:", 4) || !strncmp(target, "ssl:", 4)) + && inet_parse_active(target + 4, default_port, sin)); } /* Attempts to guess the content type of a stream whose first few bytes were diff --git a/lib/stream.h b/lib/stream.h index aa3fa9d48..d966cde81 100644 --- a/lib/stream.h +++ b/lib/stream.h @@ -71,19 +71,16 @@ ovs_be16 pstream_get_bound_port(const struct pstream *); /* Convenience functions. 
*/ -int stream_open_with_default_ports(const char *name, - uint16_t default_tcp_port, - uint16_t default_ssl_port, - struct stream **, +int stream_open_with_default_port(const char *name, + uint16_t default_port, + struct stream **, + uint8_t dscp); +int pstream_open_with_default_port(const char *name, + uint16_t default_port, + struct pstream **, uint8_t dscp); -int pstream_open_with_default_ports(const char *name, - uint16_t default_ptcp_port, - uint16_t default_pssl_port, - struct pstream **, - uint8_t dscp); -bool stream_parse_target_with_default_ports(const char *target, - uint16_t default_tcp_port, - uint16_t default_ssl_port, +bool stream_parse_target_with_default_port(const char *target, + uint16_t default_port, struct sockaddr_in *sin); int stream_or_pstream_needs_probes(const char *name); diff --git a/lib/tag.c b/lib/tag.c new file mode 100644 index 000000000..13d182925 --- /dev/null +++ b/lib/tag.c @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2008, 2009, 2010, 2011, 2013 Nicira, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "tag.h" + +#define LOG2_N_TAG_BITS (N_TAG_BITS == 32 ? 5 : N_TAG_BITS == 64 ? 6 : 0) +BUILD_ASSERT_DECL(LOG2_N_TAG_BITS > 0); + +/* Returns a tag deterministically generated from 'seed'. + * + * 'seed' should have data in all of its bits; if it has data only in its + * low-order bits then the resulting tags will be poorly distributed. 
Use a + * hash function such as hash_bytes() to generate 'seed' if necessary. */ +tag_type +tag_create_deterministic(uint32_t seed) +{ + int x = seed & (N_TAG_BITS - 1); + int y = (seed >> LOG2_N_TAG_BITS) % (N_TAG_BITS - 1); + y += y >= x; + return (1u << x) | (1u << y); +} + +/* Initializes 'tracker'. */ +void +tag_tracker_init(struct tag_tracker *tracker) +{ + memset(tracker, 0, sizeof *tracker); +} + +/* Adds 'add' to '*tags' and records the bits added in 'tracker'. */ +void +tag_tracker_add(struct tag_tracker *tracker, tag_type *tags, tag_type add) +{ + *tags |= add; + for (; add; add = zero_rightmost_1bit(add)) { + tracker->counts[rightmost_1bit_idx(add)]++; + } +} + +/* Removes 'sub' from 'tracker' and unsets any bits in '*tags' that no + * remaining tag includes. */ +void +tag_tracker_subtract(struct tag_tracker *tracker, tag_type *tags, tag_type sub) +{ + for (; sub; sub = zero_rightmost_1bit(sub)) { + if (!--tracker->counts[rightmost_1bit_idx(sub)]) { + *tags &= ~rightmost_1bit(sub); + } + } +} diff --git a/lib/tag.h b/lib/tag.h new file mode 100644 index 000000000..c99fd098e --- /dev/null +++ b/lib/tag.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2008, 2011, 2012, 2013 Nicira, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TAG_H +#define TAG_H 1 + +#include +#include +#include +#include "util.h" + +/* + * Tagging support. + * + * A 'tag' represents an arbitrary category. 
Currently, tags are used to + * represent categories of flows and in particular the value of the 64-bit + * "metadata" field in the flow. The universe of possible categories is very + * large (2**64). The number of categories in use at a given time can also be + * large. This means that keeping track of category membership via + * conventional means (lists, bitmaps, etc.) is likely to be expensive. + * + * Tags are actually implemented via a "superimposed coding", as discussed in + * Knuth TAOCP v.3 section 6.5 "Retrieval on Secondary Keys". A tag is an + * unsigned integer in which exactly 2 bits are set to 1 and the rest set to 0. + * For 32-bit integers (as currently used) there are 32 * 31 / 2 = 496 unique + * tags; for 64-bit integers there are 64 * 63 / 2 = 2,016. + * + * Because there is a small finite number of unique tags, tags must collide + * after some number of them have been created. In practice we generally + * create tags by choosing bits randomly or based on a hash function. + * + * The key property of tags is that we can combine them without increasing the + * amount of data required using bitwise-OR, since the result has the 1-bits + * from both tags set. The necessary tradeoff is that the result is even more + * ambiguous: if combining two tags yields a value with 4 bits set to 1, then + * the result value will test as having 4 * 3 / 2 = 6 unique tags, not just the + * two tags that we combined. + * + * The upshot is this: a value that is the bitwise-OR combination of a number + * of tags will always include the tags that were combined, but it may contain + * any number of additional tags as well. This is acceptable for our use, + * since we want to be sure that we check every classifier table that contains + * a rule with a given metadata value, but it is OK if we check a few extra + * tables as well. + * + * If we combine too many tags, then the result will have every bit set, so + * that it will test as including every tag. 
This can happen, but we hope that + * this is not the common case. + */ + +/* Represents a tag, or the combination of 0 or more tags. */ +typedef uint32_t tag_type; + +#define N_TAG_BITS (CHAR_BIT * sizeof(tag_type)) +BUILD_ASSERT_DECL(IS_POW2(N_TAG_BITS)); + +/* A 'tag_type' value that intersects every tag. */ +#define TAG_ALL UINT32_MAX + +/* An arbitrary tag. */ +#define TAG_ARBITRARY UINT32_C(3) + +tag_type tag_create_deterministic(uint32_t seed); +static inline bool tag_intersects(tag_type, tag_type); + +/* Returns true if 'a' and 'b' have at least one tag in common, + * false if their set of tags is disjoint. */ +static inline bool +tag_intersects(tag_type a, tag_type b) +{ + tag_type x = a & b; + return (x & (x - 1)) != 0; +} + +/* Adding tags is easy, but subtracting is hard because you can't tell whether + * a bit was set only by the tag you're removing or by multiple tags. The + * tag_tracker data structure counts the number of tags that set each bit, + * which allows for efficient subtraction. 
*/ +struct tag_tracker { + unsigned int counts[N_TAG_BITS]; +}; + +void tag_tracker_init(struct tag_tracker *); +void tag_tracker_add(struct tag_tracker *, tag_type *, tag_type); +void tag_tracker_subtract(struct tag_tracker *, tag_type *, tag_type); + +#endif /* tag.h */ diff --git a/lib/timeval.c b/lib/timeval.c index 223ed3084..64ae845a1 100644 --- a/lib/timeval.c +++ b/lib/timeval.c @@ -234,6 +234,7 @@ time_poll(struct pollfd *pollfds, int n_pollfds, long long int timeout_when, log_poll_interval(*last_wakeup); } coverage_clear(); + coverage_run(); start = time_msec(); timeout_when = MIN(timeout_when, deadline); diff --git a/lib/util.h b/lib/util.h index 0db41be92..a899065a3 100644 --- a/lib/util.h +++ b/lib/util.h @@ -87,8 +87,23 @@ void ovs_assert_failure(const char *, const char *, const char *) NO_RETURN; extern const char *program_name; +#define __ARRAY_SIZE_NOCHECK(ARRAY) (sizeof(ARRAY) / sizeof((ARRAY)[0])) +#ifdef __GNUC__ +/* return 1 for array types, 0 otherwise (e.g. for pointers) */ +#define __ARRAY_CHECK(ARRAY) \ + !__builtin_types_compatible_p(typeof(ARRAY), typeof(&ARRAY[0])) + +/* compile-time fail if not array */ +#define __ARRAY_FAIL(ARRAY) (sizeof(char[-2*!__ARRAY_CHECK(ARRAY)])) +#define __ARRAY_SIZE(ARRAY) \ + __builtin_choose_expr(__ARRAY_CHECK(ARRAY), \ + __ARRAY_SIZE_NOCHECK(ARRAY), __ARRAY_FAIL(ARRAY)) +#else +#define __ARRAY_SIZE(ARRAY) __ARRAY_SIZE_NOCHECK(ARRAY) +#endif + /* Returns the number of elements in ARRAY. */ -#define ARRAY_SIZE(ARRAY) (sizeof ARRAY / sizeof *ARRAY) +#define ARRAY_SIZE(ARRAY) __ARRAY_SIZE(ARRAY) /* Returns X / Y, rounding up. X must be nonnegative to round correctly. 
*/ #define DIV_ROUND_UP(X, Y) (((X) + ((Y) - 1)) / (Y)) diff --git a/lib/vconn-active.man b/lib/vconn-active.man index be96ca813..bf7aaf7b5 100644 --- a/lib/vconn-active.man +++ b/lib/vconn-active.man @@ -1,13 +1,13 @@ .IP "\fBssl:\fIip\fR[\fB:\fIport\fR]" -The specified SSL \fIport\fR (default: 6633) on the host at the given -\fIip\fR, which must be expressed as an IP address (not a DNS name). -The \fB\-\-private\-key\fR, \fB\-\-certificate\fR, and -\fB\-\-ca\-cert\fR options are mandatory when this form is used. -. -.IP "\fBtcp:\fIip\fR[\fB:\fIport\fR]" -The specified TCP \fIport\fR (default: 6633) on the host at the given -\fIip\fR, which must be expressed as an IP address (not a DNS name). -. +.IQ "\fBtcp:\fIip\fR[\fB:\fIport\fR]" +The specified \fIport\fR on the host at the given \fIip\fR, which must +be expressed as an IP address (not a DNS name). For \fBssl\fR, the +\fB\-\-private\-key\fR, \fB\-\-certificate\fR, and \fB\-\-ca\-cert\fR +options are mandatory. +.IP +If \fIport\fR is not specified, it currently defaults to 6633. In the +future, the default will change to 6653, which is the IANA-defined +value. .TP \fBunix:\fIfile\fR The Unix domain server socket named \fIfile\fR. diff --git a/lib/vconn-passive.man b/lib/vconn-passive.man index 1edd11835..a9efdb3bd 100644 --- a/lib/vconn-passive.man +++ b/lib/vconn-passive.man @@ -1,16 +1,14 @@ .IP "\fBpssl:\fR[\fIport\fR][\fB:\fIip\fR]" -Listens for OpenFlow SSL connections on \fIport\fR (default: 6633). -The \fB\-\-private\-key\fR, \fB\-\-certificate\fR, and -\fB\-\-ca\-cert\fR options are mandatory when this form is used. By -default, connections are not bound to a particular local IP address, -but \fIip\fR may be specified to listen only for connections to the -given \fIip\fR. -. -.IP "\fBptcp:\fR[\fIport\fR][\fB:\fIip\fR]" -Listens for OpenFlow TCP connections on \fIport\fR (default: 6633). 
-By default, connections are not bound to a particular local IP -address, but \fIip\fR may be specified to listen only for connections -to the given \fIip\fR. +.IQ "\fBptcp:\fR[\fIport\fR][\fB:\fIip\fR]" +Listens for OpenFlow connections on \fIport\fR. By +default, connections are not bound to a particular local IP address, but +\fIip\fR may be specified to listen only for connections to the given +\fIip\fR. For \fBpssl\fR, the \fB\-\-private\-key\fR, +\fB\-\-certificate\fR, and \fB\-\-ca\-cert\fR options are mandatory. +.IP +If \fIport\fR is not specified, it currently defaults to 6633. In the +future, the default will change to 6653, which is the IANA-defined +value. . .IP "\fBpunix:\fIfile\fR" Listens for OpenFlow connections on the Unix domain server socket diff --git a/lib/vconn-stream.c b/lib/vconn-stream.c index 92076d999..027f48f19 100644 --- a/lib/vconn-stream.c +++ b/lib/vconn-stream.c @@ -82,8 +82,7 @@ vconn_stream_open(const char *name, uint32_t allowed_versions, struct stream *stream; int error; - error = stream_open_with_default_ports(name, OFP_TCP_PORT, OFP_SSL_PORT, - &stream, dscp); + error = stream_open_with_default_port(name, OFP_OLD_PORT, &stream, dscp); if (!error) { error = stream_connect(stream); if (!error || error == EAGAIN) { @@ -316,8 +315,8 @@ pvconn_pstream_listen(const char *name, uint32_t allowed_versions, struct pstream *pstream; int error; - error = pstream_open_with_default_ports(name, OFP_TCP_PORT, OFP_SSL_PORT, - &pstream, dscp); + error = pstream_open_with_default_port(name, OFP_OLD_PORT, + &pstream, dscp); if (error) { return error; } diff --git a/lib/vconn.c b/lib/vconn.c index 15ac11909..5708987d5 100644 --- a/lib/vconn.c +++ b/lib/vconn.c @@ -138,10 +138,10 @@ vconn_usage(bool active, bool passive, bool bootstrap OVS_UNUSED) if (active) { printf("Active OpenFlow connection methods:\n"); printf(" tcp:IP[:PORT] " - "PORT (default: %d) at remote IP\n", OFP_TCP_PORT); + "PORT (default: %d) at remote IP\n", OFP_OLD_PORT); #ifdef 
HAVE_OPENSSL printf(" ssl:IP[:PORT] " - "SSL PORT (default: %d) at remote IP\n", OFP_SSL_PORT); + "SSL PORT (default: %d) at remote IP\n", OFP_OLD_PORT); #endif printf(" unix:FILE Unix domain socket named FILE\n"); } @@ -150,11 +150,11 @@ vconn_usage(bool active, bool passive, bool bootstrap OVS_UNUSED) printf("Passive OpenFlow connection methods:\n"); printf(" ptcp:[PORT][:IP] " "listen to TCP PORT (default: %d) on IP\n", - OFP_TCP_PORT); + OFP_OLD_PORT); #ifdef HAVE_OPENSSL printf(" pssl:[PORT][:IP] " "listen for SSL on PORT (default: %d) on IP\n", - OFP_SSL_PORT); + OFP_OLD_PORT); #endif printf(" punix:FILE " "listen on Unix domain socket FILE\n"); diff --git a/lib/vlandev.c b/lib/vlandev.c index 282d28ca3..6531bf0cf 100644 --- a/lib/vlandev.c +++ b/lib/vlandev.c @@ -303,7 +303,7 @@ vlandev_stub_del(const char *vlan_dev OVS_UNUSED) return EOPNOTSUPP; } -static const struct vlandev_class vlandev_stub_class = { +static const struct vlandev_class OVS_UNUSED vlandev_stub_class = { NULL, /* vd_refresh */ vlandev_stub_add, vlandev_stub_del diff --git a/lib/vlog.c b/lib/vlog.c index 37806b8db..b1ca15895 100644 --- a/lib/vlog.c +++ b/lib/vlog.c @@ -42,8 +42,6 @@ VLOG_DEFINE_THIS_MODULE(vlog); -COVERAGE_DEFINE(vlog_recursive); - /* ovs_assert() logs the assertion message, so using ovs_assert() in this * source file could cause recursion. 
*/ #undef ovs_assert diff --git a/manpages.mk b/manpages.mk index 811d2f992..2a34f04bc 100644 --- a/manpages.mk +++ b/manpages.mk @@ -116,6 +116,10 @@ lib/vconn-active.man: lib/vconn-passive.man: lib/vlog.man: +utilities/ovs-dpctl-top.8: \ + utilities/ovs-dpctl-top.8.in +utilities/ovs-dpctl-top.8.in: + utilities/ovs-dpctl.8: \ utilities/ovs-dpctl.8.in \ lib/common.man \ @@ -124,10 +128,6 @@ utilities/ovs-dpctl.8.in: lib/common.man: lib/vlog.man: -utilities/ovs-dpctl-top.8: \ - utilities/ovs-dpctl-top.8.in -utilities/ovs-dpctl-top.8.in: - utilities/ovs-l3ping.8: \ utilities/ovs-l3ping.8.in \ lib/common-syn.man \ diff --git a/ofproto/connmgr.c b/ofproto/connmgr.c index 4a370eba7..8a4195d83 100644 --- a/ofproto/connmgr.c +++ b/ofproto/connmgr.c @@ -692,10 +692,9 @@ update_in_band_remotes(struct connmgr *mgr) continue; } - if (stream_parse_target_with_default_ports(target, - OFP_TCP_PORT, - OFP_SSL_PORT, - sin)) { + if (stream_parse_target_with_default_port(target, + OFP_OLD_PORT, + sin)) { n_addrs++; } } diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index 180b87e2c..9ec081ac0 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -29,6 +29,8 @@ #include "list.h" #include "netlink.h" #include "ofpbuf.h" +#include "ofproto-dpif-ipfix.h" +#include "ofproto-dpif-sflow.h" #include "ofproto-dpif.h" #include "packets.h" #include "poll-loop.h" @@ -38,9 +40,8 @@ VLOG_DEFINE_THIS_MODULE(ofproto_dpif_upcall); -COVERAGE_DEFINE(upcall_queue_overflow); COVERAGE_DEFINE(drop_queue_overflow); -COVERAGE_DEFINE(miss_queue_overflow); +COVERAGE_DEFINE(upcall_queue_overflow); COVERAGE_DEFINE(fmb_queue_overflow); COVERAGE_DEFINE(fmb_queue_revalidated); @@ -53,11 +54,12 @@ struct handler { struct ovs_mutex mutex; /* Mutex guarding the following. */ - /* Atomic queue of unprocessed miss upcalls. */ + /* Atomic queue of unprocessed upcalls. 
*/ struct list upcalls OVS_GUARDED; size_t n_upcalls OVS_GUARDED; size_t n_new_upcalls; /* Only changed by the dispatcher. */ + bool need_signal; /* Only changed by the dispatcher. */ pthread_cond_t wake_cond; /* Wakes 'thread' while holding 'mutex'. */ @@ -78,12 +80,11 @@ struct udpif { pthread_t dispatcher; /* Dispatcher thread ID. */ - struct handler *handlers; /* Miss handlers. */ + struct handler *handlers; /* Upcall handlers. */ size_t n_handlers; /* Queues to pass up to ofproto-dpif. */ struct guarded_list drop_keys; /* "struct drop key"s. */ - struct guarded_list upcalls; /* "struct upcall"s. */ struct guarded_list fmbs; /* "struct flow_miss_batch"es. */ /* Number of times udpif_revalidate() has been called. */ @@ -94,13 +95,33 @@ struct udpif { struct latch exit_latch; /* Tells child threads to exit. */ }; +enum upcall_type { + BAD_UPCALL, /* Some kind of bug somewhere. */ + MISS_UPCALL, /* A flow miss. */ + SFLOW_UPCALL, /* sFlow sample. */ + FLOW_SAMPLE_UPCALL, /* Per-flow sampling. */ + IPFIX_UPCALL /* Per-bridge sampling. */ +}; + +struct upcall { + struct list list_node; /* For queuing upcalls. */ + struct flow_miss *flow_miss; /* This upcall's flow_miss. */ + + /* Raw upcall plus data for keeping track of the memory backing it. */ + struct dpif_upcall dpif_upcall; /* As returned by dpif_recv() */ + struct ofpbuf upcall_buf; /* Owns some data in 'dpif_upcall'. */ + uint64_t upcall_stub[512 / 8]; /* Buffer to reduce need for malloc(). 
*/ +}; + +static void upcall_destroy(struct upcall *); + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); static void recv_upcalls(struct udpif *); -static void handle_miss_upcalls(struct udpif *, struct list *upcalls); +static void handle_upcalls(struct udpif *, struct list *upcalls); static void miss_destroy(struct flow_miss *); static void *udpif_dispatcher(void *); -static void *udpif_miss_handler(void *); +static void *udpif_upcall_handler(void *); struct udpif * udpif_create(struct dpif_backer *backer, struct dpif *dpif) @@ -113,7 +134,6 @@ udpif_create(struct dpif_backer *backer, struct dpif *dpif) udpif->wait_seq = seq_create(); latch_init(&udpif->exit_latch); guarded_list_init(&udpif->drop_keys); - guarded_list_init(&udpif->upcalls); guarded_list_init(&udpif->fmbs); atomic_init(&udpif->reval_seq, 0); @@ -125,7 +145,6 @@ udpif_destroy(struct udpif *udpif) { struct flow_miss_batch *fmb; struct drop_key *drop_key; - struct upcall *upcall; udpif_recv_set(udpif, 0, false); @@ -133,16 +152,11 @@ udpif_destroy(struct udpif *udpif) drop_key_destroy(drop_key); } - while ((upcall = upcall_next(udpif))) { - upcall_destroy(upcall); - } - while ((fmb = flow_miss_batch_next(udpif))) { flow_miss_batch_destroy(fmb); } guarded_list_destroy(&udpif->drop_keys); - guarded_list_destroy(&udpif->upcalls); guarded_list_destroy(&udpif->fmbs); latch_destroy(&udpif->exit_latch); seq_destroy(udpif->wait_seq); @@ -150,8 +164,9 @@ udpif_destroy(struct udpif *udpif) } /* Tells 'udpif' to begin or stop handling flow misses depending on the value - * of 'enable'. 'n_handlers' is the number of miss_handler threads to create. - * Passing 'n_handlers' as zero is equivalent to passing 'enable' as false. */ + * of 'enable'. 'n_handlers' is the number of upcall_handler threads to + * create. Passing 'n_handlers' as zero is equivalent to passing 'enable' as + * false. 
*/ void udpif_recv_set(struct udpif *udpif, size_t n_handlers, bool enable) { @@ -208,9 +223,11 @@ udpif_recv_set(struct udpif *udpif, size_t n_handlers, bool enable) handler->udpif = udpif; list_init(&handler->upcalls); + handler->need_signal = false; xpthread_cond_init(&handler->wake_cond, NULL); ovs_mutex_init(&handler->mutex); - xpthread_create(&handler->thread, NULL, udpif_miss_handler, handler); + xpthread_create(&handler->thread, NULL, udpif_upcall_handler, + handler); } xpthread_create(&udpif->dispatcher, NULL, udpif_dispatcher, udpif); } @@ -221,7 +238,6 @@ udpif_wait(struct udpif *udpif) { uint64_t seq = seq_read(udpif->wait_seq); if (!guarded_list_is_empty(&udpif->drop_keys) || - !guarded_list_is_empty(&udpif->upcalls) || !guarded_list_is_empty(&udpif->fmbs)) { poll_immediate_wake(); } else { @@ -254,18 +270,8 @@ udpif_revalidate(struct udpif *udpif) udpif_drop_key_clear(udpif); } -/* Retrieves the next upcall which ofproto-dpif is responsible for handling. - * The caller is responsible for destroying the returned upcall with - * upcall_destroy(). */ -struct upcall * -upcall_next(struct udpif *udpif) -{ - struct list *next = guarded_list_pop_front(&udpif->upcalls); - return next ? CONTAINER_OF(next, struct upcall, list_node) : NULL; -} - /* Destroys and deallocates 'upcall'. */ -void +static void upcall_destroy(struct upcall *upcall) { if (upcall) { @@ -362,9 +368,8 @@ udpif_drop_key_clear(struct udpif *udpif) } } -/* The dispatcher thread is responsible for receving upcalls from the kernel, - * assigning the miss upcalls to a miss_handler thread, and assigning the more - * complex ones to ofproto-dpif directly. */ +/* The dispatcher thread is responsible for receiving upcalls from the kernel, + * assigning them to a upcall_handler thread. */ static void * udpif_dispatcher(void *arg) { @@ -385,11 +390,11 @@ udpif_dispatcher(void *arg) * by the dispatcher thread. 
Once finished it passes the processed miss * upcalls to ofproto-dpif where they're installed in the datapath. */ static void * -udpif_miss_handler(void *arg) +udpif_upcall_handler(void *arg) { struct handler *handler = arg; - set_subprogram_name("miss_handler"); + set_subprogram_name("upcall_handler"); for (;;) { struct list misses = LIST_INITIALIZER(&misses); size_t i; @@ -415,7 +420,7 @@ udpif_miss_handler(void *arg) } ovs_mutex_unlock(&handler->mutex); - handle_miss_upcalls(handler->udpif, &misses); + handle_upcalls(handler->udpif, &misses); } } @@ -483,13 +488,14 @@ classify_upcall(const struct upcall *upcall) static void recv_upcalls(struct udpif *udpif) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 60); - size_t n_udpif_new_upcalls = 0; - struct handler *handler; int n; for (;;) { + uint32_t hash = udpif->secret; + struct handler *handler; struct upcall *upcall; + size_t n_bytes, left; + struct nlattr *nla; int error; upcall = xmalloc(sizeof *upcall); @@ -502,85 +508,65 @@ recv_upcalls(struct udpif *udpif) break; } - upcall->type = classify_upcall(upcall); - if (upcall->type == BAD_UPCALL) { - upcall_destroy(upcall); - } else if (upcall->type == MISS_UPCALL) { - struct dpif_upcall *dupcall = &upcall->dpif_upcall; - uint32_t hash = udpif->secret; - struct nlattr *nla; - size_t n_bytes, left; - - n_bytes = 0; - NL_ATTR_FOR_EACH (nla, left, dupcall->key, dupcall->key_len) { - enum ovs_key_attr type = nl_attr_type(nla); - if (type == OVS_KEY_ATTR_IN_PORT - || type == OVS_KEY_ATTR_TCP - || type == OVS_KEY_ATTR_UDP) { - if (nl_attr_get_size(nla) == 4) { - ovs_be32 attr = nl_attr_get_be32(nla); - hash = mhash_add(hash, (OVS_FORCE uint32_t) attr); - n_bytes += 4; - } else { - VLOG_WARN("Netlink attribute with incorrect size."); - } + n_bytes = 0; + NL_ATTR_FOR_EACH (nla, left, upcall->dpif_upcall.key, + upcall->dpif_upcall.key_len) { + enum ovs_key_attr type = nl_attr_type(nla); + if (type == OVS_KEY_ATTR_IN_PORT + || type == OVS_KEY_ATTR_TCP + || 
type == OVS_KEY_ATTR_UDP) { + if (nl_attr_get_size(nla) == 4) { + hash = mhash_add(hash, nl_attr_get_u32(nla)); + n_bytes += 4; + } else { + VLOG_WARN_RL(&rl, + "Netlink attribute with incorrect size."); } } - hash = mhash_finish(hash, n_bytes); + } + hash = mhash_finish(hash, n_bytes); - handler = &udpif->handlers[hash % udpif->n_handlers]; + handler = &udpif->handlers[hash % udpif->n_handlers]; - ovs_mutex_lock(&handler->mutex); - if (handler->n_upcalls < MAX_QUEUE_LENGTH) { - list_push_back(&handler->upcalls, &upcall->list_node); - handler->n_new_upcalls = ++handler->n_upcalls; - - if (handler->n_new_upcalls >= FLOW_MISS_MAX_BATCH) { - xpthread_cond_signal(&handler->wake_cond); - } - ovs_mutex_unlock(&handler->mutex); - if (!VLOG_DROP_DBG(&rl)) { - struct ds ds = DS_EMPTY_INITIALIZER; - - odp_flow_key_format(upcall->dpif_upcall.key, - upcall->dpif_upcall.key_len, - &ds); - VLOG_DBG("dispatcher: miss enqueue (%s)", ds_cstr(&ds)); - ds_destroy(&ds); - } - } else { - ovs_mutex_unlock(&handler->mutex); - COVERAGE_INC(miss_queue_overflow); - upcall_destroy(upcall); + ovs_mutex_lock(&handler->mutex); + if (handler->n_upcalls < MAX_QUEUE_LENGTH) { + list_push_back(&handler->upcalls, &upcall->list_node); + if (handler->n_upcalls == 0) { + handler->need_signal = true; } - } else { - size_t len; - - len = guarded_list_push_back(&udpif->upcalls, &upcall->list_node, - MAX_QUEUE_LENGTH); - if (len > 0) { - n_udpif_new_upcalls = len; - if (n_udpif_new_upcalls >= FLOW_MISS_MAX_BATCH) { - seq_change(udpif->wait_seq); - } - } else { - COVERAGE_INC(upcall_queue_overflow); - upcall_destroy(upcall); + handler->n_upcalls++; + if (handler->need_signal && + handler->n_upcalls >= FLOW_MISS_MAX_BATCH) { + handler->need_signal = false; + xpthread_cond_signal(&handler->wake_cond); } + ovs_mutex_unlock(&handler->mutex); + if (!VLOG_DROP_DBG(&rl)) { + struct ds ds = DS_EMPTY_INITIALIZER; + + odp_flow_key_format(upcall->dpif_upcall.key, + upcall->dpif_upcall.key_len, + &ds); + 
VLOG_DBG("dispatcher: enqueue (%s)", ds_cstr(&ds)); + ds_destroy(&ds); + } + } else { + ovs_mutex_unlock(&handler->mutex); + COVERAGE_INC(upcall_queue_overflow); + upcall_destroy(upcall); } } + for (n = 0; n < udpif->n_handlers; ++n) { - handler = &udpif->handlers[n]; - if (handler->n_new_upcalls) { - handler->n_new_upcalls = 0; + struct handler *handler = &udpif->handlers[n]; + + if (handler->need_signal) { + handler->need_signal = false; ovs_mutex_lock(&handler->mutex); xpthread_cond_signal(&handler->wake_cond); ovs_mutex_unlock(&handler->mutex); } } - if (n_udpif_new_upcalls) { - seq_change(udpif->wait_seq); - } } static struct flow_miss * @@ -599,7 +585,7 @@ flow_miss_find(struct hmap *todo, const struct ofproto_dpif *ofproto, } static void -handle_miss_upcalls(struct udpif *udpif, struct list *upcalls) +handle_upcalls(struct udpif *udpif, struct list *upcalls) { struct dpif_op *opsp[FLOW_MISS_MAX_BATCH]; struct dpif_op ops[FLOW_MISS_MAX_BATCH]; @@ -608,6 +594,7 @@ handle_miss_upcalls(struct udpif *udpif, struct list *upcalls) size_t n_misses, n_ops, i; struct flow_miss *miss; unsigned int reval_seq; + enum upcall_type type; bool fail_open; /* Extract the flow from each upcall. Construct in fmb->misses a hash @@ -640,6 +627,8 @@ handle_miss_upcalls(struct udpif *udpif, struct list *upcalls) struct flow_miss *miss = &fmb->miss_buf[n_misses]; struct flow_miss *existing_miss; struct ofproto_dpif *ofproto; + struct dpif_sflow *sflow; + struct dpif_ipfix *ipfix; odp_port_t odp_in_port; struct flow flow; int error; @@ -647,8 +636,39 @@ handle_miss_upcalls(struct udpif *udpif, struct list *upcalls) error = xlate_receive(udpif->backer, packet, dupcall->key, dupcall->key_len, &flow, &miss->key_fitness, &ofproto, &odp_in_port); + if (error) { + if (error == ENODEV) { + struct drop_key *drop_key; + + /* Received packet on datapath port for which we couldn't + * associate an ofproto. This can happen if a port is removed + * while traffic is being received. 
Print a rate-limited + * message in case it happens frequently. Install a drop flow + * so that future packets of the flow are inexpensively dropped + * in the kernel. */ + VLOG_INFO_RL(&rl, "received packet on unassociated datapath " + "port %"PRIu32, odp_in_port); + + drop_key = xmalloc(sizeof *drop_key); + drop_key->key = xmemdup(dupcall->key, dupcall->key_len); + drop_key->key_len = dupcall->key_len; + + if (guarded_list_push_back(&udpif->drop_keys, + &drop_key->list_node, + MAX_QUEUE_LENGTH)) { + seq_change(udpif->wait_seq); + } else { + COVERAGE_INC(drop_queue_overflow); + drop_key_destroy(drop_key); + } + } + list_remove(&upcall->list_node); + upcall_destroy(upcall); + continue; + } - if (!error) { + type = classify_upcall(upcall); + if (type == MISS_UPCALL) { uint32_t hash; flow_extract(packet, flow.skb_priority, flow.pkt_mark, @@ -677,35 +697,57 @@ handle_miss_upcalls(struct udpif *udpif, struct list *upcalls) miss->stats.n_packets++; upcall->flow_miss = miss; - } else { - if (error == ENODEV) { - struct drop_key *drop_key; - - /* Received packet on datapath port for which we couldn't - * associate an ofproto. This can happen if a port is removed - * while traffic is being received. Print a rate-limited - * message in case it happens frequently. Install a drop flow - * so that future packets of the flow are inexpensively dropped - * in the kernel. 
*/ - VLOG_INFO_RL(&rl, "received packet on unassociated datapath " - "port %"PRIu32, odp_in_port); - - drop_key = xmalloc(sizeof *drop_key); - drop_key->key = xmemdup(dupcall->key, dupcall->key_len); - drop_key->key_len = dupcall->key_len; + continue; + } - if (guarded_list_push_back(&udpif->drop_keys, - &drop_key->list_node, - MAX_QUEUE_LENGTH)) { - seq_change(udpif->wait_seq); - } else { - COVERAGE_INC(drop_queue_overflow); - drop_key_destroy(drop_key); - } + switch (type) { + case SFLOW_UPCALL: + sflow = xlate_get_sflow(ofproto); + if (sflow) { + union user_action_cookie cookie; + + memset(&cookie, 0, sizeof cookie); + memcpy(&cookie, nl_attr_get(dupcall->userdata), + sizeof cookie.sflow); + dpif_sflow_received(sflow, dupcall->packet, &flow, odp_in_port, + &cookie); + dpif_sflow_unref(sflow); } - list_remove(&upcall->list_node); - upcall_destroy(upcall); + break; + case IPFIX_UPCALL: + ipfix = xlate_get_ipfix(ofproto); + if (ipfix) { + dpif_ipfix_bridge_sample(ipfix, dupcall->packet, &flow); + dpif_ipfix_unref(ipfix); + } + break; + case FLOW_SAMPLE_UPCALL: + ipfix = xlate_get_ipfix(ofproto); + if (ipfix) { + union user_action_cookie cookie; + + memset(&cookie, 0, sizeof cookie); + memcpy(&cookie, nl_attr_get(dupcall->userdata), + sizeof cookie.flow_sample); + + /* The flow reflects exactly the contents of the packet. + * Sample the packet using it. */ + dpif_ipfix_flow_sample(ipfix, dupcall->packet, &flow, + cookie.flow_sample.collector_set_id, + cookie.flow_sample.probability, + cookie.flow_sample.obs_domain_id, + cookie.flow_sample.obs_point_id); + dpif_ipfix_unref(ipfix); + } + break; + case BAD_UPCALL: + break; + case MISS_UPCALL: + NOT_REACHED(); } + + list_remove(&upcall->list_node); + upcall_destroy(upcall); } /* Initialize each 'struct flow_miss's ->xout. 
diff --git a/ofproto/ofproto-dpif-upcall.h b/ofproto/ofproto-dpif-upcall.h index cd97e79db..da7571937 100644 --- a/ofproto/ofproto-dpif-upcall.h +++ b/ofproto/ofproto-dpif-upcall.h @@ -40,38 +40,6 @@ void udpif_wait(struct udpif *); void udpif_revalidate(struct udpif *); -/* udpif can handle some upcalls on its own. Others need the main ofproto_dpif - * code to handle them. This interface passes upcalls not handled by udpif up - * to the ofproto_dpif main thread. */ - -/* Type of an upcall. */ -enum upcall_type { - /* Handled internally by udpif code. Not returned by upcall_next().*/ - BAD_UPCALL, /* Some kind of bug somewhere. */ - MISS_UPCALL, /* A flow miss. */ - - /* Require main thread's involvement. May be returned by upcall_next(). */ - SFLOW_UPCALL, /* sFlow sample. */ - FLOW_SAMPLE_UPCALL, /* Per-flow sampling. */ - IPFIX_UPCALL /* Per-bridge sampling. */ -}; - -/* An upcall. */ -struct upcall { - struct list list_node; /* For queuing upcalls. */ - struct flow_miss *flow_miss; /* This upcall's flow_miss. */ - - enum upcall_type type; /* Classification. */ - - /* Raw upcall plus data for keeping track of the memory backing it. */ - struct dpif_upcall dpif_upcall; /* As returned by dpif_recv() */ - struct ofpbuf upcall_buf; /* Owns some data in 'dpif_upcall'. */ - uint64_t upcall_stub[512 / 8]; /* Buffer to reduce need for malloc(). */ -}; - -struct upcall *upcall_next(struct udpif *); -void upcall_destroy(struct upcall *); - /* udpif figures out how to forward packets, and does forward them, but it * can't set up datapath flows on its own. This interface passes packet * forwarding data from udpif to the higher level ofproto_dpif to allow the diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index a5b6814aa..930abc31a 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -56,6 +56,10 @@ VLOG_DEFINE_THIS_MODULE(ofproto_dpif_xlate); * flow translation. 
*/ #define MAX_RESUBMIT_RECURSION 64 +/* Maximum number of resubmit actions in a flow translation, whether they are + * recursive or not. */ +#define MAX_RESUBMITS (MAX_RESUBMIT_RECURSION * MAX_RESUBMIT_RECURSION) + struct ovs_rwlock xlate_rwlock = OVS_RWLOCK_INITIALIZER; struct xbridge { @@ -158,7 +162,17 @@ struct xlate_ctx { /* The rule that we are currently translating, or NULL. */ struct rule_dpif *rule; - int recurse; /* Recursion level, via xlate_table_action. */ + int mpls_depth_delta; /* Delta of the mpls stack depth since + * actions were last committed. + * Must be between -1 and 1 inclusive. */ + ovs_be32 pre_push_mpls_lse; /* Used to record the top-most MPLS LSE + * prior to an mpls_push so that it may be + * used for a subsequent mpls_pop. */ + + /* Resubmit statistics, via xlate_table_action(). */ + int recurse; /* Current resubmit nesting depth. */ + int resubmits; /* Total number of resubmits. */ + uint32_t orig_skb_priority; /* Priority when packet arrived. */ uint8_t table_id; /* OpenFlow table ID where flow was found. */ uint32_t sflow_n_outputs; /* Number of output ports. */ @@ -1534,7 +1548,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, /* If 'struct flow' gets additional metadata, we'll need to zero it out * before traversing a patch port. 
*/ - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 20); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 21); if (!xport) { xlate_report(ctx, "Nonexistent output port"); @@ -1645,7 +1659,8 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, if (out_port != ODPP_NONE) { commit_odp_actions(flow, &ctx->base_flow, - &ctx->xout->odp_actions, &ctx->xout->wc); + &ctx->xout->odp_actions, &ctx->xout->wc, + &ctx->mpls_depth_delta); nl_msg_put_odp_port(&ctx->xout->odp_actions, OVS_ACTION_ATTR_OUTPUT, out_port); @@ -1677,6 +1692,7 @@ xlate_recursively(struct xlate_ctx *ctx, struct rule_dpif *rule) rule_dpif_credit_stats(rule, ctx->xin->resubmit_stats); } + ctx->resubmits++; ctx->recurse++; ctx->rule = rule; actions = rule_dpif_get_actions(rule); @@ -1690,7 +1706,18 @@ static void xlate_table_action(struct xlate_ctx *ctx, ofp_port_t in_port, uint8_t table_id, bool may_packet_in) { - if (ctx->recurse < MAX_RESUBMIT_RECURSION) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); + + if (ctx->recurse >= MAX_RESUBMIT_RECURSION) { + VLOG_ERR_RL(&rl, "resubmit actions recursed over %d times", + MAX_RESUBMIT_RECURSION); + } else if (ctx->resubmits >= MAX_RESUBMITS) { + VLOG_ERR_RL(&rl, "over %d resubmit actions", MAX_RESUBMITS); + } else if (ctx->xout->odp_actions.size > UINT16_MAX) { + VLOG_ERR_RL(&rl, "resubmits yielded over 64 kB of actions"); + } else if (ctx->stack.size >= 65536) { + VLOG_ERR_RL(&rl, "resubmits yielded over 64 kB of stack"); + } else { struct rule_dpif *rule; ofp_port_t old_in_port = ctx->xin->flow.in_port.ofp_port; uint8_t old_table_id = ctx->table_id; @@ -1730,12 +1757,10 @@ xlate_table_action(struct xlate_ctx *ctx, } ctx->table_id = old_table_id; - } else { - static struct vlog_rate_limit recurse_rl = VLOG_RATE_LIMIT_INIT(1, 1); - - VLOG_ERR_RL(&recurse_rl, "resubmit actions recursed over %d times", - MAX_RESUBMIT_RECURSION); + return; } + + ctx->exit = true; } static void @@ -1800,7 +1825,8 @@ execute_controller_action(struct xlate_ctx *ctx, int len, 
memset(&key.tunnel, 0, sizeof key.tunnel); commit_odp_actions(&ctx->xin->flow, &ctx->base_flow, - &ctx->xout->odp_actions, &ctx->xout->wc); + &ctx->xout->odp_actions, &ctx->xout->wc, + &ctx->mpls_depth_delta); odp_execute_actions(NULL, packet, &key, ctx->xout->odp_actions.data, ctx->xout->odp_actions.size, NULL, NULL); @@ -1820,7 +1846,7 @@ execute_controller_action(struct xlate_ctx *ctx, int len, ofpbuf_delete(packet); } -static void +static bool compose_mpls_push_action(struct xlate_ctx *ctx, ovs_be16 eth_type) { struct flow_wildcards *wc = &ctx->xout->wc; @@ -1828,12 +1854,35 @@ compose_mpls_push_action(struct xlate_ctx *ctx, ovs_be16 eth_type) ovs_assert(eth_type_mpls(eth_type)); + /* If mpls_depth_delta is negative then an MPLS POP action has been + * composed and the resulting MPLS label stack is unknown. This means + * an MPLS PUSH action can't be composed as it needs to know either the + * top-most MPLS LSE to use as a template for the new MPLS LSE, or that + * there is no MPLS label stack present. Thus, stop processing. + * + * If mpls_depth_delta is positive then an MPLS PUSH action has been + * composed and no further MPLS PUSH action may be performed without + * losing MPLS LSE and ether type information held in ctx->xin->flow. + * Thus, stop processing. + * + * If the MPLS LSE of the flow and base_flow differ then the MPLS LSE + * has been updated. Performing an MPLS PUSH action would result in + * losing MPLS LSE and ether type information held in ctx->xin->flow. + * Thus, stop processing.
+ * + * It is planned that in the future this case will be handled + * by recirculation */ + if (ctx->mpls_depth_delta || + ctx->xin->flow.mpls_lse != ctx->base_flow.mpls_lse) { + return true; + } + memset(&wc->masks.mpls_lse, 0xff, sizeof wc->masks.mpls_lse); - memset(&wc->masks.mpls_depth, 0xff, sizeof wc->masks.mpls_depth); - if (flow->mpls_depth) { + ctx->pre_push_mpls_lse = ctx->xin->flow.mpls_lse; + + if (eth_type_mpls(ctx->xin->flow.dl_type)) { flow->mpls_lse &= ~htonl(MPLS_BOS_MASK); - flow->mpls_depth++; } else { ovs_be32 label; uint8_t tc, ttl; @@ -1848,30 +1897,48 @@ compose_mpls_push_action(struct xlate_ctx *ctx, ovs_be16 eth_type) tc = (flow->nw_tos & IP_DSCP_MASK) >> 2; ttl = flow->nw_ttl ? flow->nw_ttl : 0x40; flow->mpls_lse = set_mpls_lse_values(ttl, tc, 1, label); - flow->mpls_depth = 1; } flow->dl_type = eth_type; + ctx->mpls_depth_delta++; + + return false; } -static void +static bool compose_mpls_pop_action(struct xlate_ctx *ctx, ovs_be16 eth_type) { struct flow_wildcards *wc = &ctx->xout->wc; - struct flow *flow = &ctx->xin->flow; - ovs_assert(eth_type_mpls(ctx->xin->flow.dl_type)); - ovs_assert(!eth_type_mpls(eth_type)); + if (!eth_type_mpls(ctx->xin->flow.dl_type)) { + return true; + } + + /* If mpls_depth_delta is negative then an MPLS POP action has been + * composed. Performing another MPLS POP action + * would result in losing ether type that results from + * the already composed MPLS POP. Thus, stop processing. 
+ * + * It is planned that in the future this case will be handled + * by recirculation */ + if (ctx->mpls_depth_delta < 0) { + return true; + } memset(&wc->masks.mpls_lse, 0xff, sizeof wc->masks.mpls_lse); - memset(&wc->masks.mpls_depth, 0xff, sizeof wc->masks.mpls_depth); - if (flow->mpls_depth) { - flow->mpls_depth--; - flow->mpls_lse = htonl(0); - if (!flow->mpls_depth) { - flow->dl_type = eth_type; - } + /* If mpls_depth_delta is positive then an MPLS PUSH action has been + * executed and the previous MPLS LSE was saved in ctx->pre_push_mpls_lse. The + * flow's MPLS LSE should be restored to that value to allow any + * subsequent actions that update the LSE to be executed correctly. + */ + if (ctx->mpls_depth_delta > 0) { + ctx->xin->flow.mpls_lse = ctx->pre_push_mpls_lse; } + + ctx->xin->flow.dl_type = eth_type; + ctx->mpls_depth_delta--; + + return false; } static bool @@ -1907,6 +1974,18 @@ compose_set_mpls_ttl_action(struct xlate_ctx *ctx, uint8_t ttl) return true; } + /* If mpls_depth_delta is negative then an MPLS POP action has been + * executed and the resulting MPLS label stack is unknown. This means + * a SET MPLS TTL action can't be executed as it needs to manipulate + * the top-most MPLS LSE. Thus, stop processing. + * + * It is planned that in the future this case will be handled + * by recirculation.
+ */ + if (ctx->mpls_depth_delta < 0) { + return true; + } + ctx->xout->wc.masks.mpls_lse |= htonl(MPLS_TTL_MASK); set_mpls_lse_ttl(&ctx->xin->flow.mpls_lse, ttl); return false; @@ -2134,7 +2213,8 @@ xlate_sample_action(struct xlate_ctx *ctx, uint32_t probability = (os->probability << 16) | os->probability; commit_odp_actions(&ctx->xin->flow, &ctx->base_flow, - &ctx->xout->odp_actions, &ctx->xout->wc); + &ctx->xout->odp_actions, &ctx->xout->wc, + &ctx->mpls_depth_delta); compose_flow_sample_cookie(os->probability, os->collector_set_id, os->obs_domain_id, os->obs_point_id, &cookie); @@ -2309,11 +2389,17 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, break; case OFPACT_PUSH_MPLS: - compose_mpls_push_action(ctx, ofpact_get_PUSH_MPLS(a)->ethertype); + if (compose_mpls_push_action(ctx, + ofpact_get_PUSH_MPLS(a)->ethertype)) { + return; + } break; case OFPACT_POP_MPLS: - compose_mpls_pop_action(ctx, ofpact_get_POP_MPLS(a)->ethertype); + if (compose_mpls_pop_action(ctx, + ofpact_get_POP_MPLS(a)->ethertype)) { + return; + } break; case OFPACT_SET_MPLS_TTL: @@ -2462,6 +2548,44 @@ xlate_out_copy(struct xlate_out *dst, const struct xlate_out *src) ofpbuf_put(&dst->odp_actions, src->odp_actions.data, src->odp_actions.size); } + +/* Returns a reference to the sflow handle associated with ofproto, or NULL if + * there is none. The caller is responsible for decrementing the result's ref + * count with dpif_sflow_unref(). */ +struct dpif_sflow * +xlate_get_sflow(const struct ofproto_dpif *ofproto) +{ + struct dpif_sflow *sflow = NULL; + struct xbridge *xbridge; + + ovs_rwlock_rdlock(&xlate_rwlock); + xbridge = xbridge_lookup(ofproto); + if (xbridge) { + sflow = dpif_sflow_ref(xbridge->sflow); + } + ovs_rwlock_unlock(&xlate_rwlock); + + return sflow; +} + +/* Returns a reference to the ipfix handle associated with ofproto, or NULL if + * there is none. The caller is responsible for decrementing the result's ref + * count with dpif_ipfix_unref().
*/ +struct dpif_ipfix * +xlate_get_ipfix(const struct ofproto_dpif *ofproto) +{ + struct dpif_ipfix *ipfix = NULL; + struct xbridge *xbridge; + + ovs_rwlock_rdlock(&xlate_rwlock); + xbridge = xbridge_lookup(ofproto); + if (xbridge) { + ipfix = dpif_ipfix_ref(xbridge->ipfix); + } + ovs_rwlock_unlock(&xlate_rwlock); + + return ipfix; +} static struct skb_priority_to_dscp * get_skb_priority(const struct xport *xport, uint32_t skb_priority) @@ -2595,9 +2719,11 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) } ctx.recurse = 0; + ctx.resubmits = 0; ctx.orig_skb_priority = flow->skb_priority; ctx.table_id = 0; ctx.exit = false; + ctx.mpls_depth_delta = 0; if (xin->ofpacts) { ofpacts = xin->ofpacts; @@ -2682,6 +2808,15 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) } } + if (nl_attr_oversized(ctx.xout->odp_actions.size)) { + /* These datapath actions are too big for a Netlink attribute, so we + * can't execute them. */ + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); + + VLOG_ERR_RL(&rl, "discarding oversize datapath actions"); + ofpbuf_clear(&ctx.xout->odp_actions); + } + ofpbuf_uninit(&ctx.stack); /* Clear the metadata and register wildcard masks, because we won't diff --git a/ofproto/ofproto-dpif-xlate.h b/ofproto/ofproto-dpif-xlate.h index a54a9e4a2..6403f50aa 100644 --- a/ofproto/ofproto-dpif-xlate.h +++ b/ofproto/ofproto-dpif-xlate.h @@ -154,4 +154,10 @@ void xlate_in_init(struct xlate_in *, struct ofproto_dpif *, void xlate_out_uninit(struct xlate_out *); void xlate_actions_for_side_effects(struct xlate_in *); void xlate_out_copy(struct xlate_out *dst, const struct xlate_out *src); + +struct dpif_sflow *xlate_get_sflow(const struct ofproto_dpif *) + OVS_EXCLUDED(xlate_rwlock); +struct dpif_ipfix *xlate_get_ipfix(const struct ofproto_dpif *) + OVS_EXCLUDED(xlate_rwlock); + #endif /* ofproto-dpif-xlate.h */ diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 80874b87f..80e97e04c 100644 --- 
a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -68,13 +68,14 @@ VLOG_DEFINE_THIS_MODULE(ofproto_dpif); COVERAGE_DEFINE(ofproto_dpif_expired); -COVERAGE_DEFINE(facet_changed_rule); COVERAGE_DEFINE(facet_revalidate); COVERAGE_DEFINE(facet_unexpected); -COVERAGE_DEFINE(facet_suppress); +COVERAGE_DEFINE(facet_create); +COVERAGE_DEFINE(facet_remove); +COVERAGE_DEFINE(subfacet_create); +COVERAGE_DEFINE(subfacet_destroy); COVERAGE_DEFINE(subfacet_install_fail); COVERAGE_DEFINE(packet_in_overflow); -COVERAGE_DEFINE(flow_mod_overflow); /* Number of implemented OpenFlow tables. */ enum { N_TABLES = 255 }; @@ -437,20 +438,6 @@ struct dpif_backer { unsigned avg_n_subfacet; /* Average number of flows. */ long long int avg_subfacet_life; /* Average life span of subfacets. */ - /* The average number of subfacets... */ - struct avg_subfacet_rates hourly; /* ...over the last hour. */ - struct avg_subfacet_rates daily; /* ...over the last day. */ - struct avg_subfacet_rates lifetime; /* ...over the switch lifetime. */ - long long int last_minute; /* Last time 'hourly' was updated. */ - - /* Number of subfacets added or deleted since 'last_minute'. */ - unsigned subfacet_add_count; - unsigned subfacet_del_count; - - /* Number of subfacets added or deleted from 'created' to 'last_minute.' */ - unsigned long long int total_subfacet_add_count; - unsigned long long int total_subfacet_del_count; - /* Number of upcall handling threads. */ unsigned int n_handler_threads; }; @@ -459,7 +446,6 @@ struct dpif_backer { static struct shash all_dpif_backers = SHASH_INITIALIZER(&all_dpif_backers); static void drop_key_clear(struct dpif_backer *); -static void update_moving_averages(struct dpif_backer *backer); struct ofproto_dpif { struct hmap_node all_ofproto_dpifs_node; /* In 'all_ofproto_dpifs'. 
*/ @@ -1217,14 +1203,6 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp) backer->max_n_subfacet = 0; backer->created = time_msec(); - backer->last_minute = backer->created; - memset(&backer->hourly, 0, sizeof backer->hourly); - memset(&backer->daily, 0, sizeof backer->daily); - memset(&backer->lifetime, 0, sizeof backer->lifetime); - backer->subfacet_add_count = 0; - backer->subfacet_del_count = 0; - backer->total_subfacet_add_count = 0; - backer->total_subfacet_del_count = 0; backer->avg_n_subfacet = 0; backer->avg_subfacet_life = 0; @@ -3427,107 +3405,12 @@ handle_flow_misses(struct dpif_backer *backer, struct flow_miss_batch *fmb) } } -static void -handle_sflow_upcall(struct dpif_backer *backer, - const struct dpif_upcall *upcall) -{ - struct ofproto_dpif *ofproto; - union user_action_cookie cookie; - struct flow flow; - odp_port_t odp_in_port; - - if (xlate_receive(backer, upcall->packet, upcall->key, upcall->key_len, - &flow, NULL, &ofproto, &odp_in_port) - || !ofproto->sflow) { - return; - } - - memset(&cookie, 0, sizeof cookie); - memcpy(&cookie, nl_attr_get(upcall->userdata), sizeof cookie.sflow); - dpif_sflow_received(ofproto->sflow, upcall->packet, &flow, - odp_in_port, &cookie); -} - -static void -handle_flow_sample_upcall(struct dpif_backer *backer, - const struct dpif_upcall *upcall) -{ - struct ofproto_dpif *ofproto; - union user_action_cookie cookie; - struct flow flow; - - if (xlate_receive(backer, upcall->packet, upcall->key, upcall->key_len, - &flow, NULL, &ofproto, NULL) - || !ofproto->ipfix) { - return; - } - - memset(&cookie, 0, sizeof cookie); - memcpy(&cookie, nl_attr_get(upcall->userdata), sizeof cookie.flow_sample); - - /* The flow reflects exactly the contents of the packet. Sample - * the packet using it. 
*/ - dpif_ipfix_flow_sample(ofproto->ipfix, upcall->packet, &flow, - cookie.flow_sample.collector_set_id, - cookie.flow_sample.probability, - cookie.flow_sample.obs_domain_id, - cookie.flow_sample.obs_point_id); -} - -static void -handle_ipfix_upcall(struct dpif_backer *backer, - const struct dpif_upcall *upcall) -{ - struct ofproto_dpif *ofproto; - struct flow flow; - - if (xlate_receive(backer, upcall->packet, upcall->key, upcall->key_len, - &flow, NULL, &ofproto, NULL) - || !ofproto->ipfix) { - return; - } - - /* The flow reflects exactly the contents of the packet. Sample - * the packet using it. */ - dpif_ipfix_bridge_sample(ofproto->ipfix, upcall->packet, &flow); -} - static void handle_upcalls(struct dpif_backer *backer) { struct flow_miss_batch *fmb; int n_processed; - for (n_processed = 0; n_processed < FLOW_MISS_MAX_BATCH; n_processed++) { - struct upcall *upcall = upcall_next(backer->udpif); - - if (!upcall) { - break; - } - - switch (upcall->type) { - case SFLOW_UPCALL: - handle_sflow_upcall(backer, &upcall->dpif_upcall); - break; - - case FLOW_SAMPLE_UPCALL: - handle_flow_sample_upcall(backer, &upcall->dpif_upcall); - break; - - case IPFIX_UPCALL: - handle_ipfix_upcall(backer, &upcall->dpif_upcall); - break; - - case BAD_UPCALL: - break; - - case MISS_UPCALL: - NOT_REACHED(); - } - - upcall_destroy(upcall); - } - for (n_processed = 0; n_processed < FLOW_MISS_MAX_BATCH; n_processed++) { struct drop_key *drop_key = drop_key_next(backer->udpif); if (!drop_key) { @@ -3739,8 +3622,6 @@ update_stats(struct dpif_backer *backer) run_fast_rl(); } dpif_flow_dump_done(&dump); - - update_moving_averages(backer); } /* Calculates and returns the number of milliseconds of idle time after which @@ -3923,6 +3804,7 @@ facet_create(const struct flow_miss *miss) struct facet *facet; struct match match; + COVERAGE_INC(facet_create); facet = xzalloc(sizeof *facet); facet->ofproto = miss->ofproto; facet->used = miss->stats.used; @@ -3986,6 +3868,7 @@ facet_remove(struct 
facet *facet) { struct subfacet *subfacet, *next_subfacet; + COVERAGE_INC(facet_remove); ovs_assert(!list_is_empty(&facet->subfacets)); /* First uninstall all of the subfacets to get final statistics. */ @@ -4523,6 +4406,7 @@ subfacet_create(struct facet *facet, struct flow_miss *miss) subfacet = xmalloc(sizeof *subfacet); } + COVERAGE_INC(subfacet_create); hmap_insert(&backer->subfacets, &subfacet->hmap_node, key_hash); list_push_back(&facet->subfacets, &subfacet->list_node); subfacet->facet = facet; @@ -4535,7 +4419,6 @@ subfacet_create(struct facet *facet, struct flow_miss *miss) subfacet->path = SF_NOT_INSTALLED; subfacet->backer = backer; - backer->subfacet_add_count++; return subfacet; } @@ -4545,11 +4428,8 @@ static void subfacet_destroy__(struct subfacet *subfacet) { struct facet *facet = subfacet->facet; - struct ofproto_dpif *ofproto = facet->ofproto; - - /* Update ofproto stats before uninstall the subfacet. */ - ofproto->backer->subfacet_del_count++; + COVERAGE_INC(subfacet_destroy); subfacet_uninstall(subfacet); hmap_remove(&subfacet->backer->subfacets, &subfacet->hmap_node); list_remove(&subfacet->list_node); @@ -5340,12 +5220,13 @@ static void ofproto_unixctl_trace(struct unixctl_conn *conn, int argc, const char *argv[], void *aux OVS_UNUSED) { - const struct dpif_backer *backer; + const struct dpif_backer *backer = NULL; struct ofproto_dpif *ofproto; struct ofpbuf odp_key, odp_mask; struct ofpbuf *packet; struct ds result; struct flow flow; + struct simap port_names; char *s; packet = NULL; @@ -5353,6 +5234,7 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, int argc, const char *argv[], ds_init(&result); ofpbuf_init(&odp_key, 0); ofpbuf_init(&odp_mask, 0); + simap_init(&port_names); /* Handle "-generate" or a hex string as the last argument. 
*/ if (!strcmp(argv[argc - 1], "-generate")) { @@ -5369,37 +5251,42 @@ ofproto_unixctl_trace(struct unixctl_conn *conn, int argc, const char *argv[], } } + /* odp_flow can have its in_port specified as a name instead of port no. + * We do not yet know whether a given flow is a odp_flow or a br_flow. + * But, to know whether a flow is odp_flow through odp_flow_from_string(), + * we need to create a simap of name to port no. */ + if (argc == 3) { + const char *dp_type; + if (!strncmp(argv[1], "ovs-", 4)) { + dp_type = argv[1] + 4; + } else { + dp_type = argv[1]; + } + backer = shash_find_data(&all_dpif_backers, dp_type); + } else { + struct shash_node *node; + if (shash_count(&all_dpif_backers) == 1) { + node = shash_first(&all_dpif_backers); + backer = node->data; + } + } + if (backer && backer->dpif) { + struct dpif_port dpif_port; + struct dpif_port_dump port_dump; + DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, backer->dpif) { + simap_put(&port_names, dpif_port.name, + odp_to_u32(dpif_port.port_no)); + } + } + /* Parse the flow and determine whether a datapath or * bridge is specified. If function odp_flow_key_from_string() * returns 0, the flow is a odp_flow. If function * parse_ofp_exact_flow() returns 0, the flow is a br_flow. */ - if (!odp_flow_from_string(argv[argc - 1], NULL, &odp_key, &odp_mask)) { - /* If the odp_flow is the second argument, - * the datapath name is the first argument. */ - if (argc == 3) { - const char *dp_type; - if (!strncmp(argv[1], "ovs-", 4)) { - dp_type = argv[1] + 4; - } else { - dp_type = argv[1]; - } - backer = shash_find_data(&all_dpif_backers, dp_type); - if (!backer) { - unixctl_command_reply_error(conn, "Cannot find datapath " - "of this name"); - goto exit; - } - } else { - /* No datapath name specified, so there should be only one - * datapath. 
*/ - struct shash_node *node; - if (shash_count(&all_dpif_backers) != 1) { - unixctl_command_reply_error(conn, "Must specify datapath " - "name, there is more than one type of datapath"); - goto exit; - } - node = shash_first(&all_dpif_backers); - backer = node->data; + if (!odp_flow_from_string(argv[argc - 1], &port_names, &odp_key, &odp_mask)) { + if (!backer) { + unixctl_command_reply_error(conn, "Cannot find the datapath"); + goto exit; } if (xlate_receive(backer, NULL, odp_key.data, odp_key.size, &flow, @@ -5452,6 +5339,7 @@ exit: ofpbuf_delete(packet); ofpbuf_uninit(&odp_key); ofpbuf_uninit(&odp_mask); + simap_destroy(&port_names); } static void @@ -5636,14 +5524,6 @@ ofproto_unixctl_dpif_dump_dps(struct unixctl_conn *conn, int argc OVS_UNUSED, ds_destroy(&ds); } -static void -show_dp_rates(struct ds *ds, const char *heading, - const struct avg_subfacet_rates *rates) -{ - ds_put_format(ds, "%s add rate: %5.3f/min, del rate: %5.3f/min\n", - heading, rates->add_rate, rates->del_rate); -} - static void dpif_show_backer(const struct dpif_backer *backer, struct ds *ds) { @@ -5651,7 +5531,6 @@ dpif_show_backer(const struct dpif_backer *backer, struct ds *ds) struct ofproto_dpif *ofproto; struct shash ofproto_shash; uint64_t n_hit, n_missed; - long long int minutes; size_t i; n_hit = n_missed = 0; @@ -5669,15 +5548,6 @@ dpif_show_backer(const struct dpif_backer *backer, struct ds *ds) backer->avg_n_subfacet, backer->max_n_subfacet, backer->avg_subfacet_life); - minutes = (time_msec() - backer->created) / (1000 * 60); - if (minutes >= 60) { - show_dp_rates(ds, "\thourly avg:", &backer->hourly); - } - if (minutes >= 60 * 24) { - show_dp_rates(ds, "\tdaily avg:", &backer->daily); - } - show_dp_rates(ds, "\toverall avg:", &backer->lifetime); - shash_init(&ofproto_shash); ofprotos = get_ofprotos(&ofproto_shash); for (i = 0; i < shash_count(&ofproto_shash); i++) { @@ -5884,7 +5754,7 @@ ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn, } 
odp_flow_format(subfacet->key, subfacet->key_len, - mask.data, mask.size, &ds, false); + mask.data, mask.size, NULL, &ds, false); ds_put_format(&ds, ", packets:%"PRIu64", bytes:%"PRIu64", used:", subfacet->dp_packet_count, subfacet->dp_byte_count); @@ -6230,51 +6100,6 @@ odp_port_to_ofp_port(const struct ofproto_dpif *ofproto, odp_port_t odp_port) } } -/* Compute exponentially weighted moving average, adding 'new' as the newest, - * most heavily weighted element. 'base' designates the rate of decay: after - * 'base' further updates, 'new''s weight in the EWMA decays to about 1/e - * (about .37). */ -static void -exp_mavg(double *avg, int base, double new) -{ - *avg = (*avg * (base - 1) + new) / base; -} - -static void -update_moving_averages(struct dpif_backer *backer) -{ - const int min_ms = 60 * 1000; /* milliseconds in one minute. */ - long long int minutes = (time_msec() - backer->created) / min_ms; - - if (minutes > 0) { - backer->lifetime.add_rate = (double) backer->total_subfacet_add_count - / minutes; - backer->lifetime.del_rate = (double) backer->total_subfacet_del_count - / minutes; - } else { - backer->lifetime.add_rate = 0.0; - backer->lifetime.del_rate = 0.0; - } - - /* Update hourly averages on the minute boundaries. */ - if (time_msec() - backer->last_minute >= min_ms) { - exp_mavg(&backer->hourly.add_rate, 60, backer->subfacet_add_count); - exp_mavg(&backer->hourly.del_rate, 60, backer->subfacet_del_count); - - /* Update daily averages on the hour boundaries. 
*/ - if ((backer->last_minute - backer->created) / min_ms % 60 == 59) { - exp_mavg(&backer->daily.add_rate, 24, backer->hourly.add_rate); - exp_mavg(&backer->daily.del_rate, 24, backer->hourly.del_rate); - } - - backer->total_subfacet_add_count += backer->subfacet_add_count; - backer->total_subfacet_del_count += backer->subfacet_del_count; - backer->subfacet_add_count = 0; - backer->subfacet_del_count = 0; - backer->last_minute += min_ms; - } -} - const struct ofproto_class ofproto_dpif_class = { init, enumerate_types, diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h index 03b19c84c..de566e3bc 100644 --- a/ofproto/ofproto-provider.h +++ b/ofproto/ofproto-provider.h @@ -81,10 +81,10 @@ struct ofproto { /* Datapath. */ struct hmap ports; /* Contains "struct ofport"s. */ struct shash port_by_name; - unsigned long *ofp_port_ids;/* Bitmap of used OpenFlow port numbers. */ struct simap ofp_requests; /* OpenFlow port number requests. */ uint16_t alloc_port_no; /* Last allocated OpenFlow port number. */ uint16_t max_ports; /* Max possible OpenFlow port num, plus one. */ + struct hmap ofport_usage; /* Map ofport to last used time. */ /* Flow tables. */ long long int eviction_group_timer; /* For rate limited reheapification. 
*/ diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index ae3928386..8e4f300ac 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -57,14 +57,11 @@ VLOG_DEFINE_THIS_MODULE(ofproto); -COVERAGE_DEFINE(ofproto_error); COVERAGE_DEFINE(ofproto_flush); -COVERAGE_DEFINE(ofproto_no_packet_in); COVERAGE_DEFINE(ofproto_packet_out); COVERAGE_DEFINE(ofproto_queue_req); COVERAGE_DEFINE(ofproto_recv_openflow); COVERAGE_DEFINE(ofproto_reinit_ports); -COVERAGE_DEFINE(ofproto_uninstallable); COVERAGE_DEFINE(ofproto_update_port); enum ofproto_state { @@ -240,6 +237,22 @@ static void update_port(struct ofproto *, const char *devname); static int init_ports(struct ofproto *); static void reinit_ports(struct ofproto *); +static long long int ofport_get_usage(const struct ofproto *, + ofp_port_t ofp_port); +static void ofport_set_usage(struct ofproto *, ofp_port_t ofp_port, + long long int last_used); + +/* Ofport usage. + * + * Keeps track of the currently used and recently used ofport values and is + * used to prevent immediate recycling of ofport values. */ +struct ofport_usage { + struct hmap_node hmap_node; /* In struct ofproto's "ofport_usage" hmap. */ + ofp_port_t ofp_port; /* OpenFlow port number. */ + long long int last_used; /* Last time the 'ofp_port' was used. LLONG_MAX + represents in-use ofports. */ +}; + /* rule. */ static void ofproto_rule_destroy__(struct rule *); static void ofproto_rule_send_removed(struct rule *, uint8_t reason); @@ -485,6 +498,7 @@ ofproto_create(const char *datapath_name, const char *datapath_type, ofproto->dp_desc = NULL; ofproto->frag_handling = OFPC_FRAG_NORMAL; hmap_init(&ofproto->ports); + hmap_init(&ofproto->ofport_usage); shash_init(&ofproto->port_by_name); simap_init(&ofproto->ofp_requests); ofproto->max_ports = ofp_to_u16(OFPP_MAX); @@ -518,11 +532,6 @@ ofproto_create(const char *datapath_name, const char *datapath_type, return error; } - /* The "max_ports" member should have been set by ->construct(ofproto). 
- * Port 0 is not a valid OpenFlow port, so mark that as unavailable. */ - ofproto->ofp_port_ids = bitmap_allocate(ofproto->max_ports); - bitmap_set1(ofproto->ofp_port_ids, 0); - /* Check that hidden tables, if any, are at the end. */ ovs_assert(ofproto->n_tables); for (i = 0; i + 1 < ofproto->n_tables; i++) { @@ -1227,8 +1236,8 @@ ofproto_destroy__(struct ofproto *ofproto) free(ofproto->serial_desc); free(ofproto->dp_desc); hmap_destroy(&ofproto->ports); + hmap_destroy(&ofproto->ofport_usage); shash_destroy(&ofproto->port_by_name); - bitmap_free(ofproto->ofp_port_ids); simap_destroy(&ofproto->ofp_requests); OFPROTO_FOR_EACH_TABLE (table, ofproto) { @@ -1248,6 +1257,7 @@ ofproto_destroy(struct ofproto *p) OVS_EXCLUDED(ofproto_mutex) { struct ofport *ofport, *next_ofport; + struct ofport_usage *usage, *next_usage; if (!p) { return; @@ -1265,6 +1275,11 @@ ofproto_destroy(struct ofproto *p) ofport_destroy(ofport); } + HMAP_FOR_EACH_SAFE (usage, next_usage, hmap_node, &p->ofport_usage) { + hmap_remove(&p->ofport_usage, &usage->hmap_node); + free(usage); + } + p->ofproto_class->destruct(p); ofproto_destroy__(p); } @@ -1941,35 +1956,45 @@ alloc_ofp_port(struct ofproto *ofproto, const char *netdev_name) port_idx = port_idx ? port_idx : UINT16_MAX; if (port_idx >= ofproto->max_ports - || bitmap_is_set(ofproto->ofp_port_ids, port_idx)) { - uint16_t end_port_no = ofproto->alloc_port_no; + || ofport_get_usage(ofproto, u16_to_ofp(port_idx)) == LLONG_MAX) { + uint16_t lru_ofport = 0, end_port_no = ofproto->alloc_port_no; + long long int last_used_at, lru = LLONG_MAX; /* Search for a free OpenFlow port number. We try not to * immediately reuse them to prevent problems due to old * flows. 
*/ for (;;) { if (++ofproto->alloc_port_no >= ofproto->max_ports) { - ofproto->alloc_port_no = 0; + ofproto->alloc_port_no = 1; } - if (!bitmap_is_set(ofproto->ofp_port_ids, - ofproto->alloc_port_no)) { + last_used_at = ofport_get_usage(ofproto, + u16_to_ofp(ofproto->alloc_port_no)); + if (!last_used_at) { port_idx = ofproto->alloc_port_no; break; + } else if (last_used_at < lru) { + lru = last_used_at; + lru_ofport = ofproto->alloc_port_no; } + if (ofproto->alloc_port_no == end_port_no) { + if (lru_ofport) { + port_idx = lru_ofport; + break; + } return OFPP_NONE; } } } - bitmap_set1(ofproto->ofp_port_ids, port_idx); + ofport_set_usage(ofproto, u16_to_ofp(port_idx), LLONG_MAX); return u16_to_ofp(port_idx); } static void -dealloc_ofp_port(const struct ofproto *ofproto, ofp_port_t ofp_port) +dealloc_ofp_port(struct ofproto *ofproto, ofp_port_t ofp_port) { if (ofp_to_u16(ofp_port) < ofproto->max_ports) { - bitmap_set0(ofproto->ofp_port_ids, ofp_to_u16(ofp_port)); + ofport_set_usage(ofproto, ofp_port, time_msec()); } } @@ -2194,6 +2219,41 @@ ofproto_get_port(const struct ofproto *ofproto, ofp_port_t ofp_port) return NULL; } +static long long int +ofport_get_usage(const struct ofproto *ofproto, ofp_port_t ofp_port) +{ + struct ofport_usage *usage; + + HMAP_FOR_EACH_IN_BUCKET (usage, hmap_node, hash_ofp_port(ofp_port), + &ofproto->ofport_usage) { + if (usage->ofp_port == ofp_port) { + return usage->last_used; + } + } + return 0; +} + +static void +ofport_set_usage(struct ofproto *ofproto, ofp_port_t ofp_port, + long long int last_used) +{ + struct ofport_usage *usage; + HMAP_FOR_EACH_IN_BUCKET (usage, hmap_node, hash_ofp_port(ofp_port), + &ofproto->ofport_usage) { + if (usage->ofp_port == ofp_port) { + usage->last_used = last_used; + return; + } + } + ovs_assert(last_used == LLONG_MAX); + + usage = xmalloc(sizeof *usage); + usage->ofp_port = ofp_port; + usage->last_used = last_used; + hmap_insert(&ofproto->ofport_usage, &usage->hmap_node, + hash_ofp_port(ofp_port)); +} + 
int ofproto_port_get_stats(const struct ofport *port, struct netdev_stats *stats) { diff --git a/ovsdb/SPECS b/ovsdb/SPECS index 5bdb9741d..5656b9d7f 100644 --- a/ovsdb/SPECS +++ b/ovsdb/SPECS @@ -316,7 +316,9 @@ over HTTP, for these reasons: * The JSON-RPC specification for HTTP transport is incomplete. -We are using TCP port 6632 for the database JSON-RPC connection. +We are currently using TCP port 6632 for the database JSON-RPC +connection, but future versions will switch to using IANA-assigned TCP +port 6640. The database wire protocol consists of the following JSON-RPC methods: diff --git a/ovsdb/log.c b/ovsdb/log.c index ea3c3f348..131602576 100644 --- a/ovsdb/log.c +++ b/ovsdb/log.c @@ -32,9 +32,6 @@ #include "socket-util.h" #include "transaction.h" #include "util.h" -#include "vlog.h" - -VLOG_DEFINE_THIS_MODULE(ovsdb_log); enum ovsdb_log_mode { OVSDB_LOG_READ, diff --git a/ovsdb/ovsdb-server.1.in b/ovsdb/ovsdb-server.1.in index 4628b59b3..f86e8f367 100644 --- a/ovsdb/ovsdb-server.1.in +++ b/ovsdb/ovsdb-server.1.in @@ -73,6 +73,10 @@ client before sending an inactivity probe message. It is an error for \fIcolumn\fR to have another type. .RE . +.IP +To connect or listen on multiple connection methods, use multiple +\fB\-\-remote\fR options. +. .IP "\fB\-\-run=\fIcommand\fR]" Ordinarily \fBovsdb\-server\fR runs forever, or until it is told to exit (see \fBRUNTIME MANAGEMENT COMMANDS\fR below). With this option, diff --git a/ovsdb/ovsdb-tool.c b/ovsdb/ovsdb-tool.c index 077e7f5cb..11e61e6b4 100644 --- a/ovsdb/ovsdb-tool.c +++ b/ovsdb/ovsdb-tool.c @@ -40,8 +40,6 @@ #include "util.h" #include "vlog.h" -VLOG_DEFINE_THIS_MODULE(ovsdb_tool); - /* -m, --more: Verbosity level for "show-log" command output. 
*/ static int show_log_verbosity; diff --git a/rhel/etc_init.d_openvswitch b/rhel/etc_init.d_openvswitch index 7e6413256..2878d397c 100755 --- a/rhel/etc_init.d_openvswitch +++ b/rhel/etc_init.d_openvswitch @@ -5,7 +5,7 @@ # chkconfig: 2345 09 91 # description: Manage Open vSwitch kernel modules and user-space daemons -# Copyright (C) 2009, 2010, 2011 Nicira, Inc. +# Copyright (C) 2009, 2010, 2011, 2013 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -81,6 +81,7 @@ case $1 in ;; status) ovs_ctl status + exit $? ;; version) ovs_ctl version diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index f67c3ab79..b70637326 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -268,6 +268,7 @@ cookie=0x8 table=6 in_port=85 actions=mod_tp_src:85,controller,resubmit(86,7) cookie=0x9 table=7 in_port=86 actions=mod_tp_dst:86,controller,controller cookie=0xa dl_src=40:44:44:44:44:41 actions=mod_vlan_vid:99,mod_vlan_pcp:1,controller cookie=0xa dl_src=40:44:44:44:44:42 actions=push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],load:3->OXM_OF_MPLS_TC[[]],controller +cookie=0xa dl_src=41:44:44:44:44:42 actions=push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],load:3->OXM_OF_MPLS_TC[[]],pop_mpls:0x0800,controller cookie=0xa dl_src=40:44:44:44:44:43 actions=push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],load:3->OXM_OF_MPLS_TC[[]],controller cookie=0xa dl_src=40:44:44:44:44:44 actions=push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],load:3->OXM_OF_MPLS_TC[[]],controller cookie=0xa dl_src=40:44:44:44:44:45 actions=push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],load:3->OXM_OF_MPLS_TC[[]],dec_mpls_ttl,controller @@ -383,6 +384,26 @@ mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:44:42,dl_dst=50:54: dnl Modified MPLS controller action. 
AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log]) +for i in 1 2 3; do + ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=41:44:44:44:44:42,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no)' +done +OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6]) +ovs-appctl -t ovs-ofctl exit + +AT_CHECK([cat ofctl_monitor.log], [0], [dnl +NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=60 in_port=1 (via action) data_len=60 (unbuffered) +tcp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=41:44:44:44:44:42,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64 tcp_csum:0 +dnl +NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=60 in_port=1 (via action) data_len=60 (unbuffered) +tcp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=41:44:44:44:44:42,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64 tcp_csum:0 +dnl +NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=60 in_port=1 (via action) data_len=60 (unbuffered) +tcp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=41:44:44:44:44:42,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64 tcp_csum:0 +]) + +dnl Modified MPLS controller action. 
+AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log]) + dnl in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x8847),mpls(label=100,tc=3,ttl=64,bos=1) for i in 1 2 3; do @@ -703,6 +724,7 @@ AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:44:46 actions=push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],load:0x3->OXM_OF_MPLS_TC[[]],set_mpls_ttl(10),CONTROLLER:65535 cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:44:47 actions=push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],load:0x3->OXM_OF_MPLS_TC[[]],dec_mpls_ttl,set_mpls_ttl(10),CONTROLLER:65535 cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:44:48 actions=push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],load:0x3->OXM_OF_MPLS_TC[[]],set_mpls_ttl(10),dec_mpls_ttl,CONTROLLER:65535 + cookie=0xa, n_packets=3, n_bytes=180, dl_src=41:44:44:44:44:42 actions=push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],load:0x3->OXM_OF_MPLS_TC[[]],pop_mpls:0x0800,CONTROLLER:65535 cookie=0xb, n_packets=3, n_bytes=180, mpls,dl_src=50:55:55:55:55:55 actions=load:0x3e8->OXM_OF_MPLS_LABEL[[]],CONTROLLER:65535 cookie=0xc, n_packets=3, n_bytes=180, dl_src=70:77:77:77:77:77 actions=push_mpls:0x8848,load:0x3e8->OXM_OF_MPLS_LABEL[[]],load:0x7->OXM_OF_MPLS_TC[[]],CONTROLLER:65535 cookie=0xd, n_packets=3, n_bytes=186, dl_src=60:66:66:66:66:66 actions=pop_mpls:0x0800,CONTROLLER:65535 @@ -1139,8 +1161,15 @@ in_port=2 actions=output:1 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) -odp_flow="in_port(1)" +odp_flow="in_port(p1)" br_flow="in_port=1" +# Test command: ofproto/trace odp_flow with in_port as a name. 
+AT_CHECK([ovs-appctl ofproto/trace "$odp_flow"], [0], [stdout]) +AT_CHECK([tail -1 stdout], [0], [dnl +Datapath actions: 2 +]) + +odp_flow="in_port(1)" # Test command: ofproto/trace odp_flow AT_CHECK([ovs-appctl ofproto/trace "$odp_flow"], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [dnl @@ -1285,7 +1314,7 @@ m4_foreach( [AT_CHECK([ovs-appctl ofproto/trace wrong_name "$odp_flow" option], [2], [], [stderr]) AT_CHECK([tail -2 stderr], [0], [dnl -Cannot find datapath of this name +Cannot find the datapath ovs-appctl: ovs-vswitchd: server returned an error ])]) @@ -1298,7 +1327,7 @@ m4_foreach( [AT_CHECK([ovs-appctl ofproto/trace "" "$odp_flow" option], [2], [], [stderr]) AT_CHECK([tail -2 stderr], [0], [dnl -Cannot find datapath of this name +Cannot find the datapath ovs-appctl: ovs-vswitchd: server returned an error ])]) @@ -1311,7 +1340,7 @@ m4_foreach( [AT_CHECK([ovs-appctl ofproto/trace ovs-system "$odp_flow" option], [2], [], [stderr]) AT_CHECK([tail -2 stderr], [0], [dnl -Cannot find datapath of this name +Cannot find the datapath ovs-appctl: ovs-vswitchd: server returned an error ])]) @@ -1324,7 +1353,7 @@ m4_foreach( [AT_CHECK([ovs-appctl ofproto/trace br0 "$odp_flow" option], [2], [], [stderr]) AT_CHECK([tail -2 stderr], [0], [dnl -Cannot find datapath of this name +Cannot find the datapath ovs-appctl: ovs-vswitchd: server returned an error ])]) @@ -2108,7 +2137,6 @@ ADD_OF_PORTS([br1], [3]) AT_CHECK([ovs-appctl dpif/show], [0], [dnl dummy@ovs-dummy: hit:0 missed:0 flows: cur: 0, avg: 0, max: 0, life span: 0ms - overall avg: add rate: 0.000/min, del rate: 0.000/min br0: hit:0 missed:0 br0 65534/100: (dummy) p1 1/1: (dummy) @@ -2201,7 +2229,6 @@ warped AT_CHECK([ovs-appctl dpif/show], [0], [dnl dummy@ovs-dummy: hit:13 missed:2 flows: cur: 2, avg: 1, max: 2, life span: 1250ms - overall avg: add rate: 0.000/min, del rate: 0.000/min br0: hit:9 missed:1 br0 65534/100: (dummy) p2 2/2: (dummy) @@ -2253,8 +2280,6 @@ AT_CHECK([ovs-appctl time/warp 10000], [0], 
[warped AT_CHECK([ovs-appctl dpif/show | sed 's/ 10[[0-9]]\{3\}(ms)$/ 10000(ms)/'], [0], [dnl dummy@ovs-dummy: hit:0 missed:61 flows: cur: 0, avg: 0, max: 1, life span: 1666ms - hourly avg: add rate: 0.641/min, del rate: 0.641/min - overall avg: add rate: 1.000/min, del rate: 1.000/min br0: hit:0 missed:61 br0 65534/100: (dummy) p1 1/1: (dummy) @@ -2816,3 +2841,101 @@ AT_CHECK([ovs-appctl bond/show | sed -n '/^.*may_enable:.*/p'], [0], [dnl OVS_VSWITCHD_STOP AT_CLEANUP + +# Unit test for appctl coverage/show command +AT_SETUP([ofproto-dpif - coverage/show]) +OVS_VSWITCHD_START + +ovs-appctl time/stop +# before the first 5 seconds, nothing can be calculated but the total count. +# there should be two unixctl commands received, but the count of the "appctl +# coverage/show" command is not updated to the total, so it shows only 1. +AT_CHECK([ovs-appctl coverage/show | sed -n '/^unixctl_received.*/p'], [], [dnl +unixctl_received 0.0/sec 0.000/sec 0.0000/sec total: 1 +]) + +ovs-appctl time/warp 5000 +# at first 5 second time instant, should have stats. +AT_CHECK([ovs-appctl coverage/show | sed -n '/^unixctl_received.*/p'], [], [dnl +unixctl_received 0.6/sec 0.050/sec 0.0008/sec total: 3 +]) + +for i in `seq 0 10`; do ovs-appctl time/warp 5000; done +# advance to first 1 minute time instant. +AT_CHECK([ovs-appctl coverage/show | sed -n '/^unixctl_received.*/p'], [], [dnl +unixctl_received 0.2/sec 0.250/sec 0.0042/sec total: 15 +]) + +ovs-appctl time/warp 60000 +# advance to next 1 minute time instant directly, should observe the per-minute +# rate drop. 
+AT_CHECK([ovs-appctl coverage/show | sed -n '/^unixctl_received.*/p'], [], [dnl +unixctl_received 0.4/sec 0.033/sec 0.0047/sec total: 17 +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + +AT_BANNER([ofproto-dpif - flow translation resource limits]) + +AT_SETUP([ofproto-dpif - infinite resubmit]) +OVS_VSWITCHD_START +AT_CHECK([ovs-ofctl add-flow br0 actions=resubmit:1,resubmit:2,output:3]) +AT_CHECK([ovs-appctl ofproto/trace br0 'eth_dst=ff:ff:ff:ff:ff:ff'], + [0], [stdout]) +AT_CHECK([tail -1 stdout], [0], [Datapath actions: drop +]) +AT_CHECK([grep -c 'resubmit actions recursed over 64 times' ovs-vswitchd.log], + [0], [1 +]) +OVS_VSWITCHD_STOP(["/resubmit actions recursed/d"]) +AT_CLEANUP + +AT_SETUP([ofproto-dpif - exponential resubmit chain]) +OVS_VSWITCHD_START +ADD_OF_PORTS([br0], 1) +(for i in `seq 1 64`; do + j=`expr $i + 1` + echo "in_port=$i, actions=resubmit:$j, resubmit:$j, local" + done + echo "in_port=65, actions=local") > flows + AT_CHECK([ovs-ofctl add-flows br0 flows]) +AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1'], [0], [stdout]) +AT_CHECK([grep -c 'over 4096 resubmit actions' ovs-vswitchd.log], [0], [1 +]) +OVS_VSWITCHD_STOP(["/over.*resubmit actions/d"]) +AT_CLEANUP + +AT_SETUP([ofproto-dpif - too many output actions]) +OVS_VSWITCHD_START +ADD_OF_PORTS([br0], 1) +(for i in `seq 1 12`; do + j=`expr $i + 1` + echo "in_port=$i, actions=resubmit:$j, resubmit:$j, local" + done + echo "in_port=13, actions=local,local,local,local,local,local,local,local") > flows +AT_CHECK([ovs-ofctl add-flows br0 flows]) +AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1'], [0], [stdout]) +AT_CHECK([grep -c 'resubmits yielded over 64 kB of actions' ovs-vswitchd.log], [0], [1 +]) +AT_CHECK([grep -c 'discarding oversize datapath actions' ovs-vswitchd.log], [0], [1 +]) +OVS_VSWITCHD_STOP(["/resubmits yielded over 64 kB of actions/d +/discarding oversize datapath actions/d"]) +AT_CLEANUP + +AT_SETUP([ofproto-dpif - stack too deep]) +OVS_VSWITCHD_START +ADD_OF_PORTS([br0], 1) 
+(for i in `seq 1 12`; do + j=`expr $i + 1` + echo "in_port=$i, actions=resubmit:$j, resubmit:$j, local" + done + push="push:NXM_NX_REG0[[]]" + echo "in_port=13, actions=$push,$push,$push,$push,$push,$push,$push,$push") > flows + AT_CHECK([ovs-ofctl add-flows br0 flows]) +AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1'], [0], [stdout]) +AT_CHECK([grep -c 'resubmits yielded over 64 kB of stack' ovs-vswitchd.log], [0], [1 +]) +OVS_VSWITCHD_STOP(["/resubmits yielded over 64 kB of stack/d"]) +AT_CLEANUP diff --git a/tests/ofproto-macros.at b/tests/ofproto-macros.at index 839d41ffe..3bcffc252 100644 --- a/tests/ofproto-macros.at +++ b/tests/ofproto-macros.at @@ -89,6 +89,10 @@ m4_define([OVS_VSWITCHD_START], m4_divert_push([PREPARE_TESTS]) check_logs () { sed -n "$1 +/timeval.*Unreasonably long [[0-9]]*ms poll interval/d +/timeval.*faults: [[0-9]]* minor, [[0-9]]* major/d +/timeval.*disk: [[0-9]]* reads, [[0-9]]* writes/d +/timeval.*context switches: [[0-9]]* voluntary, [[0-9]]* involuntary/d /|WARN|/p /|ERR|/p /|EMER|/p" ovs-vswitchd.log ovsdb-server.log diff --git a/tests/test-bundle.c b/tests/test-bundle.c index 41e2e38e1..1bb2b0b21 100644 --- a/tests/test-bundle.c +++ b/tests/test-bundle.c @@ -141,8 +141,6 @@ main(int argc, char *argv[]) flows = xmalloc(N_FLOWS * sizeof *flows); for (i = 0; i < N_FLOWS; i++) { random_bytes(&flows[i], sizeof flows[i]); - memset(flows[i].zeros, 0, sizeof flows[i].zeros); - flows[i].mpls_depth = 0; flows[i].regs[0] = ofp_to_u16(OFPP_NONE); } diff --git a/tests/test-multipath.c b/tests/test-multipath.c index f1b12e263..4ba36922e 100644 --- a/tests/test-multipath.c +++ b/tests/test-multipath.c @@ -66,8 +66,6 @@ main(int argc, char *argv[]) struct flow flow; random_bytes(&flow, sizeof flow); - memset(flow.zeros, 0, sizeof flow.zeros); - flow.mpls_depth = 0; mp.max_link = n - 1; multipath_execute(&mp, &flow, &wc); diff --git a/tests/test-odp.c b/tests/test-odp.c index 45605e4b9..183a3b3cd 100644 --- a/tests/test-odp.c +++ 
b/tests/test-odp.c @@ -86,7 +86,7 @@ parse_keys(bool wc_keys) ds_init(&out); if (wc_keys) { odp_flow_format(odp_key.data, odp_key.size, - odp_mask.data, odp_mask.size, &out, false); + odp_mask.data, odp_mask.size, NULL, &out, false); } else { odp_flow_key_format(odp_key.data, odp_key.size, &out); } diff --git a/tests/tunnel.at b/tests/tunnel.at index 697c21741..982d22a5d 100644 --- a/tests/tunnel.at +++ b/tests/tunnel.at @@ -14,7 +14,7 @@ actions=IN_PORT AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) -AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl +AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl br0 65534/100: (dummy) p1 1/1: (gre: remote_ip=1.1.1.1) p2 2/1: (gre: local_ip=2.2.2.2, remote_ip=1.1.1.1) @@ -37,7 +37,7 @@ dnl reconfigure, local_ip, remote_ip AT_CHECK([ovs-vsctl set Interface p2 type=gre options:local_ip=2.2.2.3 \ options:df_default=false options:ttl=1 options:csum=true \ -- set Interface p3 type=gre64]) -AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl +AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl br0 65534/100: (dummy) p1 1/1: (gre: remote_ip=1.1.1.1) p2 2/1: (gre: csum=true, df_default=false, local_ip=2.2.2.3, remote_ip=1.1.1.1, ttl=1) @@ -72,7 +72,7 @@ actions=2 AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) -AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl +AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl br0 65534/100: (dummy) p1 1/1: (gre: remote_ip=1.1.1.1) p2 2/2: (dummy) @@ -116,7 +116,7 @@ actions=output:1 AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) -AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl +AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl br0 65534/100: (dummy) p1 1/1: (gre: key=5, local_ip=2.2.2.2, remote_ip=1.1.1.1) p2 2/2: (dummy) @@ -148,7 +148,7 @@ actions=output:1 AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) -AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl +AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl br0 65534/100: (dummy) p1 1/1: (gre: 
remote_ip=1.1.1.1, tos=inherit, ttl=inherit) p2 2/2: (dummy) @@ -190,7 +190,7 @@ actions=set_tunnel:1,output:1,set_tunnel:2,output:2,set_tunnel:3,output:3,set_tu AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) -AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl +AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl br0 65534/100: (dummy) p1 1/1: (gre: key=flow, remote_ip=1.1.1.1) p2 2/1: (gre: key=flow, remote_ip=2.2.2.2) @@ -222,7 +222,7 @@ actions=IN_PORT,output:1,output:2,output:3 AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) -AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl +AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl br0 65534/100: (dummy) p1 1/1: (gre: key=1, remote_ip=1.1.1.1) p2 2/1: (gre: in_key=2, out_key=3, remote_ip=1.1.1.1) @@ -274,7 +274,7 @@ tun_id=4,actions=output:5 AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) -AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl +AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl br0 65534/100: (dummy) p1 1/1: (gre: key=flow, remote_ip=1.1.1.1) p2 2/1: (gre: key=3, remote_ip=3.3.3.3) @@ -310,7 +310,7 @@ AT_SETUP([tunnel - VXLAN]) OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=vxlan \ options:remote_ip=1.1.1.1 ofport_request=1]) -AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl +AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl br0 65534/100: (dummy) p1 1/1: (vxlan: remote_ip=1.1.1.1) ]) @@ -322,7 +322,7 @@ AT_SETUP([tunnel - LISP]) OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=lisp \ options:remote_ip=1.1.1.1 ofport_request=1]) -AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl +AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl br0 65534/100: (dummy) p1 1/1: (lisp: remote_ip=1.1.1.1) ]) @@ -334,7 +334,7 @@ AT_SETUP([tunnel - different VXLAN UDP port]) OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=vxlan \ options:remote_ip=1.1.1.1 ofport_request=1 options:dst_port=4341]) -AT_CHECK([ovs-appctl dpif/show | tail -n 
+5], [0], [dnl +AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl br0 65534/100: (dummy) p1 1/1: (vxlan: dst_port=4341, remote_ip=1.1.1.1) ]) @@ -343,7 +343,7 @@ dnl change UDP port AT_CHECK([ovs-vsctl -- set Interface p1 options:dst_port=5000]) -AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl +AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl br0 65534/100: (dummy) p1 1/2: (vxlan: dst_port=5000, remote_ip=1.1.1.1) ]) @@ -352,7 +352,7 @@ dnl change UDP port to default AT_CHECK([ovs-vsctl -- set Interface p1 options:dst_port=4789]) -AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl +AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl br0 65534/100: (dummy) p1 1/1: (vxlan: remote_ip=1.1.1.1) ]) diff --git a/third-party/ofp-tcpdump.patch b/third-party/ofp-tcpdump.patch index 5c47061b2..b1ac9a571 100644 --- a/third-party/ofp-tcpdump.patch +++ b/third-party/ofp-tcpdump.patch @@ -116,7 +116,7 @@ else if (length > 0 && (sport == LDP_PORT || dport == LDP_PORT)) { ldp_print(bp, length); - } -+ } else if (sport == OFP_TCP_PORT || dport == OFP_TCP_PORT) { ++ } else if (sport == OFP_OLD_PORT || dport == OFP_OLD_PORT) { + openflow_print(bp, length); + } diff --git a/utilities/bugtool/ovs-bugtool.8.in b/utilities/bugtool/ovs-bugtool.8.in index b528dbaeb..6f4e0b583 100644 --- a/utilities/bugtool/ovs-bugtool.8.in +++ b/utilities/bugtool/ovs-bugtool.8.in @@ -33,9 +33,9 @@ Print verbose debugging output. Use the capabilities specified in a comma-separated list. . .IP "\fB\-\-log\-days=\fIdays\fR" -Include the logs rotated in the previous \fIdays\fR days in the debug bundle. -The number of log files included has a big impact on the eventual bundle size. -The default value is 20 days. +Include the logs with last modification time in the previous \fIdays\fR days +in the debug bundle. The number of log files included has a big impact on the +eventual bundle size. The default value is 20 days. . 
.IP "\fB\-\-output=\fIfiletype\fR" Generate a debug bundle with the specified file type. Options include @@ -48,7 +48,8 @@ Use only Open vSwitch relevant capabilities. Suppress output. . .IP "\fB\-\-unlimited\fR" -Do not exclude files which are too large. +Do not exclude files which are too large. Also skip checking free disk space. +By default up to 90 percent of the free disk space can be used. . .IP "\fB\-\-yestoall\fR" Answer yes to all prompts. diff --git a/utilities/bugtool/ovs-bugtool.in b/utilities/bugtool/ovs-bugtool.in index 61c21db5c..35b8aef19 100755 --- a/utilities/bugtool/ovs-bugtool.in +++ b/utilities/bugtool/ovs-bugtool.in @@ -207,7 +207,7 @@ CAP_MULTIPATH = 'multipath' CAP_NETWORK_CONFIG = 'network-config' CAP_NETWORK_INFO = 'network-info' CAP_NETWORK_STATUS = 'network-status' -CAP_OPENVSWITCH_LOGS = 'ovs-system-logs' +CAP_OPENVSWITCH_LOGS = 'ovs-system-logs' CAP_PROCESS_LIST = 'process-list' CAP_SYSTEM_LOGS = 'system-logs' CAP_SYSTEM_SERVICES = 'system-services' @@ -220,8 +220,10 @@ caps = {} cap_sizes = {} unlimited_data = False dbg = False -# Default value for the number of rotated logs. +# Default value for the number of days to collect logs. 
log_days = 20 +log_last_mod_time = None +free_disk_space = None def cap(key, pii=PII_MAYBE, min_size=-1, max_size=-1, min_time=-1, max_time=-1, mime=MIME_TEXT, checked=True, hidden=False): @@ -284,7 +286,8 @@ def cmd_output(cap, args, label=None, filter=None, binary=False): data[label] = {'cap': cap, 'cmd_args': args, 'filter': filter, 'binary': binary} -def file_output(cap, path_list, newest_first=False): + +def file_output(cap, path_list, newest_first=False, last_mod_time=None): """ If newest_first is True, the list of files in path_list is sorted by file modification time in descending order, else its sorted @@ -297,19 +300,18 @@ def file_output(cap, path_list, newest_first=False): s = os.stat(path) except OSError, e: continue - path_entries.append((path, s)) + if last_mod_time is None or s.st_mtime >= last_mod_time: + path_entries.append((path, s)) mtime = lambda(path, stat): stat.st_mtime path_entries.sort(key=mtime, reverse=newest_first) for p in path_entries: - if unlimited_data or caps[cap][MAX_SIZE] == -1 or \ - cap_sizes[cap] < caps[cap][MAX_SIZE]: + if check_space(cap, p[0], p[1].st_size): data[p] = {'cap': cap, 'filename': p[0]} - cap_sizes[cap] += p[1].st_size - else: - output("Omitting %s, size constraint of %s exceeded" % (p[0], cap)) -def tree_output(cap, path, pattern=None, negate=False, newest_first=False): + +def tree_output(cap, path, pattern=None, negate=False, newest_first=False, + last_mod_time=None): """ Walks the directory tree rooted at path. Files in current dir are processed before files in sub-dirs. 
@@ -319,23 +321,27 @@ def tree_output(cap, path, pattern=None, negate=False, newest_first=False): for root, dirs, files in os.walk(path): fns = [fn for fn in [os.path.join(root, f) for f in files] if os.path.isfile(fn) and matches(fn, pattern, negate)] - file_output(cap, fns, newest_first=newest_first) + file_output(cap, fns, newest_first=newest_first, + last_mod_time=last_mod_time) + + +def prefix_output(cap, prefix, newest_first=False, last_mod_time=None): + """ + Output files with the same prefix. + """ + fns = [] + for root, dirs, files in os.walk(os.path.dirname(prefix)): + fns += [fn for fn in [os.path.join(root, f) for f in files] + if fn.startswith(prefix)] + file_output(cap, fns, newest_first=newest_first, + last_mod_time=last_mod_time) + def func_output(cap, label, func): if cap in entries: t = str(func).split() data[label] = {'cap': cap, 'func': func} -def log_output(cap, logs, newest_first=False): - global log_days - file_output(cap, logs) - file_output(cap, - ['%s.%d' % (f, n) for n in range(1, log_days+1) for f in logs], \ - newest_first=newest_first) - file_output(cap, - ['%s.%d.gz' % (f, n) for n in range(1, log_days+1) for f in logs], \ - newest_first=newest_first) - def collect_data(): process_lists = {} @@ -354,12 +360,8 @@ def collect_data(): f = open(v['filename'], 'r') s = f.read() f.close() - if unlimited_data or caps[cap][MAX_SIZE] == -1 or \ - cap_sizes[cap] < caps[cap][MAX_SIZE]: + if check_space(cap, v['filename'], len(s)): v['output'] = StringIOmtime(s) - cap_sizes[cap] += len(s) - else: - output("Omitting %s, size constraint of %s exceeded" % (v['filename'], cap)) except: pass elif v.has_key('func'): @@ -367,19 +369,16 @@ def collect_data(): s = v['func'](cap) except Exception, e: s = str(e) - if unlimited_data or caps[cap][MAX_SIZE] == -1 or \ - cap_sizes[cap] < caps[cap][MAX_SIZE]: + if check_space(cap, k, len(s)): v['output'] = StringIOmtime(s) - cap_sizes[cap] += len(s) - else: - output("Omitting %s, size constraint of %s exceeded" 
% (k, cap)) run_procs(process_lists.values()) def main(argv=None): global ANSWER_YES_TO_ALL, SILENT_MODE - global entries, data, dbg, unlimited_data, log_days + global entries, data, dbg, unlimited_data, free_disk_space + global log_days, log_last_mod_time # Filter flags only_ovs_info = False @@ -466,6 +465,7 @@ def main(argv=None): if k == '--log-days': log_days = int(v) + if len(params) != 1: print >>sys.stderr, "Invalid additional arguments", str(params) return 2 @@ -478,6 +478,11 @@ def main(argv=None): print >>sys.stderr, "Cannot set both '--outfd' and '--outfile'" return 2 + if output_file is not None and not unlimited_data: + free_disk_space = get_free_disk_space(output_file) * 90 / 100 + + log_last_mod_time = int(time.time()) - log_days * 86400 + if ANSWER_YES_TO_ALL: output("Warning: '--yestoall' argument provided, will not prompt for individual files.") @@ -590,11 +595,14 @@ exclude those logs from the archive. system_logs = ([ VAR_LOG_DIR + x for x in ['crit.log', 'kern.log', 'daemon.log', 'user.log', 'syslog', 'messages', 'secure', 'debug', 'dmesg', 'boot']]) + for log in system_logs: + prefix_output(CAP_SYSTEM_LOGS, log, last_mod_time=log_last_mod_time) + ovs_logs = ([ OPENVSWITCH_LOG_DIR + x for x in ['ovs-vswitchd.log', 'ovsdb-server.log', 'ovs-xapi-sync.log', 'ovs-monitor-ipsec.log', 'ovs-ctl.log']]) - log_output(CAP_SYSTEM_LOGS, system_logs) - log_output(CAP_OPENVSWITCH_LOGS, ovs_logs) + for log in ovs_logs: + prefix_output(CAP_OPENVSWITCH_LOGS, log, last_mod_time=log_last_mod_time) if not os.path.exists('/var/log/dmesg') and not os.path.exists('/var/log/boot'): cmd_output(CAP_SYSTEM_LOGS, [DMESG]) @@ -813,6 +821,7 @@ def dump_rdac_groups(cap): cmd_output(cap, [MPPUTIL, '-g', group]) def load_plugins(just_capabilities=False, filter=None): + global log_last_mod_time def getText(nodelist): rc = "" for node in nodelist: @@ -873,8 +882,9 @@ def load_plugins(just_capabilities=False, filter=None): if el.tagName == "files": newest_first = getBoolAttr(el, 
'newest_first') if el.getAttribute("type") == "logs": - log_output(dir, getText(el.childNodes).split(), - newest_first=newest_first) + for fn in getText(el.childNodes).split(): + prefix_output(dir, fn, newest_first=newest_first, + last_mod_time=log_last_mod_time) else: file_output(dir, getText(el.childNodes).split(), newest_first=newest_first) @@ -883,9 +893,15 @@ def load_plugins(just_capabilities=False, filter=None): if pattern == '': pattern = None negate = getBoolAttr(el, 'negate') newest_first = getBoolAttr(el, 'newest_first') - tree_output(dir, getText(el.childNodes), - pattern and re.compile(pattern) or None, - negate=negate, newest_first=newest_first) + if el.getAttribute("type") == "logs": + tree_output(dir, getText(el.childNodes), + pattern and re.compile(pattern) or None, + negate=negate, newest_first=newest_first, + last_mod_time=log_last_mod_time) + else: + tree_output(dir, getText(el.childNodes), + pattern and re.compile(pattern) or None, + negate=negate, newest_first=newest_first) elif el.tagName == "command": label = el.getAttribute("label") if label == '': label = None @@ -1248,6 +1264,31 @@ def pidof(name): return pids +def check_space(cap, name, size): + global free_disk_space + if free_disk_space is not None and size > free_disk_space: + output("Omitting %s, out of disk space (requested: %u, allowed: %u)" % + (name, size, free_disk_space)) + return False + elif unlimited_data or caps[cap][MAX_SIZE] == -1 or \ + cap_sizes[cap] < caps[cap][MAX_SIZE]: + cap_sizes[cap] += size + if free_disk_space is not None: + free_disk_space -= size + return True + else: + output("Omitting %s, size constraint of %s exceeded" % (name, cap)) + return False + + +def get_free_disk_space(path): + path = os.path.abspath(path) + while not os.path.exists(path): + path = os.path.dirname(path) + s = os.statvfs(path) + return s.f_frsize * s.f_bfree + + class StringIOmtime(StringIO.StringIO): def __init__(self, buf=''): StringIO.StringIO.__init__(self, buf) diff --git 
a/utilities/ovs-controller.8.in b/utilities/ovs-controller.8.in index 162c585dd..f29de6a06 100644 --- a/utilities/ovs-controller.8.in +++ b/utilities/ovs-controller.8.in @@ -148,6 +148,9 @@ To bind locally to port 6633 (the default) and wait for incoming connections from OpenFlow switches: .IP \fB% ovs\-controller ptcp:\fR +.PP +In the future, the default port number will change to 6653, which is the +IANA-defined value. .SH "BUGS" .PP Configuring a Citrix XenServer to connect to a particular controller diff --git a/utilities/ovs-dpctl.c b/utilities/ovs-dpctl.c index 98b47b8d7..4fb02dda4 100644 --- a/utilities/ovs-dpctl.c +++ b/utilities/ovs-dpctl.c @@ -49,8 +49,6 @@ #include "util.h" #include "vlog.h" -VLOG_DEFINE_THIS_MODULE(dpctl); - /* -s, --statistics: Print port/flow statistics? */ static bool print_statistics; @@ -742,9 +740,12 @@ dpctl_dump_flows(int argc, char *argv[]) { const struct dpif_flow_stats *stats; const struct nlattr *actions; - struct dpif_flow_dump dump; + struct dpif_flow_dump flow_dump; const struct nlattr *key; const struct nlattr *mask; + struct dpif_port dpif_port; + struct dpif_port_dump port_dump; + struct hmap portno_names; size_t actions_len; struct dpif *dpif; size_t key_len; @@ -756,13 +757,19 @@ dpctl_dump_flows(int argc, char *argv[]) run(parsed_dpif_open(name, false, &dpif), "opening datapath"); free(name); + hmap_init(&portno_names); + DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, dpif) { + odp_portno_names_set(&portno_names, dpif_port.port_no, dpif_port.name); + } + ds_init(&ds); - dpif_flow_dump_start(&dump, dpif); - while (dpif_flow_dump_next(&dump, &key, &key_len, + dpif_flow_dump_start(&flow_dump, dpif); + while (dpif_flow_dump_next(&flow_dump, &key, &key_len, &mask, &mask_len, &actions, &actions_len, &stats)) { ds_clear(&ds); - odp_flow_format(key, key_len, mask, mask_len, &ds, verbosity); + odp_flow_format(key, key_len, mask, mask_len, &portno_names, &ds, + verbosity); ds_put_cstr(&ds, ", "); dpif_flow_stats_format(stats, 
&ds); @@ -770,7 +777,9 @@ dpctl_dump_flows(int argc, char *argv[]) format_odp_actions(&ds, actions, actions_len); printf("%s\n", ds_cstr(&ds)); } - dpif_flow_dump_done(&dump); + dpif_flow_dump_done(&flow_dump); + odp_portno_names_destroy(&portno_names); + hmap_destroy(&portno_names); ds_destroy(&ds); dpif_close(dpif); } @@ -781,25 +790,37 @@ dpctl_put_flow(int argc, char *argv[], enum dpif_flow_put_flags flags) const char *key_s = argv[argc - 2]; const char *actions_s = argv[argc - 1]; struct dpif_flow_stats stats; + struct dpif_port dpif_port; + struct dpif_port_dump port_dump; struct ofpbuf actions; struct ofpbuf key; struct ofpbuf mask; struct dpif *dpif; struct ds s; char *dp_name; + struct simap port_names; + + dp_name = argc == 4 ? xstrdup(argv[1]) : get_one_dp(); + run(parsed_dpif_open(dp_name, false, &dpif), "opening datapath"); + free(dp_name); + + + simap_init(&port_names); + DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, dpif) { + simap_put(&port_names, dpif_port.name, odp_to_u32(dpif_port.port_no)); + } ds_init(&s); ofpbuf_init(&key, 0); ofpbuf_init(&mask, 0); - run(odp_flow_from_string(key_s, NULL, &key, &mask), "parsing flow key"); + run(odp_flow_from_string(key_s, &port_names, &key, &mask), + "parsing flow key"); + + simap_destroy(&port_names); ofpbuf_init(&actions, 0); run(odp_actions_from_string(actions_s, NULL, &actions), "parsing actions"); - dp_name = argc == 4 ? xstrdup(argv[1]) : get_one_dp(); - run(parsed_dpif_open(dp_name, false, &dpif), "opening datapath"); - free(dp_name); - run(dpif_flow_put(dpif, flags, key.data, key.size, mask.size == 0 ? NULL : mask.data, mask.size, @@ -848,23 +869,32 @@ dpctl_del_flow(int argc, char *argv[]) { const char *key_s = argv[argc - 1]; struct dpif_flow_stats stats; + struct dpif_port dpif_port; + struct dpif_port_dump port_dump; struct ofpbuf key; struct ofpbuf mask; /* To be ignored. 
*/ struct dpif *dpif; char *dp_name; - - ofpbuf_init(&key, 0); - ofpbuf_init(&mask, 0); - run(odp_flow_from_string(key_s, NULL, &key, &mask), "parsing flow key"); + struct simap port_names; dp_name = argc == 3 ? xstrdup(argv[1]) : get_one_dp(); run(parsed_dpif_open(dp_name, false, &dpif), "opening datapath"); free(dp_name); + simap_init(&port_names); + DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, dpif) { + simap_put(&port_names, dpif_port.name, odp_to_u32(dpif_port.port_no)); + } + + ofpbuf_init(&key, 0); + ofpbuf_init(&mask, 0); + run(odp_flow_from_string(key_s, &port_names, &key, &mask), "parsing flow key"); + run(dpif_flow_del(dpif, key.data, key.size, print_statistics ? &stats : NULL), "deleting flow"); + simap_destroy(&port_names); ofpbuf_uninit(&key); ofpbuf_uninit(&mask); @@ -1051,7 +1081,7 @@ dpctl_normalize_actions(int argc, char *argv[]) "odp_flow_key_from_string"); ds_clear(&s); - odp_flow_format(keybuf.data, keybuf.size, NULL, 0, &s, verbosity); + odp_flow_format(keybuf.data, keybuf.size, NULL, 0, NULL, &s, verbosity); printf("input flow: %s\n", ds_cstr(&s)); run(odp_flow_key_to_flow(keybuf.data, keybuf.size, &flow), @@ -1112,7 +1142,7 @@ dpctl_normalize_actions(int argc, char *argv[]) printf("no vlan: "); } - if (af->flow.mpls_depth) { + if (eth_type_mpls(af->flow.dl_type)) { printf("mpls(label=%"PRIu32",tc=%d,ttl=%d): ", mpls_lse_to_label(af->flow.mpls_lse), mpls_lse_to_tc(af->flow.mpls_lse), diff --git a/utilities/ovs-ofctl.8.in b/utilities/ovs-ofctl.8.in index 526e12c56..c43b48c7d 100644 --- a/utilities/ovs-ofctl.8.in +++ b/utilities/ovs-ofctl.8.in @@ -1068,21 +1068,17 @@ from the IP TTL (64 if the packet is not IP). If the packet does already contain an MPLS label, pushes a new outermost label as a copy of the existing outermost label. .IP -There are some limitations in the implementation. \fBpush_mpls\fR -followed by another \fBpush_mpls\fR will result in the first -\fBpush_mpls\fR being discarded. 
+A limitation of the implementation is that processing of actions will stop +if \fBpush_mpls\fR follows another \fBpush_mpls\fR unless there is a +\fBpop_mpls\fR in between. . .IP \fBpop_mpls\fR:\fIethertype\fR Strips the outermost MPLS label stack entry. Currently the implementation restricts \fIethertype\fR to a non-MPLS Ethertype and thus \fBpop_mpls\fR should only be applied to packets with -an MPLS label stack depth of one. -. -.IP -There are some limitations in the implementation. \fBpop_mpls\fR -followed by another \fBpush_mpls\fR without an intermediate -\fBpush_mpls\fR will result in the first \fBpush_mpls\fR being -discarded. +an MPLS label stack depth of one. A further limitation is that processing of +actions will stop if \fBpop_mpls\fR follows another \fBpop_mpls\fR unless +there is a \fBpush_mpls\fR in between. . .IP \fBmod_dl_src\fB:\fImac\fR Sets the source Ethernet address to \fImac\fR. @@ -1514,7 +1510,7 @@ and \fBdel\-flows\fR commands support one additional optional field: .TP \fBout_port=\fIport\fR If set, a matching flow must include an output action to \fIport\fR, -which must an OpenFlow port number or name (e.g. \fBlocal\fR). +which must be an OpenFlow port number or name (e.g. \fBlocal\fR). . .SS "Table Entry Output" . diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index da2dc4240..ec3633c40 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -473,10 +473,9 @@ collect_in_band_managers(const struct ovsrec_open_vswitch *ovs_cfg, SSET_FOR_EACH (target, &targets) { struct sockaddr_in *sin = &managers[n_managers]; - if (stream_parse_target_with_default_ports(target, - JSONRPC_TCP_PORT, - JSONRPC_SSL_PORT, - sin)) { + if (stream_parse_target_with_default_port(target, + OVSDB_OLD_PORT, + sin)) { n_managers++; } } diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index 5fd5b3b2e..c12fd8fc3 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -2816,18 +2816,27 @@
ssl:ip[:port]
-

The specified SSL port (default: 6633) on the host at - the given ip, which must be expressed as an IP address - (not a DNS name). The ssl - column in the Open_vSwitch table must point to a - valid SSL configuration when this form is used.

+

The specified SSL port on the host at the + given ip, which must be expressed as an IP + address (not a DNS name). The ssl column in the Open_vSwitch + table must point to a valid SSL configuration when this form + is used.

+

If port is not specified, it currently + defaults to 6633. In the future, the default will change to + 6653, which is the IANA-defined value.

SSL support is an optional feature that is not always built as part of Open vSwitch.

tcp:ip[:port]
-
The specified TCP port (default: 6633) on the host at - the given ip, which must be expressed as an IP address - (not a DNS name).
+
+

The specified TCP port on the host at the + given ip, which must be expressed as an IP + address (not a DNS name).

+

If port is not specified, it currently + defaults to 6633. In the future, the default will change to + 6653, which is the IANA-defined value.

+

The following connection methods are currently supported for service @@ -2836,26 +2845,29 @@

pssl:[port][:ip]
-

- Listens for SSL connections on the specified TCP port - (default: 6633). If ip, which must be expressed as an - IP address (not a DNS name), is specified, then connections are - restricted to the specified local IP address. -

-

- The column in the table must point to a valid SSL - configuration when this form is used. -

+

Listens for SSL connections on the specified TCP + port. If ip, which must be expressed + as an IP address (not a DNS name), is specified, then + connections are restricted to the specified local IP + address. The ssl + column in the Open_vSwitch table must point + to a valid SSL configuration when this form is used.

+

If port is not specified, it currently + defaults to 6633. In the future, the default will change to + 6653, which is the IANA-defined value.

SSL support is an optional feature that is not always built as part of Open vSwitch.

ptcp:[port][:ip]
- Listens for connections on the specified TCP port - (default: 6633). If ip, which must be expressed as an - IP address (not a DNS name), is specified, then connections are - restricted to the specified local IP address. +

Listens for connections on the specified TCP + port. If ip, which must be expressed + as an IP address (not a DNS name), is specified, then + connections are restricted to the specified local IP + address.

+

If port is not specified, it currently + defaults to 6633. In the future, the default will change to + 6653, which is the IANA-defined value.

When multiple controllers are configured for a single bridge, the @@ -3127,39 +3139,55 @@

ssl:ip[:port]

- The specified SSL port (default: 6632) on the host at - the given ip, which must be expressed as an IP address - (not a DNS name). The ssl - column in the Open_vSwitch table must point to a - valid SSL configuration when this form is used. + The specified SSL port on the host at the given + ip, which must be expressed as an IP address + (not a DNS name). The ssl column in the Open_vSwitch + table must point to a valid SSL configuration when this + form is used.

- SSL support is an optional feature that is not always built as - part of Open vSwitch. + If port is not specified, it currently defaults + to 6632. In the future, the default will change to 6640, + which is the IANA-defined value. +

+

+ SSL support is an optional feature that is not always + built as part of Open vSwitch.

tcp:ip[:port]
- The specified TCP port (default: 6632) on the host at - the given ip, which must be expressed as an IP address - (not a DNS name). +

+ The specified TCP port on the host at the given + ip, which must be expressed as an IP address + (not a DNS name). +

+

+ If port is not specified, it currently defaults + to 6632. In the future, the default will change to 6640, + which is the IANA-defined value. +

pssl:[port][:ip]

- Listens for SSL connections on the specified TCP port - (default: 6632). Specify 0 for port to have the - kernel automatically choose an available port. If ip, - which must be expressed as an IP address (not a DNS name), is - specified, then connections are restricted to the specified local - IP address. -

-

- The ssl column in the Open_vSwitch table must point to a valid SSL - configuration when this form is used. + Listens for SSL connections on the specified TCP + port. Specify 0 for port to have + the kernel automatically choose an available port. If + ip, which must be expressed as an IP address + (not a DNS name), is specified, then connections are + restricted to the specified local IP address. The ssl column in the Open_vSwitch table must point to a valid SSL configuration when this form is used.

+

+ If port is not specified, it currently defaults + to 6632. In the future, the default will change to 6640, + which is the IANA-defined value. +

SSL support is an optional feature that is not always built as part of Open vSwitch. @@ -3167,11 +3195,19 @@

ptcp:[port][:ip]
- Listens for connections on the specified TCP port - (default: 6632). Specify 0 for port to have the kernel - automatically choose an available port. If ip, which - must be expressed as an IP address (not a DNS name), is specified, - then connections are restricted to the specified local IP address. +

+ Listens for connections on the specified TCP + port. Specify 0 for port to have + the kernel automatically choose an available port. If + ip, which must be expressed as an IP address + (not a DNS name), is specified, then connections are + restricted to the specified local IP address. +

+

+ If port is not specified, it currently defaults + to 6632. In the future, the default will change to 6640, + which is the IANA-defined value. +

When multiple managers are configured, the