X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Fodp-util.c;h=1988c637e5b345ff14b2a28df07b0f2c355ea5f2;hb=29089a540cfa30a834e3ee19a8b4c52ff2e331b2;hp=935633f3100db064ee529ba0f9f51269cfdff368;hpb=7202cbe5521ed327ea0c9c959493a08ad2bf8d01;p=sliver-openvswitch.git diff --git a/lib/odp-util.c b/lib/odp-util.c index 935633f31..1988c637e 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, 2010, 2011 Nicira Networks. + * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,8 @@ * limitations under the License. */ -#include #include +#include #include "odp-util.h" #include #include @@ -30,9 +30,8 @@ #include "flow.h" #include "netlink.h" #include "ofpbuf.h" -#include "openvswitch/tunnel.h" #include "packets.h" -#include "shash.h" +#include "simap.h" #include "timeval.h" #include "util.h" #include "vlog.h" @@ -49,7 +48,7 @@ VLOG_DEFINE_THIS_MODULE(odp_util); * from another. 
*/ static const char *delimiters = ", \t\r\n"; -static int parse_odp_key_attr(const char *, const struct shash *port_names, +static int parse_odp_key_attr(const char *, const struct simap *port_names, struct ofpbuf *); static void format_odp_key_attr(const struct nlattr *a, struct ds *ds); @@ -74,6 +73,8 @@ odp_action_len(uint16_t type) case OVS_ACTION_ATTR_USERSPACE: return -2; case OVS_ACTION_ATTR_PUSH_VLAN: return sizeof(struct ovs_action_push_vlan); case OVS_ACTION_ATTR_POP_VLAN: return 0; + case OVS_ACTION_ATTR_PUSH_MPLS: return sizeof(struct ovs_action_push_mpls); + case OVS_ACTION_ATTR_POP_MPLS: return sizeof(ovs_be16); case OVS_ACTION_ATTR_SET: return -2; case OVS_ACTION_ATTR_SAMPLE: return -2; @@ -93,7 +94,9 @@ ovs_key_attr_to_string(enum ovs_key_attr attr) switch (attr) { case OVS_KEY_ATTR_UNSPEC: return "unspec"; case OVS_KEY_ATTR_ENCAP: return "encap"; - case OVS_KEY_ATTR_PRIORITY: return "priority"; + case OVS_KEY_ATTR_PRIORITY: return "skb_priority"; + case OVS_KEY_ATTR_SKB_MARK: return "skb_mark"; + case OVS_KEY_ATTR_TUNNEL: return "tunnel"; case OVS_KEY_ATTR_IN_PORT: return "in_port"; case OVS_KEY_ATTR_ETHERNET: return "eth"; case OVS_KEY_ATTR_VLAN: return "vlan"; @@ -106,7 +109,7 @@ ovs_key_attr_to_string(enum ovs_key_attr attr) case OVS_KEY_ATTR_ICMPV6: return "icmpv6"; case OVS_KEY_ATTR_ARP: return "arp"; case OVS_KEY_ATTR_ND: return "nd"; - case OVS_KEY_ATTR_TUN_ID: return "tun_id"; + case OVS_KEY_ATTR_MPLS: return "mpls"; case __OVS_KEY_ATTR_MAX: default: @@ -116,21 +119,6 @@ ovs_key_attr_to_string(enum ovs_key_attr attr) } } -static enum ovs_key_attr -ovs_key_attr_from_string(const char *s, size_t len) -{ - enum ovs_key_attr attr; - - for (attr = 0; attr <= OVS_KEY_ATTR_MAX; attr++) { - const char *attr_name = ovs_key_attr_to_string(attr); - if (strlen(attr_name) == len && !memcmp(s, attr_name, len)) { - return attr; - } - } - - return OVS_KEY_ATTR_UNSPEC; -} - static void format_generic_odp_action(struct ds *ds, const struct nlattr *a) { @@ 
-181,14 +169,89 @@ format_odp_sample_action(struct ds *ds, const struct nlattr *attr) ds_put_format(ds, "))"); } +static const char * +slow_path_reason_to_string(uint32_t data) +{ + enum slow_path_reason bit = (enum slow_path_reason) data; + + switch (bit) { + case SLOW_CFM: + return "cfm"; + case SLOW_LACP: + return "lacp"; + case SLOW_STP: + return "stp"; + case SLOW_IN_BAND: + return "in_band"; + case SLOW_CONTROLLER: + return "controller"; + case SLOW_MATCH: + return "match"; + default: + return NULL; + } +} + +static int +parse_flags(const char *s, const char *(*bit_to_string)(uint32_t), + uint32_t *res) +{ + uint32_t result = 0; + int n = 0; + + if (s[n] != '(') { + return -EINVAL; + } + n++; + + while (s[n] != ')') { + unsigned long long int flags; + uint32_t bit; + int n0; + + if (sscanf(&s[n], "%lli%n", &flags, &n0) > 0 && n0 > 0) { + n += n0 + (s[n + n0] == ','); + result |= flags; + continue; + } + + for (bit = 1; bit; bit <<= 1) { + const char *name = bit_to_string(bit); + size_t len; + + if (!name) { + continue; + } + + len = strlen(name); + if (!strncmp(s + n, name, len) && + (s[n + len] == ',' || s[n + len] == ')')) { + result |= bit; + n += len + (s[n + len] == ','); + break; + } + } + + if (!bit) { + return -EINVAL; + } + } + n++; + + *res = result; + return n; +} + static void format_odp_userspace_action(struct ds *ds, const struct nlattr *attr) { static const struct nl_policy ovs_userspace_policy[] = { [OVS_USERSPACE_ATTR_PID] = { .type = NL_A_U32 }, - [OVS_USERSPACE_ATTR_USERDATA] = { .type = NL_A_U64, .optional = true }, + [OVS_USERSPACE_ATTR_USERDATA] = { .type = NL_A_UNSPEC, + .optional = true }, }; struct nlattr *a[ARRAY_SIZE(ovs_userspace_policy)]; + const struct nlattr *userdata_attr; if (!nl_parse_nested(attr, ovs_userspace_policy, a, ARRAY_SIZE(a))) { ds_put_cstr(ds, "userspace(error)"); @@ -198,21 +261,60 @@ format_odp_userspace_action(struct ds *ds, const struct nlattr *attr) ds_put_format(ds, "userspace(pid=%"PRIu32, 
nl_attr_get_u32(a[OVS_USERSPACE_ATTR_PID])); - if (a[OVS_USERSPACE_ATTR_USERDATA]) { - uint64_t userdata = nl_attr_get_u64(a[OVS_USERSPACE_ATTR_USERDATA]); - struct user_action_cookie cookie; - - memcpy(&cookie, &userdata, sizeof cookie); + userdata_attr = a[OVS_USERSPACE_ATTR_USERDATA]; + + if (userdata_attr) { + const uint8_t *userdata = nl_attr_get(userdata_attr); + size_t userdata_len = nl_attr_get_size(userdata_attr); + bool userdata_unspec = true; + union user_action_cookie cookie; + + if (userdata_len >= sizeof cookie.type + && userdata_len <= sizeof cookie) { + + memset(&cookie, 0, sizeof cookie); + memcpy(&cookie, userdata, userdata_len); + + userdata_unspec = false; + + if (userdata_len == sizeof cookie.sflow + && cookie.type == USER_ACTION_COOKIE_SFLOW) { + ds_put_format(ds, ",sFlow(" + "vid=%"PRIu16",pcp=%"PRIu8",output=%"PRIu32")", + vlan_tci_to_vid(cookie.sflow.vlan_tci), + vlan_tci_to_pcp(cookie.sflow.vlan_tci), + cookie.sflow.output); + } else if (userdata_len == sizeof cookie.slow_path + && cookie.type == USER_ACTION_COOKIE_SLOW_PATH) { + ds_put_cstr(ds, ",slow_path("); + format_flags(ds, slow_path_reason_to_string, + cookie.slow_path.reason, ','); + ds_put_format(ds, ")"); + } else if (userdata_len == sizeof cookie.flow_sample + && cookie.type == USER_ACTION_COOKIE_FLOW_SAMPLE) { + ds_put_format(ds, ",flow_sample(probability=%"PRIu16 + ",collector_set_id=%"PRIu32 + ",obs_domain_id=%"PRIu32 + ",obs_point_id=%"PRIu32")", + cookie.flow_sample.probability, + cookie.flow_sample.collector_set_id, + cookie.flow_sample.obs_domain_id, + cookie.flow_sample.obs_point_id); + } else if (userdata_len == sizeof cookie.ipfix + && cookie.type == USER_ACTION_COOKIE_IPFIX) { + ds_put_format(ds, ",ipfix"); + } else { + userdata_unspec = true; + } + } - if (cookie.type == USER_ACTION_COOKIE_CONTROLLER) { - ds_put_format(ds, ",controller,length=%"PRIu32, cookie.data); - } else if (cookie.type == USER_ACTION_COOKIE_SFLOW) { - ds_put_format(ds, 
",sFlow,n_output=%"PRIu8"," - "vid=%"PRIu16",pcp=%"PRIu8",ifindex=%"PRIu32, - cookie.n_output, vlan_tci_to_vid(cookie.vlan_tci), - vlan_tci_to_pcp(cookie.vlan_tci), cookie.data); - } else { - ds_put_format(ds, ",userdata=0x%"PRIx64, userdata); + if (userdata_unspec) { + size_t i; + ds_put_format(ds, ",userdata("); + for (i = 0; i < userdata_len; i++) { + ds_put_format(ds, "%02x", userdata[i]); + } + ds_put_char(ds, ')'); } } @@ -230,6 +332,16 @@ format_vlan_tci(struct ds *ds, ovs_be16 vlan_tci) } } +static void +format_mpls_lse(struct ds *ds, ovs_be32 mpls_lse) +{ + ds_put_format(ds, "label=%"PRIu32",tc=%d,ttl=%d,bos=%d", + mpls_lse_to_label(mpls_lse), + mpls_lse_to_tc(mpls_lse), + mpls_lse_to_ttl(mpls_lse), + mpls_lse_to_bos(mpls_lse)); +} + static void format_odp_action(struct ds *ds, const struct nlattr *a) { @@ -269,6 +381,18 @@ format_odp_action(struct ds *ds, const struct nlattr *a) case OVS_ACTION_ATTR_POP_VLAN: ds_put_cstr(ds, "pop_vlan"); break; + case OVS_ACTION_ATTR_PUSH_MPLS: { + const struct ovs_action_push_mpls *mpls = nl_attr_get(a); + ds_put_cstr(ds, "push_mpls("); + format_mpls_lse(ds, mpls->mpls_lse); + ds_put_format(ds, ",eth_type=0x%"PRIx16")", ntohs(mpls->mpls_ethertype)); + break; + } + case OVS_ACTION_ATTR_POP_MPLS: { + ovs_be16 ethertype = nl_attr_get_be16(a); + ds_put_format(ds, "pop_mpls(eth_type=0x%"PRIx16")", ntohs(ethertype)); + break; + } case OVS_ACTION_ATTR_SAMPLE: format_odp_sample_action(ds, a); break; @@ -295,10 +419,16 @@ format_odp_actions(struct ds *ds, const struct nlattr *actions, format_odp_action(ds, a); } if (left) { + int i; + if (left == actions_len) { ds_put_cstr(ds, ""); } - ds_put_format(ds, ",***%u leftover bytes***", left); + ds_put_format(ds, ",***%u leftover bytes*** (", left); + for (i = 0; i < left; i++) { + ds_put_format(ds, "%02x", ((const uint8_t *) a)[i]); + } + ds_put_char(ds, ')'); } } else { ds_put_cstr(ds, "drop"); @@ -306,7 +436,7 @@ format_odp_actions(struct ds *ds, const struct nlattr *actions, } 
static int -parse_odp_action(const char *s, const struct shash *port_names, +parse_odp_action(const char *s, const struct simap *port_names, struct ofpbuf *actions) { /* Many of the sscanf calls in this function use oversized destination @@ -331,42 +461,32 @@ parse_odp_action(const char *s, const struct shash *port_names, if (port_names) { int len = strcspn(s, delimiters); - struct shash_node *node; + struct simap_node *node; - node = shash_find_len(port_names, s, len); + node = simap_find_len(port_names, s, len); if (node) { - nl_msg_put_u32(actions, OVS_ACTION_ATTR_OUTPUT, - (uintptr_t) node->data); + nl_msg_put_u32(actions, OVS_ACTION_ATTR_OUTPUT, node->data); return len; } } { unsigned long long int pid; - unsigned long long int length; - unsigned long long int ifindex; - char userdata_s[32]; - int n_output; + unsigned long long int output; + unsigned long long int probability; + unsigned long long int collector_set_id; + unsigned long long int obs_domain_id; + unsigned long long int obs_point_id; int vid, pcp; int n = -1; if (sscanf(s, "userspace(pid=%lli)%n", &pid, &n) > 0 && n > 0) { - odp_put_userspace_action(pid, NULL, actions); - return n; - } else if (sscanf(s, "userspace(pid=%lli,controller,length=%lli)%n", - &pid, &length, &n) > 0 && n > 0) { - struct user_action_cookie cookie; - - cookie.type = USER_ACTION_COOKIE_CONTROLLER; - cookie.n_output = 0; - cookie.vlan_tci = htons(0); - cookie.data = length; - odp_put_userspace_action(pid, &cookie, actions); + odp_put_userspace_action(pid, NULL, 0, actions); return n; - } else if (sscanf(s, "userspace(pid=%lli,sFlow,n_output=%i,vid=%i," - "pcp=%i,ifindex=%lli)%n", &pid, &n_output, - &vid, &pcp, &ifindex, &n) > 0 && n > 0) { - struct user_action_cookie cookie; + } else if (sscanf(s, "userspace(pid=%lli,sFlow(vid=%i," + "pcp=%i,output=%lli))%n", + &pid, &vid, &pcp, &output, &n) > 0 && n > 0) { + union user_action_cookie cookie; uint16_t tci; tci = vid | (pcp << VLAN_PCP_SHIFT); @@ -375,21 +495,69 @@ 
parse_odp_action(const char *s, const struct shash *port_names, } cookie.type = USER_ACTION_COOKIE_SFLOW; - cookie.n_output = n_output; - cookie.vlan_tci = htons(tci); - cookie.data = ifindex; - odp_put_userspace_action(pid, &cookie, actions); + cookie.sflow.vlan_tci = htons(tci); + cookie.sflow.output = output; + odp_put_userspace_action(pid, &cookie, sizeof cookie.sflow, + actions); return n; - } else if (sscanf(s, "userspace(pid=%lli,userdata=" - "%31[x0123456789abcdefABCDEF])%n", &pid, userdata_s, - &n) > 0 && n > 0) { - struct user_action_cookie cookie; - uint64_t userdata; - - userdata = strtoull(userdata_s, NULL, 0); - memcpy(&cookie, &userdata, sizeof cookie); - odp_put_userspace_action(pid, &cookie, actions); + } else if (sscanf(s, "userspace(pid=%lli,slow_path%n", &pid, &n) > 0 + && n > 0) { + union user_action_cookie cookie; + int res; + + cookie.type = USER_ACTION_COOKIE_SLOW_PATH; + cookie.slow_path.unused = 0; + cookie.slow_path.reason = 0; + + res = parse_flags(&s[n], slow_path_reason_to_string, + &cookie.slow_path.reason); + if (res < 0) { + return res; + } + n += res; + if (s[n] != ')') { + return -EINVAL; + } + n++; + + odp_put_userspace_action(pid, &cookie, sizeof cookie.slow_path, + actions); + return n; + } else if (sscanf(s, "userspace(pid=%lli,flow_sample(probability=%lli," + "collector_set_id=%lli,obs_domain_id=%lli," + "obs_point_id=%lli))%n", + &pid, &probability, &collector_set_id, + &obs_domain_id, &obs_point_id, &n) > 0 && n > 0) { + union user_action_cookie cookie; + + cookie.type = USER_ACTION_COOKIE_FLOW_SAMPLE; + cookie.flow_sample.probability = probability; + cookie.flow_sample.collector_set_id = collector_set_id; + cookie.flow_sample.obs_domain_id = obs_domain_id; + cookie.flow_sample.obs_point_id = obs_point_id; + odp_put_userspace_action(pid, &cookie, sizeof cookie.flow_sample, + actions); + return n; + } else if (sscanf(s, "userspace(pid=%lli,ipfix)%n", &pid, &n) > 0 + && n > 0) { + union user_action_cookie cookie; + + 
cookie.type = USER_ACTION_COOKIE_IPFIX; + odp_put_userspace_action(pid, &cookie, sizeof cookie.ipfix, + actions); return n; + } else if (sscanf(s, "userspace(pid=%lli,userdata(%n", &pid, &n) > 0 + && n > 0) { + struct ofpbuf buf; + char *end; + + ofpbuf_init(&buf, 16); + end = ofpbuf_put_hex(&buf, &s[n], NULL); + if (end[0] == ')' && end[1] == ')') { + odp_put_userspace_action(pid, buf.data, buf.size, actions); + ofpbuf_uninit(&buf); + return (end + 2) - s; + } } } @@ -409,33 +577,35 @@ parse_odp_action(const char *s, const struct shash *port_names, return retval + 5; } - if (!strncmp(s, "push(", 5)) { - size_t start_ofs; - int retval; + { + struct ovs_action_push_vlan push; + int tpid = ETH_TYPE_VLAN; + int vid, pcp; + int cfi = 1; + int n = -1; - start_ofs = nl_msg_start_nested(actions, OVS_ACTION_ATTR_PUSH); - retval = parse_odp_key_attr(s + 5, port_names, actions); - if (retval < 0) { - return retval; - } - if (s[retval + 5] != ')') { - return -EINVAL; + if ((sscanf(s, "push_vlan(vid=%i,pcp=%i)%n", &vid, &pcp, &n) > 0 + && n > 0) + || (sscanf(s, "push_vlan(vid=%i,pcp=%i,cfi=%i)%n", + &vid, &pcp, &cfi, &n) > 0 && n > 0) + || (sscanf(s, "push_vlan(tpid=%i,vid=%i,pcp=%i)%n", + &tpid, &vid, &pcp, &n) > 0 && n > 0) + || (sscanf(s, "push_vlan(tpid=%i,vid=%i,pcp=%i,cfi=%i)%n", + &tpid, &vid, &pcp, &cfi, &n) > 0 && n > 0)) { + push.vlan_tpid = htons(tpid); + push.vlan_tci = htons((vid << VLAN_VID_SHIFT) + | (pcp << VLAN_PCP_SHIFT) + | (cfi ? 
VLAN_CFI : 0)); + nl_msg_put_unspec(actions, OVS_ACTION_ATTR_PUSH_VLAN, + &push, sizeof push); + + return n; } - nl_msg_end_nested(actions, start_ofs); - return retval + 6; } - if (!strncmp(s, "pop(", 4)) { - enum ovs_key_attr key; - size_t len; - - len = strcspn(s + 4, ")"); - key = ovs_key_attr_from_string(s + 4, len); - if (key == OVS_KEY_ATTR_UNSPEC || s[4 + len] != ')') { - return -EINVAL; - } - nl_msg_put_u16(actions, OVS_ACTION_ATTR_POP, key); - return len + 5; + if (!strncmp(s, "pop_vlan", 8)) { + nl_msg_put_flag(actions, OVS_ACTION_ATTR_POP_VLAN); + return 8; } { @@ -460,7 +630,7 @@ parse_odp_action(const char *s, const struct shash *port_names, for (;;) { int retval; - s += strspn(s, delimiters); + n += strspn(s + n, delimiters); if (s[n] == ')') { break; } @@ -470,7 +640,6 @@ parse_odp_action(const char *s, const struct shash *port_names, return retval; } n += retval; - } nl_msg_end_nested(actions, actions_ofs); nl_msg_end_nested(actions, sample_ofs); @@ -488,7 +657,7 @@ parse_odp_action(const char *s, const struct shash *port_names, * Netlink attributes. On failure, no data is appended to 'actions'. Either * way, 'actions''s data might be reallocated. 
*/ int -odp_actions_from_string(const char *s, const struct shash *port_names, +odp_actions_from_string(const char *s, const struct simap *port_names, struct ofpbuf *actions) { size_t old_size; @@ -530,11 +699,13 @@ odp_flow_key_attr_len(uint16_t type) switch ((enum ovs_key_attr) type) { case OVS_KEY_ATTR_ENCAP: return -2; case OVS_KEY_ATTR_PRIORITY: return 4; - case OVS_KEY_ATTR_TUN_ID: return 8; + case OVS_KEY_ATTR_SKB_MARK: return 4; + case OVS_KEY_ATTR_TUNNEL: return -2; case OVS_KEY_ATTR_IN_PORT: return 4; case OVS_KEY_ATTR_ETHERNET: return sizeof(struct ovs_key_ethernet); case OVS_KEY_ATTR_VLAN: return sizeof(ovs_be16); case OVS_KEY_ATTR_ETHERTYPE: return 2; + case OVS_KEY_ATTR_MPLS: return sizeof(struct ovs_key_mpls); case OVS_KEY_ATTR_IPV4: return sizeof(struct ovs_key_ipv4); case OVS_KEY_ATTR_IPV6: return sizeof(struct ovs_key_ipv6); case OVS_KEY_ATTR_TCP: return sizeof(struct ovs_key_tcp); @@ -585,6 +756,111 @@ ovs_frag_type_to_string(enum ovs_frag_type type) } } +static int +tunnel_key_attr_len(int type) +{ + switch (type) { + case OVS_TUNNEL_KEY_ATTR_ID: return 8; + case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: return 4; + case OVS_TUNNEL_KEY_ATTR_IPV4_DST: return 4; + case OVS_TUNNEL_KEY_ATTR_TOS: return 1; + case OVS_TUNNEL_KEY_ATTR_TTL: return 1; + case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: return 0; + case OVS_TUNNEL_KEY_ATTR_CSUM: return 0; + case __OVS_TUNNEL_KEY_ATTR_MAX: + return -1; + } + return -1; +} + +static enum odp_key_fitness +tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun) +{ + unsigned int left; + const struct nlattr *a; + bool ttl = false; + bool unknown = false; + + NL_NESTED_FOR_EACH(a, left, attr) { + uint16_t type = nl_attr_type(a); + size_t len = nl_attr_get_size(a); + int expected_len = tunnel_key_attr_len(type); + + if (len != expected_len && expected_len >= 0) { + return ODP_FIT_ERROR; + } + + switch (type) { + case OVS_TUNNEL_KEY_ATTR_ID: + tun->tun_id = nl_attr_get_be64(a); + tun->flags |= FLOW_TNL_F_KEY; + break; + 
case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: + tun->ip_src = nl_attr_get_be32(a); + break; + case OVS_TUNNEL_KEY_ATTR_IPV4_DST: + tun->ip_dst = nl_attr_get_be32(a); + break; + case OVS_TUNNEL_KEY_ATTR_TOS: + tun->ip_tos = nl_attr_get_u8(a); + break; + case OVS_TUNNEL_KEY_ATTR_TTL: + tun->ip_ttl = nl_attr_get_u8(a); + ttl = true; + break; + case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: + tun->flags |= FLOW_TNL_F_DONT_FRAGMENT; + break; + case OVS_TUNNEL_KEY_ATTR_CSUM: + tun->flags |= FLOW_TNL_F_CSUM; + break; + default: + /* Allow this to show up as unexpected, if there are unknown + * tunnel attribute, eventually resulting in ODP_FIT_TOO_MUCH. */ + unknown = true; + break; + } + } + + if (!ttl) { + return ODP_FIT_ERROR; + } + if (unknown) { + return ODP_FIT_TOO_MUCH; + } + return ODP_FIT_PERFECT; +} + +static void +tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key) +{ + size_t tun_key_ofs; + + tun_key_ofs = nl_msg_start_nested(a, OVS_KEY_ATTR_TUNNEL); + + if (tun_key->flags & FLOW_TNL_F_KEY) { + nl_msg_put_be64(a, OVS_TUNNEL_KEY_ATTR_ID, tun_key->tun_id); + } + if (tun_key->ip_src) { + nl_msg_put_be32(a, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, tun_key->ip_src); + } + if (tun_key->ip_dst) { + nl_msg_put_be32(a, OVS_TUNNEL_KEY_ATTR_IPV4_DST, tun_key->ip_dst); + } + if (tun_key->ip_tos) { + nl_msg_put_u8(a, OVS_TUNNEL_KEY_ATTR_TOS, tun_key->ip_tos); + } + nl_msg_put_u8(a, OVS_TUNNEL_KEY_ATTR_TTL, tun_key->ip_ttl); + if (tun_key->flags & FLOW_TNL_F_DONT_FRAGMENT) { + nl_msg_put_flag(a, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT); + } + if (tun_key->flags & FLOW_TNL_F_CSUM) { + nl_msg_put_flag(a, OVS_TUNNEL_KEY_ATTR_CSUM); + } + + nl_msg_end_nested(a, tun_key_ofs); +} + static void format_odp_key_attr(const struct nlattr *a, struct ds *ds) { @@ -597,6 +873,7 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds) const struct ovs_key_icmpv6 *icmpv6_key; const struct ovs_key_arp *arp_key; const struct ovs_key_nd *nd_key; + struct flow_tnl tun_key; enum ovs_key_attr attr = 
nl_attr_type(a); int expected_len; @@ -620,11 +897,29 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds) break; case OVS_KEY_ATTR_PRIORITY: - ds_put_format(ds, "(%"PRIu32")", nl_attr_get_u32(a)); + ds_put_format(ds, "(%#"PRIx32")", nl_attr_get_u32(a)); break; - case OVS_KEY_ATTR_TUN_ID: - ds_put_format(ds, "(%#"PRIx64")", ntohll(nl_attr_get_be64(a))); + case OVS_KEY_ATTR_SKB_MARK: + ds_put_format(ds, "(%#"PRIx32")", nl_attr_get_u32(a)); + break; + + case OVS_KEY_ATTR_TUNNEL: + memset(&tun_key, 0, sizeof tun_key); + if (tun_key_from_attr(a, &tun_key) == ODP_FIT_ERROR) { + ds_put_format(ds, "(error)"); + } else { + ds_put_format(ds, "(tun_id=0x%"PRIx64",src="IP_FMT",dst="IP_FMT"," + "tos=0x%"PRIx8",ttl=%"PRIu8",flags(", + ntohll(tun_key.tun_id), + IP_ARGS(tun_key.ip_src), + IP_ARGS(tun_key.ip_dst), + tun_key.ip_tos, tun_key.ip_ttl); + + format_flags(ds, flow_tun_flag_to_string, + (uint32_t) tun_key.flags, ','); + ds_put_format(ds, "))"); + } break; case OVS_KEY_ATTR_IN_PORT: @@ -644,6 +939,14 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds) ds_put_char(ds, ')'); break; + case OVS_KEY_ATTR_MPLS: { + const struct ovs_key_mpls *mpls_key = nl_attr_get(a); + ds_put_char(ds, '('); + format_mpls_lse(ds, mpls_key->mpls_top_lse); + ds_put_char(ds, ')'); + break; + } + case OVS_KEY_ATTR_ETHERTYPE: ds_put_format(ds, "(0x%04"PRIx16")", ntohs(nl_attr_get_be16(a))); @@ -653,8 +956,8 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds) ipv4_key = nl_attr_get(a); ds_put_format(ds, "(src="IP_FMT",dst="IP_FMT",proto=%"PRIu8 ",tos=%#"PRIx8",ttl=%"PRIu8",frag=%s)", - IP_ARGS(&ipv4_key->ipv4_src), - IP_ARGS(&ipv4_key->ipv4_dst), + IP_ARGS(ipv4_key->ipv4_src), + IP_ARGS(ipv4_key->ipv4_dst), ipv4_key->ipv4_proto, ipv4_key->ipv4_tos, ipv4_key->ipv4_ttl, ovs_frag_type_to_string(ipv4_key->ipv4_frag)); @@ -705,7 +1008,7 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds) arp_key = nl_attr_get(a); ds_put_format(ds, 
"(sip="IP_FMT",tip="IP_FMT",op=%"PRIu16"," "sha="ETH_ADDR_FMT",tha="ETH_ADDR_FMT")", - IP_ARGS(&arp_key->arp_sip), IP_ARGS(&arp_key->arp_tip), + IP_ARGS(arp_key->arp_sip), IP_ARGS(arp_key->arp_tip), ntohs(arp_key->arp_op), ETH_ADDR_ARGS(arp_key->arp_sha), ETH_ADDR_ARGS(arp_key->arp_tha)); break; @@ -753,10 +1056,16 @@ odp_flow_key_format(const struct nlattr *key, size_t key_len, struct ds *ds) format_odp_key_attr(a, ds); } if (left) { + int i; + if (left == key_len) { ds_put_cstr(ds, ""); } - ds_put_format(ds, ",***%u leftover bytes***", left); + ds_put_format(ds, ",***%u leftover bytes*** (", left); + for (i = 0; i < left; i++) { + ds_put_format(ds, "%02x", ((const uint8_t *) a)[i]); + } + ds_put_char(ds, ')'); } } else { ds_put_cstr(ds, ""); @@ -798,8 +1107,17 @@ ovs_frag_type_from_string(const char *s, enum ovs_frag_type *type) return true; } +static ovs_be32 +mpls_lse_from_components(int mpls_label, int mpls_tc, int mpls_ttl, int mpls_bos) +{ + return (htonl((mpls_label << MPLS_LABEL_SHIFT) | + (mpls_tc << MPLS_TC_SHIFT) | + (mpls_ttl << MPLS_TTL_SHIFT) | + (mpls_bos << MPLS_BOS_SHIFT))); +} + static int -parse_odp_key_attr(const char *s, const struct shash *port_names, +parse_odp_key_attr(const char *s, const struct simap *port_names, struct ofpbuf *key) { /* Many of the sscanf calls in this function use oversized destination @@ -816,20 +1134,52 @@ parse_odp_key_attr(const char *s, const struct shash *port_names, unsigned long long int priority; int n = -1; - if (sscanf(s, "priority(%lli)%n", &priority, &n) > 0 && n > 0) { + if (sscanf(s, "skb_priority(%llx)%n", &priority, &n) > 0 && n > 0) { nl_msg_put_u32(key, OVS_KEY_ATTR_PRIORITY, priority); return n; } } + { + unsigned long long int mark; + int n = -1; + + if (sscanf(s, "skb_mark(%llx)%n", &mark, &n) > 0 && n > 0) { + nl_msg_put_u32(key, OVS_KEY_ATTR_SKB_MARK, mark); + return n; + } + } + { char tun_id_s[32]; + int tos, ttl; + struct flow_tnl tun_key; int n = -1; - if (sscanf(s, 
"tun_id(%31[x0123456789abcdefABCDEF])%n", - tun_id_s, &n) > 0 && n > 0) { - uint64_t tun_id = strtoull(tun_id_s, NULL, 0); - nl_msg_put_be64(key, OVS_KEY_ATTR_TUN_ID, htonll(tun_id)); + if (sscanf(s, "tunnel(tun_id=%31[x0123456789abcdefABCDEF]," + "src="IP_SCAN_FMT",dst="IP_SCAN_FMT + ",tos=%i,ttl=%i,flags%n", tun_id_s, + IP_SCAN_ARGS(&tun_key.ip_src), + IP_SCAN_ARGS(&tun_key.ip_dst), &tos, &ttl, + &n) > 0 && n > 0) { + int res; + uint32_t flags; + + tun_key.tun_id = htonll(strtoull(tun_id_s, NULL, 0)); + tun_key.ip_tos = tos; + tun_key.ip_ttl = ttl; + res = parse_flags(&s[n], flow_tun_flag_to_string, &flags); + tun_key.flags = (uint16_t) flags; + + if (res < 0) { + return res; + } + n += res; + if (s[n] != ')') { + return -EINVAL; + } + n++; + tun_key_to_attr(key, &tun_key); return n; } } @@ -846,14 +1196,14 @@ parse_odp_key_attr(const char *s, const struct shash *port_names, if (port_names && !strncmp(s, "in_port(", 8)) { const char *name; - const struct shash_node *node; + const struct simap_node *node; int name_len; name = s + 8; name_len = strcspn(s, ")"); - node = shash_find_len(port_names, name, name_len); + node = simap_find_len(port_names, name, name_len); if (node) { - nl_msg_put_u32(key, OVS_KEY_ATTR_IN_PORT, (uintptr_t) node->data); + nl_msg_put_u32(key, OVS_KEY_ATTR_IN_PORT, node->data); return 8 + name_len + 1; } } @@ -906,6 +1256,22 @@ parse_odp_key_attr(const char *s, const struct shash *port_names, } } + { + int label, tc, ttl, bos; + int n = -1; + + if (sscanf(s, "mpls(label=%"SCNi32",tc=%i,ttl=%i,bos=%i)%n", + &label, &tc, &ttl, &bos, &n) > 0 && + n > 0) { + struct ovs_key_mpls *mpls; + + mpls = nl_msg_put_unspec_uninit(key, OVS_KEY_ATTR_MPLS, + sizeof *mpls); + mpls->mpls_top_lse = mpls_lse_from_components(label, tc, ttl, bos); + return n; + } + } + { ovs_be32 ipv4_src; ovs_be32 ipv4_dst; @@ -1107,7 +1473,7 @@ parse_odp_key_attr(const char *s, const struct shash *port_names, break; } - retval = parse_odp_key_attr(s, key); + retval = 
parse_odp_key_attr(s, port_names, key); if (retval < 0) { return retval; } @@ -1130,15 +1496,15 @@ parse_odp_key_attr(const char *s, const struct shash *port_names, * data is appended to 'key'. Either way, 'key''s data might be * reallocated. * - * If 'port_names' is nonnull, it points to an shash that maps from a port name - * to a port number cast to void *. (Port names may be used instead of port - * numbers in in_port.) + * If 'port_names' is nonnull, it points to an simap that maps from a port name + * to a port number. (Port names may be used instead of port numbers in + * in_port.) * * On success, the attributes appended to 'key' are individually syntactically * valid, but they may not be valid as a sequence. 'key' might, for example, * have duplicated keys. odp_flow_key_to_flow() will detect those errors. */ int -odp_flow_key_from_string(const char *s, const struct shash *port_names, +odp_flow_key_from_string(const char *s, const struct simap *port_names, struct ofpbuf *key) { const size_t old_size = key->size; @@ -1162,31 +1528,42 @@ odp_flow_key_from_string(const char *s, const struct shash *port_names, } static uint8_t -ovs_to_odp_frag(uint8_t ovs_frag) +ovs_to_odp_frag(uint8_t nw_frag) { - return (ovs_frag & FLOW_NW_FRAG_LATER ? OVS_FRAG_TYPE_LATER - : ovs_frag & FLOW_NW_FRAG_ANY ? OVS_FRAG_TYPE_FIRST - : OVS_FRAG_TYPE_NONE); + return (nw_frag == 0 ? OVS_FRAG_TYPE_NONE + : nw_frag == FLOW_NW_FRAG_ANY ? OVS_FRAG_TYPE_FIRST + : OVS_FRAG_TYPE_LATER); } -/* Appends a representation of 'flow' as OVS_KEY_ATTR_* attributes to 'buf'. */ +/* Appends a representation of 'flow' as OVS_KEY_ATTR_* attributes to 'buf'. + * 'flow->in_port' is ignored (since it is likely to be an OpenFlow port + * number rather than a datapath port number). Instead, if 'odp_in_port' + * is anything other than OVSP_NONE, it is included in 'buf' as the input + * port. 
+ * + * 'buf' must have at least ODPUTIL_FLOW_KEY_BYTES bytes of space, or be + * capable of being expanded to allow for that much space. */ void -odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow) +odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow, + uint32_t odp_in_port) { struct ovs_key_ethernet *eth_key; size_t encap; - if (flow->priority) { - nl_msg_put_u32(buf, OVS_KEY_ATTR_PRIORITY, flow->priority); + if (flow->skb_priority) { + nl_msg_put_u32(buf, OVS_KEY_ATTR_PRIORITY, flow->skb_priority); + } + + if (flow->tunnel.ip_dst) { + tun_key_to_attr(buf, &flow->tunnel); } - if (flow->tun_id != htonll(0)) { - nl_msg_put_be64(buf, OVS_KEY_ATTR_TUN_ID, flow->tun_id); + if (flow->skb_mark) { + nl_msg_put_u32(buf, OVS_KEY_ATTR_SKB_MARK, flow->skb_mark); } - if (flow->in_port != OFPP_NONE) { - nl_msg_put_u32(buf, OVS_KEY_ATTR_IN_PORT, - ofp_port_to_odp_port(flow->in_port)); + if (odp_in_port != OVSP_NONE) { + nl_msg_put_u32(buf, OVS_KEY_ATTR_IN_PORT, odp_in_port); } eth_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_ETHERNET, @@ -1234,7 +1611,8 @@ odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow) ipv6_key->ipv6_tclass = flow->nw_tos; ipv6_key->ipv6_hlimit = flow->nw_ttl; ipv6_key->ipv6_frag = ovs_to_odp_frag(flow->nw_frag); - } else if (flow->dl_type == htons(ETH_TYPE_ARP)) { + } else if (flow->dl_type == htons(ETH_TYPE_ARP) || + flow->dl_type == htons(ETH_TYPE_RARP)) { struct ovs_key_arp *arp_key; arp_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_ARP, @@ -1247,10 +1625,15 @@ odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow) memcpy(arp_key->arp_tha, flow->arp_tha, ETH_ADDR_LEN); } - if ((flow->dl_type == htons(ETH_TYPE_IP) - || flow->dl_type == htons(ETH_TYPE_IPV6)) - && !(flow->nw_frag & FLOW_NW_FRAG_LATER)) { + if (flow->mpls_depth) { + struct ovs_key_mpls *mpls_key; + + mpls_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_MPLS, + sizeof *mpls_key); + mpls_key->mpls_top_lse = flow->mpls_lse; + 
} + if (is_ip_any(flow) && !(flow->nw_frag & FLOW_NW_FRAG_LATER)) { if (flow->nw_proto == IPPROTO_TCP) { struct ovs_key_tcp *tcp_key; @@ -1302,30 +1685,39 @@ unencap: } } +uint32_t +odp_flow_key_hash(const struct nlattr *key, size_t key_len) +{ + BUILD_ASSERT_DECL(!(NLA_ALIGNTO % sizeof(uint32_t))); + return hash_words((const uint32_t *) key, key_len / sizeof(uint32_t), 0); +} + static void log_odp_key_attributes(struct vlog_rate_limit *rl, const char *title, - uint32_t attrs, + uint64_t attrs, int out_of_range_attr, const struct nlattr *key, size_t key_len) { struct ds s; int i; - if (VLOG_DROP_WARN(rl)) { + if (VLOG_DROP_DBG(rl)) { return; } ds_init(&s); - ds_put_format(&s, "%s:", title); - for (i = 0; i < 32; i++) { - if (attrs & (1u << i)) { + for (i = 0; i < 64; i++) { + if (attrs & (UINT64_C(1) << i)) { ds_put_format(&s, " %s", ovs_key_attr_to_string(i)); } } + if (out_of_range_attr) { + ds_put_format(&s, " %d (and possibly others)", out_of_range_attr); + } ds_put_cstr(&s, ": "); odp_flow_key_format(key, key_len, &s); - VLOG_WARN("%s", ds_cstr(&s)); + VLOG_DBG("%s:%s", title, ds_cstr(&s)); ds_destroy(&s); } @@ -1335,8 +1727,7 @@ odp_to_ovs_frag(uint8_t odp_frag, struct flow *flow) static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); if (odp_frag > OVS_FRAG_TYPE_LATER) { - VLOG_ERR_RL(&rl, "invalid frag %"PRIu8" in flow key", - odp_frag); + VLOG_ERR_RL(&rl, "invalid frag %"PRIu8" in flow key", odp_frag); return false; } @@ -1349,51 +1740,56 @@ odp_to_ovs_frag(uint8_t odp_frag, struct flow *flow) return true; } -static int +static bool parse_flow_nlattrs(const struct nlattr *key, size_t key_len, - const struct nlattr *attrs[], uint64_t *present_attrsp) + const struct nlattr *attrs[], uint64_t *present_attrsp, + int *out_of_range_attrp) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10); const struct nlattr *nla; uint64_t present_attrs; size_t left; + 
BUILD_ASSERT(OVS_KEY_ATTR_MAX < CHAR_BIT * sizeof present_attrs); present_attrs = 0; + *out_of_range_attrp = 0; NL_ATTR_FOR_EACH (nla, left, key, key_len) { uint16_t type = nl_attr_type(nla); size_t len = nl_attr_get_size(nla); int expected_len = odp_flow_key_attr_len(type); - if (len != expected_len && expected_len != -2) { - if (expected_len == -1) { - VLOG_ERR_RL(&rl, "unknown attribute %"PRIu16" in flow key", - type); - } else { - VLOG_ERR_RL(&rl, "attribute %s has length %zu but should have " - "length %d", ovs_key_attr_to_string(type), - len, expected_len); - } - return EINVAL; - } else if (present_attrs & (UINT64_C(1) << type)) { - VLOG_ERR_RL(&rl, "duplicate %s attribute in flow key", - ovs_key_attr_to_string(type)); - return EINVAL; + if (len != expected_len && expected_len >= 0) { + VLOG_ERR_RL(&rl, "attribute %s has length %zu but should have " + "length %d", ovs_key_attr_to_string(type), + len, expected_len); + return false; } - present_attrs |= UINT64_C(1) << type; - attrs[type] = nla; + if (type > OVS_KEY_ATTR_MAX) { + *out_of_range_attrp = type; + } else { + if (present_attrs & (UINT64_C(1) << type)) { + VLOG_ERR_RL(&rl, "duplicate %s attribute in flow key", + ovs_key_attr_to_string(type)); + return false; + } + + present_attrs |= UINT64_C(1) << type; + attrs[type] = nla; + } } if (left) { VLOG_ERR_RL(&rl, "trailing garbage in flow key"); - return EINVAL; + return false; } *present_attrsp = present_attrs; - return 0; + return true; } -static int -check_expectations(uint64_t present_attrs, uint64_t expected_attrs, +static enum odp_key_fitness +check_expectations(uint64_t present_attrs, int out_of_range_attr, + uint64_t expected_attrs, const struct nlattr *key, size_t key_len) { uint64_t missing_attrs; @@ -1401,135 +1797,61 @@ check_expectations(uint64_t present_attrs, uint64_t expected_attrs, missing_attrs = expected_attrs & ~present_attrs; if (missing_attrs) { - static struct vlog_rate_limit miss_rl = VLOG_RATE_LIMIT_INIT(10, 10); - 
log_odp_key_attributes(&miss_rl, "expected but not present", - missing_attrs, key, key_len); - return EINVAL; + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10); + log_odp_key_attributes(&rl, "expected but not present", + missing_attrs, 0, key, key_len); + return ODP_FIT_TOO_LITTLE; } extra_attrs = present_attrs & ~expected_attrs; - if (extra_attrs) { - static struct vlog_rate_limit extra_rl = VLOG_RATE_LIMIT_INIT(10, 10); - log_odp_key_attributes(&extra_rl, "present but not expected", - extra_attrs, key, key_len); - return EINVAL; + if (extra_attrs || out_of_range_attr) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10); + log_odp_key_attributes(&rl, "present but not expected", + extra_attrs, out_of_range_attr, key, key_len); + return ODP_FIT_TOO_MUCH; } - return 0; + return ODP_FIT_PERFECT; } -/* Converts the 'key_len' bytes of OVS_KEY_ATTR_* attributes in 'key' to a flow - * structure in 'flow'. Returns 0 if successful, otherwise EINVAL. */ -int -odp_flow_key_to_flow(const struct nlattr *key, size_t key_len, - struct flow *flow) +static bool +parse_ethertype(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1], + uint64_t present_attrs, uint64_t *expected_attrs, + struct flow *flow) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); - const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1]; - uint64_t expected_attrs; - uint64_t present_attrs; - int error; - - memset(flow, 0, sizeof *flow); - - error = parse_flow_nlattrs(key, key_len, attrs, &present_attrs); - if (error) { - return error; - } - - expected_attrs = 0; - - if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_PRIORITY)) { - flow->priority = nl_attr_get_u32(attrs[OVS_KEY_ATTR_PRIORITY]); - expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_PRIORITY; - } - - if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_TUN_ID)) { - flow->tun_id = nl_attr_get_be64(attrs[OVS_KEY_ATTR_TUN_ID]); - expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_TUN_ID; - } - - if (present_attrs & (UINT64_C(1) << 
OVS_KEY_ATTR_IN_PORT)) { - uint32_t in_port = nl_attr_get_u32(attrs[OVS_KEY_ATTR_IN_PORT]); - if (in_port >= UINT16_MAX || in_port >= OFPP_MAX) { - VLOG_ERR_RL(&rl, "in_port %"PRIu32" out of supported range", - in_port); - return EINVAL; - } - flow->in_port = odp_port_to_ofp_port(in_port); - expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_IN_PORT; - } else { - flow->in_port = OFPP_NONE; - } - - if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_ETHERNET)) { - const struct ovs_key_ethernet *eth_key; - - eth_key = nl_attr_get(attrs[OVS_KEY_ATTR_ETHERNET]); - memcpy(flow->dl_src, eth_key->eth_src, ETH_ADDR_LEN); - memcpy(flow->dl_dst, eth_key->eth_dst, ETH_ADDR_LEN); - } else { - VLOG_ERR_RL(&rl, "missing Ethernet attribute in flow key"); - return EINVAL; - } - expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERNET; - - if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_ETHERTYPE) - && (nl_attr_get_be16(attrs[OVS_KEY_ATTR_ETHERTYPE]) - == htons(ETH_TYPE_VLAN))) { - /* The Ethernet type is 0x8100 so there must be a VLAN tag - * and encapsulated protocol information. */ - const struct nlattr *encap; - __be16 tci; - int error; - - expected_attrs |= ((UINT64_C(1) << OVS_KEY_ATTR_ETHERTYPE) | - (UINT64_C(1) << OVS_KEY_ATTR_VLAN) | - (UINT64_C(1) << OVS_KEY_ATTR_ENCAP)); - error = check_expectations(present_attrs, expected_attrs, - key, key_len); - if (error) { - return error; - } - - encap = attrs[OVS_KEY_ATTR_ENCAP]; - tci = nl_attr_get_be16(attrs[OVS_KEY_ATTR_VLAN]); - if (tci & htons(VLAN_CFI)) { - flow->vlan_tci = tci; - - error = parse_flow_nlattrs(nl_attr_get(encap), - nl_attr_get_size(encap), - attrs, &present_attrs); - if (error) { - return error; - } - expected_attrs = 0; - } else if (tci == htons(0)) { - /* Corner case for a truncated 802.1Q header. 
*/ - if (nl_attr_get_size(encap)) { - return EINVAL; - } - - flow->dl_type = htons(ETH_TYPE_VLAN); - return 0; - } else { - return EINVAL; - } - } if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_ETHERTYPE)) { flow->dl_type = nl_attr_get_be16(attrs[OVS_KEY_ATTR_ETHERTYPE]); if (ntohs(flow->dl_type) < 1536) { VLOG_ERR_RL(&rl, "invalid Ethertype %"PRIu16" in flow key", ntohs(flow->dl_type)); - return EINVAL; + return false; } - expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERTYPE; + *expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERTYPE; } else { flow->dl_type = htons(FLOW_DL_TYPE_NONE); } + return true; +} - if (flow->dl_type == htons(ETH_TYPE_IP)) { +static enum odp_key_fitness +parse_l2_5_onward(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1], + uint64_t present_attrs, int out_of_range_attr, + uint64_t expected_attrs, struct flow *flow, + const struct nlattr *key, size_t key_len) +{ + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + + if (eth_type_mpls(flow->dl_type)) { + expected_attrs |= (UINT64_C(1) << OVS_KEY_ATTR_MPLS); + + if (!(present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_MPLS))) { + return ODP_FIT_TOO_LITTLE; + } + flow->mpls_lse = nl_attr_get_be32(attrs[OVS_KEY_ATTR_MPLS]); + flow->mpls_depth++; + } else if (flow->dl_type == htons(ETH_TYPE_IP)) { expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_IPV4; if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_IPV4)) { const struct ovs_key_ipv4 *ipv4_key; @@ -1541,7 +1863,7 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len, flow->nw_tos = ipv4_key->ipv4_tos; flow->nw_ttl = ipv4_key->ipv4_ttl; if (!odp_to_ovs_frag(ipv4_key->ipv4_frag, flow)) { - return EINVAL; + return ODP_FIT_ERROR; } } } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) { @@ -1557,10 +1879,11 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len, flow->nw_tos = ipv6_key->ipv6_tclass; flow->nw_ttl = ipv6_key->ipv6_hlimit; if (!odp_to_ovs_frag(ipv6_key->ipv6_frag, flow)) { - return EINVAL; + return 
ODP_FIT_ERROR; } } - } else if (flow->dl_type == htons(ETH_TYPE_ARP)) { + } else if (flow->dl_type == htons(ETH_TYPE_ARP) || + flow->dl_type == htons(ETH_TYPE_RARP)) { expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ARP; if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_ARP)) { const struct ovs_key_arp *arp_key; @@ -1571,7 +1894,7 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len, if (arp_key->arp_op & htons(0xff00)) { VLOG_ERR_RL(&rl, "unsupported ARP opcode %"PRIu16" in flow " "key", ntohs(arp_key->arp_op)); - return EINVAL; + return ODP_FIT_ERROR; } flow->nw_proto = ntohs(arp_key->arp_op); memcpy(flow->arp_sha, arp_key->arp_sha, ETH_ADDR_LEN); @@ -1641,27 +1964,483 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len, } } - return check_expectations(present_attrs, expected_attrs, key, key_len); + return check_expectations(present_attrs, out_of_range_attr, expected_attrs, + key, key_len); +} + +/* Parse 802.1Q header then encapsulated L3 attributes. */ +static enum odp_key_fitness +parse_8021q_onward(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1], + uint64_t present_attrs, int out_of_range_attr, + uint64_t expected_attrs, struct flow *flow, + const struct nlattr *key, size_t key_len) +{ + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + + const struct nlattr *encap + = (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_ENCAP) + ? attrs[OVS_KEY_ATTR_ENCAP] : NULL); + enum odp_key_fitness encap_fitness; + enum odp_key_fitness fitness; + ovs_be16 tci; + + /* Calulate fitness of outer attributes. */ + expected_attrs |= ((UINT64_C(1) << OVS_KEY_ATTR_VLAN) | + (UINT64_C(1) << OVS_KEY_ATTR_ENCAP)); + fitness = check_expectations(present_attrs, out_of_range_attr, + expected_attrs, key, key_len); + + /* Get the VLAN TCI value. 
*/ + if (!(present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_VLAN))) { + return ODP_FIT_TOO_LITTLE; + } + tci = nl_attr_get_be16(attrs[OVS_KEY_ATTR_VLAN]); + if (tci == htons(0)) { + /* Corner case for a truncated 802.1Q header. */ + if (fitness == ODP_FIT_PERFECT && nl_attr_get_size(encap)) { + return ODP_FIT_TOO_MUCH; + } + return fitness; + } else if (!(tci & htons(VLAN_CFI))) { + VLOG_ERR_RL(&rl, "OVS_KEY_ATTR_VLAN 0x%04"PRIx16" is nonzero " + "but CFI bit is not set", ntohs(tci)); + return ODP_FIT_ERROR; + } + + /* Set vlan_tci. + * Remove the TPID from dl_type since it's not the real Ethertype. */ + flow->vlan_tci = tci; + flow->dl_type = htons(0); + + /* Now parse the encapsulated attributes. */ + if (!parse_flow_nlattrs(nl_attr_get(encap), nl_attr_get_size(encap), + attrs, &present_attrs, &out_of_range_attr)) { + return ODP_FIT_ERROR; + } + expected_attrs = 0; + + if (!parse_ethertype(attrs, present_attrs, &expected_attrs, flow)) { + return ODP_FIT_ERROR; + } + encap_fitness = parse_l2_5_onward(attrs, present_attrs, out_of_range_attr, + expected_attrs, flow, key, key_len); + + /* The overall fitness is the worse of the outer and inner attributes. */ + return MAX(fitness, encap_fitness); +} + +/* Converts the 'key_len' bytes of OVS_KEY_ATTR_* attributes in 'key' to a flow + * structure in 'flow'. Returns an ODP_FIT_* value that indicates how well + * 'key' fits our expectations for what a flow key should contain. + * + * The 'in_port' will be the datapath's understanding of the port. The + * caller will need to translate with odp_port_to_ofp_port() if the + * OpenFlow port is needed. + * + * This function doesn't take the packet itself as an argument because none of + * the currently understood OVS_KEY_ATTR_* attributes require it. Currently, + * it is always possible to infer which additional attribute(s) should appear + * by looking at the attributes for lower-level protocols, e.g. 
if the network + * protocol in OVS_KEY_ATTR_IPV4 or OVS_KEY_ATTR_IPV6 is IPPROTO_TCP then we + * know that a OVS_KEY_ATTR_TCP attribute must appear and that otherwise it + * must be absent. */ +enum odp_key_fitness +odp_flow_key_to_flow(const struct nlattr *key, size_t key_len, + struct flow *flow) +{ + const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1]; + uint64_t expected_attrs; + uint64_t present_attrs; + int out_of_range_attr; + + memset(flow, 0, sizeof *flow); + + /* Parse attributes. */ + if (!parse_flow_nlattrs(key, key_len, attrs, &present_attrs, + &out_of_range_attr)) { + return ODP_FIT_ERROR; + } + expected_attrs = 0; + + /* Metadata. */ + if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_PRIORITY)) { + flow->skb_priority = nl_attr_get_u32(attrs[OVS_KEY_ATTR_PRIORITY]); + expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_PRIORITY; + } + + if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_SKB_MARK)) { + flow->skb_mark = nl_attr_get_u32(attrs[OVS_KEY_ATTR_SKB_MARK]); + expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_SKB_MARK; + } + + if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_TUNNEL)) { + enum odp_key_fitness res; + + res = tun_key_from_attr(attrs[OVS_KEY_ATTR_TUNNEL], &flow->tunnel); + if (res == ODP_FIT_ERROR) { + return ODP_FIT_ERROR; + } else if (res == ODP_FIT_PERFECT) { + expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_TUNNEL; + } + } + + if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_IN_PORT)) { + flow->in_port = nl_attr_get_u32(attrs[OVS_KEY_ATTR_IN_PORT]); + expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_IN_PORT; + } else { + flow->in_port = OVSP_NONE; + } + + /* Ethernet header. 
*/ + if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_ETHERNET)) { + const struct ovs_key_ethernet *eth_key; + + eth_key = nl_attr_get(attrs[OVS_KEY_ATTR_ETHERNET]); + memcpy(flow->dl_src, eth_key->eth_src, ETH_ADDR_LEN); + memcpy(flow->dl_dst, eth_key->eth_dst, ETH_ADDR_LEN); + } + expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERNET; + + /* Get Ethertype or 802.1Q TPID or FLOW_DL_TYPE_NONE. */ + if (!parse_ethertype(attrs, present_attrs, &expected_attrs, flow)) { + return ODP_FIT_ERROR; + } + + if (flow->dl_type == htons(ETH_TYPE_VLAN)) { + return parse_8021q_onward(attrs, present_attrs, out_of_range_attr, + expected_attrs, flow, key, key_len); + } + return parse_l2_5_onward(attrs, present_attrs, out_of_range_attr, + expected_attrs, flow, key, key_len); +} + +/* Returns 'fitness' as a string, for use in debug messages. */ +const char * +odp_key_fitness_to_string(enum odp_key_fitness fitness) +{ + switch (fitness) { + case ODP_FIT_PERFECT: + return "OK"; + case ODP_FIT_TOO_MUCH: + return "too_much"; + case ODP_FIT_TOO_LITTLE: + return "too_little"; + case ODP_FIT_ERROR: + return "error"; + default: + return ""; + } } /* Appends an OVS_ACTION_ATTR_USERSPACE action to 'odp_actions' that specifies - * Netlink PID 'pid'. If 'cookie' is nonnull, adds a userdata attribute whose - * contents contains 'cookie' and returns the offset within 'odp_actions' of - * the start of the cookie. (If 'cookie' is null, then the return value is not - * meaningful.) */ + * Netlink PID 'pid'. If 'userdata' is nonnull, adds a userdata attribute + * whose contents are the 'userdata_size' bytes at 'userdata' and returns the + * offset within 'odp_actions' of the start of the cookie. (If 'userdata' is + * null, then the return value is not meaningful.) 
*/ size_t -odp_put_userspace_action(uint32_t pid, const struct user_action_cookie *cookie, +odp_put_userspace_action(uint32_t pid, + const void *userdata, size_t userdata_size, struct ofpbuf *odp_actions) { + size_t userdata_ofs; size_t offset; offset = nl_msg_start_nested(odp_actions, OVS_ACTION_ATTR_USERSPACE); nl_msg_put_u32(odp_actions, OVS_USERSPACE_ATTR_PID, pid); - if (cookie) { + if (userdata) { + userdata_ofs = odp_actions->size + NLA_HDRLEN; nl_msg_put_unspec(odp_actions, OVS_USERSPACE_ATTR_USERDATA, - cookie, sizeof *cookie); + userdata, userdata_size); + } else { + userdata_ofs = 0; } nl_msg_end_nested(odp_actions, offset); - return cookie ? odp_actions->size - NLA_ALIGN(sizeof *cookie) : 0; + return userdata_ofs; +} + +void +odp_put_tunnel_action(const struct flow_tnl *tunnel, + struct ofpbuf *odp_actions) +{ + size_t offset = nl_msg_start_nested(odp_actions, OVS_ACTION_ATTR_SET); + tun_key_to_attr(odp_actions, tunnel); + nl_msg_end_nested(odp_actions, offset); +} + +/* The commit_odp_actions() function and its helpers. */ + +static void +commit_set_action(struct ofpbuf *odp_actions, enum ovs_key_attr key_type, + const void *key, size_t key_size) +{ + size_t offset = nl_msg_start_nested(odp_actions, OVS_ACTION_ATTR_SET); + nl_msg_put_unspec(odp_actions, key_type, key, key_size); + nl_msg_end_nested(odp_actions, offset); +} + +void +odp_put_skb_mark_action(const uint32_t skb_mark, + struct ofpbuf *odp_actions) +{ + commit_set_action(odp_actions, OVS_KEY_ATTR_SKB_MARK, &skb_mark, + sizeof(skb_mark)); +} + +/* If any of the flow key data that ODP actions can modify are different in + * 'base->tunnel' and 'flow->tunnel', appends a set_tunnel ODP action to + * 'odp_actions' that change the flow tunneling information in key from + * 'base->tunnel' into 'flow->tunnel', and then changes 'base->tunnel' in the + * same way. In other words, operates the same as commit_odp_actions(), but + * only on tunneling information. 
*/ +void +commit_odp_tunnel_action(const struct flow *flow, struct flow *base, + struct ofpbuf *odp_actions) +{ + /* A valid IPV4_TUNNEL must have non-zero ip_dst. */ + if (flow->tunnel.ip_dst) { + if (!memcmp(&base->tunnel, &flow->tunnel, sizeof base->tunnel)) { + return; + } + memcpy(&base->tunnel, &flow->tunnel, sizeof base->tunnel); + odp_put_tunnel_action(&base->tunnel, odp_actions); + } +} + +static void +commit_set_ether_addr_action(const struct flow *flow, struct flow *base, + struct ofpbuf *odp_actions) +{ + struct ovs_key_ethernet eth_key; + + if (eth_addr_equals(base->dl_src, flow->dl_src) && + eth_addr_equals(base->dl_dst, flow->dl_dst)) { + return; + } + + memcpy(base->dl_src, flow->dl_src, ETH_ADDR_LEN); + memcpy(base->dl_dst, flow->dl_dst, ETH_ADDR_LEN); + + memcpy(eth_key.eth_src, base->dl_src, ETH_ADDR_LEN); + memcpy(eth_key.eth_dst, base->dl_dst, ETH_ADDR_LEN); + + commit_set_action(odp_actions, OVS_KEY_ATTR_ETHERNET, + &eth_key, sizeof(eth_key)); +} + +static void +commit_vlan_action(const struct flow *flow, struct flow *base, + struct ofpbuf *odp_actions) +{ + if (base->vlan_tci == flow->vlan_tci) { + return; + } + + if (base->vlan_tci & htons(VLAN_CFI)) { + nl_msg_put_flag(odp_actions, OVS_ACTION_ATTR_POP_VLAN); + } + + if (flow->vlan_tci & htons(VLAN_CFI)) { + struct ovs_action_push_vlan vlan; + + vlan.vlan_tpid = htons(ETH_TYPE_VLAN); + vlan.vlan_tci = flow->vlan_tci; + nl_msg_put_unspec(odp_actions, OVS_ACTION_ATTR_PUSH_VLAN, + &vlan, sizeof vlan); + } + base->vlan_tci = flow->vlan_tci; +} + +static void +commit_mpls_action(const struct flow *flow, struct flow *base, + struct ofpbuf *odp_actions) +{ + if (flow->mpls_lse == base->mpls_lse && + flow->mpls_depth == base->mpls_depth) { + return; + } + + if (flow->mpls_depth < base->mpls_depth) { + if (base->mpls_depth - flow->mpls_depth > 1) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10); + VLOG_WARN_RL(&rl, "Multiple mpls_pop actions reduced to " + " a single mpls_pop 
action"); + } + + nl_msg_put_be16(odp_actions, OVS_ACTION_ATTR_POP_MPLS, flow->dl_type); + } else if (flow->mpls_depth > base->mpls_depth) { + struct ovs_action_push_mpls *mpls; + + if (flow->mpls_depth - base->mpls_depth > 1) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10); + VLOG_WARN_RL(&rl, "Multiple mpls_push actions reduced to " + " a single mpls_push action"); + } + + mpls = nl_msg_put_unspec_uninit(odp_actions, OVS_ACTION_ATTR_PUSH_MPLS, + sizeof *mpls); + memset(mpls, 0, sizeof *mpls); + mpls->mpls_ethertype = flow->dl_type; + mpls->mpls_lse = flow->mpls_lse; + } else { + struct ovs_key_mpls mpls_key; + + mpls_key.mpls_top_lse = flow->mpls_lse; + commit_set_action(odp_actions, OVS_KEY_ATTR_MPLS, + &mpls_key, sizeof(mpls_key)); + } + + base->dl_type = flow->dl_type; + base->mpls_lse = flow->mpls_lse; + base->mpls_depth = flow->mpls_depth; +} + +static void +commit_set_ipv4_action(const struct flow *flow, struct flow *base, + struct ofpbuf *odp_actions) +{ + struct ovs_key_ipv4 ipv4_key; + + if (base->nw_src == flow->nw_src && + base->nw_dst == flow->nw_dst && + base->nw_tos == flow->nw_tos && + base->nw_ttl == flow->nw_ttl && + base->nw_frag == flow->nw_frag) { + return; + } + + ipv4_key.ipv4_src = base->nw_src = flow->nw_src; + ipv4_key.ipv4_dst = base->nw_dst = flow->nw_dst; + ipv4_key.ipv4_tos = base->nw_tos = flow->nw_tos; + ipv4_key.ipv4_ttl = base->nw_ttl = flow->nw_ttl; + ipv4_key.ipv4_proto = base->nw_proto; + ipv4_key.ipv4_frag = ovs_to_odp_frag(base->nw_frag); + + commit_set_action(odp_actions, OVS_KEY_ATTR_IPV4, + &ipv4_key, sizeof(ipv4_key)); +} + +static void +commit_set_ipv6_action(const struct flow *flow, struct flow *base, + struct ofpbuf *odp_actions) +{ + struct ovs_key_ipv6 ipv6_key; + + if (ipv6_addr_equals(&base->ipv6_src, &flow->ipv6_src) && + ipv6_addr_equals(&base->ipv6_dst, &flow->ipv6_dst) && + base->ipv6_label == flow->ipv6_label && + base->nw_tos == flow->nw_tos && + base->nw_ttl == flow->nw_ttl && + 
base->nw_frag == flow->nw_frag) { + return; + } + + base->ipv6_src = flow->ipv6_src; + memcpy(&ipv6_key.ipv6_src, &base->ipv6_src, sizeof(ipv6_key.ipv6_src)); + base->ipv6_dst = flow->ipv6_dst; + memcpy(&ipv6_key.ipv6_dst, &base->ipv6_dst, sizeof(ipv6_key.ipv6_dst)); + + ipv6_key.ipv6_label = base->ipv6_label = flow->ipv6_label; + ipv6_key.ipv6_tclass = base->nw_tos = flow->nw_tos; + ipv6_key.ipv6_hlimit = base->nw_ttl = flow->nw_ttl; + ipv6_key.ipv6_proto = base->nw_proto; + ipv6_key.ipv6_frag = ovs_to_odp_frag(base->nw_frag); + + commit_set_action(odp_actions, OVS_KEY_ATTR_IPV6, + &ipv6_key, sizeof(ipv6_key)); +} + +static void +commit_set_nw_action(const struct flow *flow, struct flow *base, + struct ofpbuf *odp_actions) +{ + /* Check if flow really have an IP header. */ + if (!flow->nw_proto) { + return; + } + + if (base->dl_type == htons(ETH_TYPE_IP)) { + commit_set_ipv4_action(flow, base, odp_actions); + } else if (base->dl_type == htons(ETH_TYPE_IPV6)) { + commit_set_ipv6_action(flow, base, odp_actions); + } +} + +static void +commit_set_port_action(const struct flow *flow, struct flow *base, + struct ofpbuf *odp_actions) +{ + if (!is_ip_any(base) || (!base->tp_src && !base->tp_dst)) { + return; + } + + if (base->tp_src == flow->tp_src && + base->tp_dst == flow->tp_dst) { + return; + } + + if (flow->nw_proto == IPPROTO_TCP) { + struct ovs_key_tcp port_key; + + port_key.tcp_src = base->tp_src = flow->tp_src; + port_key.tcp_dst = base->tp_dst = flow->tp_dst; + + commit_set_action(odp_actions, OVS_KEY_ATTR_TCP, + &port_key, sizeof(port_key)); + + } else if (flow->nw_proto == IPPROTO_UDP) { + struct ovs_key_udp port_key; + + port_key.udp_src = base->tp_src = flow->tp_src; + port_key.udp_dst = base->tp_dst = flow->tp_dst; + + commit_set_action(odp_actions, OVS_KEY_ATTR_UDP, + &port_key, sizeof(port_key)); + } +} + +static void +commit_set_priority_action(const struct flow *flow, struct flow *base, + struct ofpbuf *odp_actions) +{ + if (base->skb_priority == 
flow->skb_priority) { + return; + } + base->skb_priority = flow->skb_priority; + + commit_set_action(odp_actions, OVS_KEY_ATTR_PRIORITY, + &base->skb_priority, sizeof(base->skb_priority)); +} + +static void +commit_set_skb_mark_action(const struct flow *flow, struct flow *base, + struct ofpbuf *odp_actions) +{ + if (base->skb_mark == flow->skb_mark) { + return; + } + base->skb_mark = flow->skb_mark; + + odp_put_skb_mark_action(base->skb_mark, odp_actions); +} +/* If any of the flow key data that ODP actions can modify are different in + * 'base' and 'flow', appends ODP actions to 'odp_actions' that change the flow + * key from 'base' into 'flow', and then changes 'base' the same way. Does not + * commit set_tunnel actions. Users should call commit_odp_tunnel_action() + * in addition to this function if needed. */ +void +commit_odp_actions(const struct flow *flow, struct flow *base, + struct ofpbuf *odp_actions) +{ + commit_set_ether_addr_action(flow, base, odp_actions); + commit_vlan_action(flow, base, odp_actions); + commit_set_nw_action(flow, base, odp_actions); + commit_set_port_action(flow, base, odp_actions); + /* Commiting MPLS actions should occur after committing nw and port + * actions. This is because committing MPLS actions may alter a packet so + * that it is no longer IP and thus nw and port actions are no longer valid. + */ + commit_mpls_action(flow, base, odp_actions); + commit_set_priority_action(flow, base, odp_actions); + commit_set_skb_mark_action(flow, base, odp_actions); }