#include "tunnel.h"
#include "vlan.h"
#include "vport.h"
-#include "vport-generic.h"
#include "vport-internal_dev.h"
#define PORT_TABLE_SIZE 1024
if (null_ports) {
lookup.daddr = 0;
lookup.saddr = 0;
+ lookup.in_key = 0;
lookup.tunnel_type = tunnel_type;
vport = port_table_lookup(&lookup, mutable);
if (vport)
ovs_vport_receive(vport, skb);
}
-static bool check_ipv4_address(__be32 addr)
-{
- if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr)
- || ipv4_is_loopback(addr) || ipv4_is_zeronet(addr))
- return false;
-
- return true;
-}
-
-static bool ipv4_should_icmp(struct sk_buff *skb)
-{
- struct iphdr *old_iph = ip_hdr(skb);
-
- /* Don't respond to L2 broadcast. */
- if (is_multicast_ether_addr(eth_hdr(skb)->h_dest))
- return false;
-
- /* Don't respond to L3 broadcast or invalid addresses. */
- if (!check_ipv4_address(old_iph->daddr) ||
- !check_ipv4_address(old_iph->saddr))
- return false;
-
- /* Only respond to the first fragment. */
- if (old_iph->frag_off & htons(IP_OFFSET))
- return false;
-
- /* Don't respond to ICMP error messages. */
- if (old_iph->protocol == IPPROTO_ICMP) {
- u8 icmp_type, *icmp_typep;
-
- icmp_typep = skb_header_pointer(skb, (u8 *)old_iph +
- (old_iph->ihl << 2) +
- offsetof(struct icmphdr, type) -
- skb->data, sizeof(icmp_type),
- &icmp_type);
-
- if (!icmp_typep)
- return false;
-
- if (*icmp_typep > NR_ICMP_TYPES
- || (*icmp_typep <= ICMP_PARAMETERPROB
- && *icmp_typep != ICMP_ECHOREPLY
- && *icmp_typep != ICMP_ECHO))
- return false;
- }
-
- return true;
-}
-
-static void ipv4_build_icmp(struct sk_buff *skb, struct sk_buff *nskb,
- unsigned int mtu, unsigned int payload_length)
-{
- struct iphdr *iph, *old_iph = ip_hdr(skb);
- struct icmphdr *icmph;
- u8 *payload;
-
- iph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
- icmph = (struct icmphdr *)skb_put(nskb, sizeof(struct icmphdr));
- payload = skb_put(nskb, payload_length);
-
- /* IP */
- iph->version = 4;
- iph->ihl = sizeof(struct iphdr) >> 2;
- iph->tos = (old_iph->tos & IPTOS_TOS_MASK) |
- IPTOS_PREC_INTERNETCONTROL;
- iph->tot_len = htons(sizeof(struct iphdr)
- + sizeof(struct icmphdr)
- + payload_length);
- get_random_bytes(&iph->id, sizeof(iph->id));
- iph->frag_off = 0;
- iph->ttl = IPDEFTTL;
- iph->protocol = IPPROTO_ICMP;
- iph->daddr = old_iph->saddr;
- iph->saddr = old_iph->daddr;
-
- ip_send_check(iph);
-
- /* ICMP */
- icmph->type = ICMP_DEST_UNREACH;
- icmph->code = ICMP_FRAG_NEEDED;
- icmph->un.gateway = htonl(mtu);
- icmph->checksum = 0;
-
- nskb->csum = csum_partial((u8 *)icmph, sizeof(struct icmphdr), 0);
- nskb->csum = skb_copy_and_csum_bits(skb, (u8 *)old_iph - skb->data,
- payload, payload_length,
- nskb->csum);
- icmph->checksum = csum_fold(nskb->csum);
-}
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static bool ipv6_should_icmp(struct sk_buff *skb)
-{
- struct ipv6hdr *old_ipv6h = ipv6_hdr(skb);
- int addr_type;
- int payload_off = (u8 *)(old_ipv6h + 1) - skb->data;
- u8 nexthdr = ipv6_hdr(skb)->nexthdr;
- __be16 frag_off;
-
- /* Check source address is valid. */
- addr_type = ipv6_addr_type(&old_ipv6h->saddr);
- if (addr_type & IPV6_ADDR_MULTICAST || addr_type == IPV6_ADDR_ANY)
- return false;
-
- /* Don't reply to unspecified addresses. */
- if (ipv6_addr_type(&old_ipv6h->daddr) == IPV6_ADDR_ANY)
- return false;
-
- /* Don't respond to ICMP error messages. */
- payload_off = ipv6_skip_exthdr(skb, payload_off, &nexthdr, &frag_off);
- if (payload_off < 0)
- return false;
-
- if (nexthdr == NEXTHDR_ICMP) {
- u8 icmp_type, *icmp_typep;
-
- icmp_typep = skb_header_pointer(skb, payload_off +
- offsetof(struct icmp6hdr,
- icmp6_type),
- sizeof(icmp_type), &icmp_type);
-
- if (!icmp_typep || !(*icmp_typep & ICMPV6_INFOMSG_MASK))
- return false;
- }
-
- return true;
-}
-
-static void ipv6_build_icmp(struct sk_buff *skb, struct sk_buff *nskb,
- unsigned int mtu, unsigned int payload_length)
-{
- struct ipv6hdr *ipv6h, *old_ipv6h = ipv6_hdr(skb);
- struct icmp6hdr *icmp6h;
- u8 *payload;
-
- ipv6h = (struct ipv6hdr *)skb_put(nskb, sizeof(struct ipv6hdr));
- icmp6h = (struct icmp6hdr *)skb_put(nskb, sizeof(struct icmp6hdr));
- payload = skb_put(nskb, payload_length);
-
- /* IPv6 */
- ipv6h->version = 6;
- ipv6h->priority = 0;
- memset(&ipv6h->flow_lbl, 0, sizeof(ipv6h->flow_lbl));
- ipv6h->payload_len = htons(sizeof(struct icmp6hdr)
- + payload_length);
- ipv6h->nexthdr = NEXTHDR_ICMP;
- ipv6h->hop_limit = IPV6_DEFAULT_HOPLIMIT;
- ipv6h->daddr = old_ipv6h->saddr;
- ipv6h->saddr = old_ipv6h->daddr;
-
- /* ICMPv6 */
- icmp6h->icmp6_type = ICMPV6_PKT_TOOBIG;
- icmp6h->icmp6_code = 0;
- icmp6h->icmp6_cksum = 0;
- icmp6h->icmp6_mtu = htonl(mtu);
-
- nskb->csum = csum_partial((u8 *)icmp6h, sizeof(struct icmp6hdr), 0);
- nskb->csum = skb_copy_and_csum_bits(skb, (u8 *)old_ipv6h - skb->data,
- payload, payload_length,
- nskb->csum);
- icmp6h->icmp6_cksum = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
- sizeof(struct icmp6hdr)
- + payload_length,
- ipv6h->nexthdr, nskb->csum);
-}
-#endif /* IPv6 */
-
-bool ovs_tnl_frag_needed(struct vport *vport,
- const struct tnl_mutable_config *mutable,
- struct sk_buff *skb, unsigned int mtu)
-{
- unsigned int eth_hdr_len = ETH_HLEN;
- unsigned int total_length = 0, header_length = 0, payload_length;
- struct ethhdr *eh, *old_eh = eth_hdr(skb);
- struct sk_buff *nskb;
-
- /* Sanity check */
- if (skb->protocol == htons(ETH_P_IP)) {
- if (mtu < IP_MIN_MTU)
- return false;
-
- if (!ipv4_should_icmp(skb))
- return true;
- }
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- else if (skb->protocol == htons(ETH_P_IPV6)) {
- if (mtu < IPV6_MIN_MTU)
- return false;
-
- /*
- * In theory we should do PMTUD on IPv6 multicast messages but
- * we don't have an address to send from so just fragment.
- */
- if (ipv6_addr_type(&ipv6_hdr(skb)->daddr) & IPV6_ADDR_MULTICAST)
- return false;
-
- if (!ipv6_should_icmp(skb))
- return true;
- }
-#endif
- else
- return false;
-
- /* Allocate */
- if (old_eh->h_proto == htons(ETH_P_8021Q))
- eth_hdr_len = VLAN_ETH_HLEN;
-
- payload_length = skb->len - eth_hdr_len;
- if (skb->protocol == htons(ETH_P_IP)) {
- header_length = sizeof(struct iphdr) + sizeof(struct icmphdr);
- total_length = min_t(unsigned int, header_length +
- payload_length, 576);
- }
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- else {
- header_length = sizeof(struct ipv6hdr) +
- sizeof(struct icmp6hdr);
- total_length = min_t(unsigned int, header_length +
- payload_length, IPV6_MIN_MTU);
- }
-#endif
-
- payload_length = total_length - header_length;
-
- nskb = dev_alloc_skb(NET_IP_ALIGN + eth_hdr_len + header_length +
- payload_length);
- if (!nskb)
- return false;
-
- skb_reserve(nskb, NET_IP_ALIGN);
-
- /* Ethernet / VLAN */
- eh = (struct ethhdr *)skb_put(nskb, eth_hdr_len);
- memcpy(eh->h_dest, old_eh->h_source, ETH_ALEN);
- memcpy(eh->h_source, mutable->eth_addr, ETH_ALEN);
- nskb->protocol = eh->h_proto = old_eh->h_proto;
- if (old_eh->h_proto == htons(ETH_P_8021Q)) {
- struct vlan_ethhdr *vh = (struct vlan_ethhdr *)eh;
-
- vh->h_vlan_TCI = vlan_eth_hdr(skb)->h_vlan_TCI;
- vh->h_vlan_encapsulated_proto = skb->protocol;
- } else
- vlan_set_tci(nskb, vlan_get_tci(skb));
- skb_reset_mac_header(nskb);
-
- /* Protocol */
- if (skb->protocol == htons(ETH_P_IP))
- ipv4_build_icmp(skb, nskb, mtu, payload_length);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- else
- ipv6_build_icmp(skb, nskb, mtu, payload_length);
-#endif
-
- if (unlikely(compute_ip_summed(nskb, false))) {
- kfree_skb(nskb);
- return false;
- }
-
- ovs_vport_receive(vport, nskb);
-
- return true;
-}
-
-static bool check_mtu(struct sk_buff *skb,
- struct vport *vport,
- const struct tnl_mutable_config *mutable,
- const struct rtable *rt, __be16 *frag_offp,
- int tunnel_hlen)
-{
- bool df_inherit;
- bool pmtud;
- __be16 frag_off;
- int mtu = 0;
- unsigned int packet_length = skb->len - ETH_HLEN;
-
- if (OVS_CB(skb)->tun_key->ipv4_dst) {
- df_inherit = false;
- pmtud = false;
- frag_off = OVS_CB(skb)->tun_key->tun_flags & OVS_FLOW_TNL_F_DONT_FRAGMENT ?
- htons(IP_DF) : 0;
- } else {
- df_inherit = mutable->flags & TNL_F_DF_INHERIT;
- pmtud = mutable->flags & TNL_F_PMTUD;
- frag_off = mutable->flags & TNL_F_DF_DEFAULT ? htons(IP_DF) : 0;
- }
-
- /* Allow for one level of tagging in the packet length. */
- if (!vlan_tx_tag_present(skb) &&
- eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
- packet_length -= VLAN_HLEN;
-
- if (pmtud) {
- int vlan_header = 0;
-
- /* The tag needs to go in packet regardless of where it
- * currently is, so subtract it from the MTU.
- */
- if (vlan_tx_tag_present(skb) ||
- eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
- vlan_header = VLAN_HLEN;
-
- mtu = dst_mtu(&rt_dst(rt))
- - ETH_HLEN
- - tunnel_hlen
- - vlan_header;
- }
-
- if (skb->protocol == htons(ETH_P_IP)) {
- struct iphdr *iph = ip_hdr(skb);
-
- if (df_inherit)
- frag_off = iph->frag_off & htons(IP_DF);
-
- if (pmtud && iph->frag_off & htons(IP_DF)) {
- mtu = max(mtu, IP_MIN_MTU);
-
- if (packet_length > mtu &&
- ovs_tnl_frag_needed(vport, mutable, skb, mtu))
- return false;
- }
- }
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- else if (skb->protocol == htons(ETH_P_IPV6)) {
- /* IPv6 requires end hosts to do fragmentation
- * if the packet is above the minimum MTU.
- */
- if (df_inherit && packet_length > IPV6_MIN_MTU)
- frag_off = htons(IP_DF);
-
- if (pmtud) {
- mtu = max(mtu, IPV6_MIN_MTU);
-
- if (packet_length > mtu &&
- ovs_tnl_frag_needed(vport, mutable, skb, mtu))
- return false;
- }
- }
-#endif
-
- *frag_offp = frag_off;
- return true;
-}
-
-static struct rtable *find_route(const struct tnl_mutable_config *mutable,
- __be32 saddr, __be32 daddr, u8 ipproto,
- u8 tos)
+static struct rtable *find_route(struct net *net, /* net: handed straight to ip_route_output_key() */
+ __be32 *saddr, __be32 daddr, u8 ipproto, /* saddr: in/out - seeds the lookup, then overwritten from the flow */
+ u8 tos)
{
+ struct rtable *rt; /* hoisted so both kernel-version branches share one declaration */
/* Tunnel configuration keeps DSCP part of TOS bits, But Linux
* router expect RT_TOS bits only. */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39)
struct flowi fl = { .nl_u = { .ip4_u = {
.daddr = daddr,
- .saddr = saddr,
+ .saddr = *saddr, /* caller's requested source address goes into the flow key */
.tos = RT_TOS(tos) } },
.proto = ipproto };
- struct rtable *rt;
- if (unlikely(ip_route_output_key(port_key_get_net(&mutable->key), &rt, &fl)))
+ if (unlikely(ip_route_output_key(net, &rt, &fl))) /* on failure *saddr is left untouched in this branch */
return ERR_PTR(-EADDRNOTAVAIL);
-
+ *saddr = fl.nl_u.ip4_u.saddr; /* hand the flow's source address back; presumably the one routing selected - TODO confirm */
return rt;
#else
struct flowi4 fl = { .daddr = daddr,
- .saddr = saddr,
+ .saddr = *saddr, /* caller's requested source address goes into the flow key */
.flowi4_tos = RT_TOS(tos),
.flowi4_proto = ipproto };
- return ip_route_output_key(port_key_get_net(&mutable->key), &fl);
+ rt = ip_route_output_key(net, &fl); /* result is checked by callers with IS_ERR() */
+ *saddr = fl.saddr; /* NOTE(review): written back even if rt is an error pointer - confirm callers ignore saddr then */
+ return rt;
#endif
}
struct ovs_key_ipv4_tunnel tun_key;
int sent_len = 0;
int tunnel_hlen;
- __be16 frag_off = 0;
+ __be16 frag_off;
__be32 daddr;
__be32 saddr;
u8 ttl;
saddr = OVS_CB(skb)->tun_key->ipv4_src;
tos = OVS_CB(skb)->tun_key->ipv4_tos;
ttl = OVS_CB(skb)->tun_key->ipv4_ttl;
+ frag_off = OVS_CB(skb)->tun_key->tun_flags &
+ OVS_TNL_F_DONT_FRAGMENT ? htons(IP_DF) : 0;
} else {
u8 inner_tos;
daddr = mutable->key.daddr;
#endif
}
+ frag_off = mutable->flags & TNL_F_DF_DEFAULT ? htons(IP_DF) : 0;
}
/* Route lookup */
- rt = find_route(mutable, saddr, daddr, tnl_vport->tnl_ops->ipproto, tos);
+ rt = find_route(port_key_get_net(&mutable->key), &saddr, daddr,
+ tnl_vport->tnl_ops->ipproto, tos);
if (IS_ERR(rt))
goto error_free;
goto err_free_rt;
}
- /* MTU */
- if (unlikely(!check_mtu(skb, vport, mutable, rt, &frag_off, tunnel_hlen))) {
- err = VPORT_E_TX_DROPPED;
- goto err_free_rt;
- }
-
/* TTL Fixup. */
if (!OVS_CB(skb)->tun_key->ipv4_dst) {
if (!(mutable->flags & TNL_F_TTL_INHERIT)) {
iph->version = 4;
iph->ihl = sizeof(struct iphdr) >> 2;
iph->protocol = tnl_vport->tnl_ops->ipproto;
- iph->daddr = rt->rt_dst;
- iph->saddr = rt->rt_src;
+ iph->daddr = daddr;
+ iph->saddr = saddr;
iph->tos = tos;
iph->ttl = ttl;
iph->frag_off = frag_off;
[OVS_TUNNEL_ATTR_IN_KEY] = { .type = NLA_U64 },
[OVS_TUNNEL_ATTR_TOS] = { .type = NLA_U8 },
[OVS_TUNNEL_ATTR_TTL] = { .type = NLA_U8 },
+ [OVS_TUNNEL_ATTR_DST_PORT] = { .type = NLA_U16 },
};
/* Sets OVS_TUNNEL_ATTR_* fields in 'mutable', which must initially be
if (err)
return err;
- if (!a[OVS_TUNNEL_ATTR_FLAGS] || !a[OVS_TUNNEL_ATTR_DST_IPV4])
- return -EINVAL;
+ /* Process attributes possibly useful for null_ports first */
+ if (a[OVS_TUNNEL_ATTR_DST_PORT])
+ mutable->dst_port =
+ htons(nla_get_u16(a[OVS_TUNNEL_ATTR_DST_PORT]));
- mutable->flags = nla_get_u32(a[OVS_TUNNEL_ATTR_FLAGS]) & TNL_F_PUBLIC;
- mutable->key.daddr = nla_get_be32(a[OVS_TUNNEL_ATTR_DST_IPV4]);
+ if (a[OVS_TUNNEL_ATTR_DST_IPV4])
+ mutable->key.daddr = nla_get_be32(a[OVS_TUNNEL_ATTR_DST_IPV4]);
+
+ /* Skip the rest if configuring a null_port */
+ if (!mutable->key.daddr)
+ goto out;
+
+ if (a[OVS_TUNNEL_ATTR_FLAGS])
+ mutable->flags = nla_get_u32(a[OVS_TUNNEL_ATTR_FLAGS])
+ & TNL_F_PUBLIC;
if (a[OVS_TUNNEL_ATTR_SRC_IPV4]) {
if (ipv4_is_multicast(mutable->key.daddr))
if (ipv4_is_multicast(mutable->key.daddr)) {
struct net_device *dev;
struct rtable *rt;
+ __be32 saddr = mutable->key.saddr;
- rt = find_route(mutable, mutable->key.saddr, mutable->key.daddr,
- tnl_ops->ipproto, mutable->tos);
+ rt = find_route(port_key_get_net(&mutable->key),
+ &saddr, mutable->key.daddr,
+ tnl_ops->ipproto, mutable->tos);
if (IS_ERR(rt))
return -EADDRNOTAVAIL;
dev = rt_dst(rt).dev;
goto error_free_vport;
}
- random_ether_addr(mutable->eth_addr);
-
get_random_bytes(&initial_frag_id, sizeof(int));
atomic_set(&tnl_vport->frag_id, initial_frag_id);
goto error;
}
- /* Copy fields whose values should be retained. */
- mutable->seq = old_mutable->seq + 1;
- memcpy(mutable->eth_addr, old_mutable->eth_addr, ETH_ALEN);
-
/* Parse the others configured by userspace. */
err = tnl_set_config(ovs_dp_get_net(vport->dp), options, tnl_vport->tnl_ops,
vport, mutable);
const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
const struct tnl_mutable_config *mutable = rcu_dereference_rtnl(tnl_vport->mutable);
- if (nla_put_u32(skb, OVS_TUNNEL_ATTR_FLAGS,
- mutable->flags & TNL_F_PUBLIC) ||
- nla_put_be32(skb, OVS_TUNNEL_ATTR_DST_IPV4, mutable->key.daddr))
+ if (mutable->dst_port && nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT,
+ ntohs(mutable->dst_port)))
goto nla_put_failure;
+ /* Skip the rest for null_ports */
+ if (!mutable->key.daddr)
+ return 0;
+
+ if (nla_put_be32(skb, OVS_TUNNEL_ATTR_DST_IPV4, mutable->key.daddr))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, OVS_TUNNEL_ATTR_FLAGS,
+ mutable->flags & TNL_F_PUBLIC))
+ goto nla_put_failure;
if (!(mutable->flags & TNL_F_IN_KEY_MATCH) &&
nla_put_be64(skb, OVS_TUNNEL_ATTR_IN_KEY, mutable->key.in_key))
goto nla_put_failure;
call_rcu(&tnl_vport->rcu, free_port_rcu);
}
-int ovs_tnl_set_addr(struct vport *vport, const unsigned char *addr)
-{
- struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
- struct tnl_mutable_config *old_mutable, *mutable;
-
- old_mutable = rtnl_dereference(tnl_vport->mutable);
- mutable = kmemdup(old_mutable, sizeof(struct tnl_mutable_config), GFP_KERNEL);
- if (!mutable)
- return -ENOMEM;
-
- old_mutable->mlink = 0;
-
- memcpy(mutable->eth_addr, addr, ETH_ALEN);
- assign_config_rcu(vport, mutable);
-
- return 0;
-}
-
const char *ovs_tnl_get_name(const struct vport *vport)
{
const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
return tnl_vport->name;
}
-const unsigned char *ovs_tnl_get_addr(const struct vport *vport)
-{
- const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
- return rcu_dereference_rtnl(tnl_vport->mutable)->eth_addr;
-}
-
void ovs_tnl_free_linked_skbs(struct sk_buff *skb)
{
while (skb) {