ovs_vport_receive(vport, skb);
}
-static bool check_ipv4_address(__be32 addr)
-{
- if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr)
- || ipv4_is_loopback(addr) || ipv4_is_zeronet(addr))
- return false;
-
- return true;
-}
-
-static bool ipv4_should_icmp(struct sk_buff *skb)
-{
- struct iphdr *old_iph = ip_hdr(skb);
-
- /* Don't respond to L2 broadcast. */
- if (is_multicast_ether_addr(eth_hdr(skb)->h_dest))
- return false;
-
- /* Don't respond to L3 broadcast or invalid addresses. */
- if (!check_ipv4_address(old_iph->daddr) ||
- !check_ipv4_address(old_iph->saddr))
- return false;
-
- /* Only respond to the first fragment. */
- if (old_iph->frag_off & htons(IP_OFFSET))
- return false;
-
- /* Don't respond to ICMP error messages. */
- if (old_iph->protocol == IPPROTO_ICMP) {
- u8 icmp_type, *icmp_typep;
-
- icmp_typep = skb_header_pointer(skb, (u8 *)old_iph +
- (old_iph->ihl << 2) +
- offsetof(struct icmphdr, type) -
- skb->data, sizeof(icmp_type),
- &icmp_type);
-
- if (!icmp_typep)
- return false;
-
- if (*icmp_typep > NR_ICMP_TYPES
- || (*icmp_typep <= ICMP_PARAMETERPROB
- && *icmp_typep != ICMP_ECHOREPLY
- && *icmp_typep != ICMP_ECHO))
- return false;
- }
-
- return true;
-}
-
-static void ipv4_build_icmp(struct sk_buff *skb, struct sk_buff *nskb,
- unsigned int mtu, unsigned int payload_length)
-{
- struct iphdr *iph, *old_iph = ip_hdr(skb);
- struct icmphdr *icmph;
- u8 *payload;
-
- iph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
- icmph = (struct icmphdr *)skb_put(nskb, sizeof(struct icmphdr));
- payload = skb_put(nskb, payload_length);
-
- /* IP */
- iph->version = 4;
- iph->ihl = sizeof(struct iphdr) >> 2;
- iph->tos = (old_iph->tos & IPTOS_TOS_MASK) |
- IPTOS_PREC_INTERNETCONTROL;
- iph->tot_len = htons(sizeof(struct iphdr)
- + sizeof(struct icmphdr)
- + payload_length);
- get_random_bytes(&iph->id, sizeof(iph->id));
- iph->frag_off = 0;
- iph->ttl = IPDEFTTL;
- iph->protocol = IPPROTO_ICMP;
- iph->daddr = old_iph->saddr;
- iph->saddr = old_iph->daddr;
-
- ip_send_check(iph);
-
- /* ICMP */
- icmph->type = ICMP_DEST_UNREACH;
- icmph->code = ICMP_FRAG_NEEDED;
- icmph->un.gateway = htonl(mtu);
- icmph->checksum = 0;
-
- nskb->csum = csum_partial((u8 *)icmph, sizeof(struct icmphdr), 0);
- nskb->csum = skb_copy_and_csum_bits(skb, (u8 *)old_iph - skb->data,
- payload, payload_length,
- nskb->csum);
- icmph->checksum = csum_fold(nskb->csum);
-}
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static bool ipv6_should_icmp(struct sk_buff *skb)
-{
- struct ipv6hdr *old_ipv6h = ipv6_hdr(skb);
- int addr_type;
- int payload_off = (u8 *)(old_ipv6h + 1) - skb->data;
- u8 nexthdr = ipv6_hdr(skb)->nexthdr;
- __be16 frag_off;
-
- /* Check source address is valid. */
- addr_type = ipv6_addr_type(&old_ipv6h->saddr);
- if (addr_type & IPV6_ADDR_MULTICAST || addr_type == IPV6_ADDR_ANY)
- return false;
-
- /* Don't reply to unspecified addresses. */
- if (ipv6_addr_type(&old_ipv6h->daddr) == IPV6_ADDR_ANY)
- return false;
-
- /* Don't respond to ICMP error messages. */
- payload_off = ipv6_skip_exthdr(skb, payload_off, &nexthdr, &frag_off);
- if (payload_off < 0)
- return false;
-
- if (nexthdr == NEXTHDR_ICMP) {
- u8 icmp_type, *icmp_typep;
-
- icmp_typep = skb_header_pointer(skb, payload_off +
- offsetof(struct icmp6hdr,
- icmp6_type),
- sizeof(icmp_type), &icmp_type);
-
- if (!icmp_typep || !(*icmp_typep & ICMPV6_INFOMSG_MASK))
- return false;
- }
-
- return true;
-}
-
-static void ipv6_build_icmp(struct sk_buff *skb, struct sk_buff *nskb,
- unsigned int mtu, unsigned int payload_length)
-{
- struct ipv6hdr *ipv6h, *old_ipv6h = ipv6_hdr(skb);
- struct icmp6hdr *icmp6h;
- u8 *payload;
-
- ipv6h = (struct ipv6hdr *)skb_put(nskb, sizeof(struct ipv6hdr));
- icmp6h = (struct icmp6hdr *)skb_put(nskb, sizeof(struct icmp6hdr));
- payload = skb_put(nskb, payload_length);
-
- /* IPv6 */
- ipv6h->version = 6;
- ipv6h->priority = 0;
- memset(&ipv6h->flow_lbl, 0, sizeof(ipv6h->flow_lbl));
- ipv6h->payload_len = htons(sizeof(struct icmp6hdr)
- + payload_length);
- ipv6h->nexthdr = NEXTHDR_ICMP;
- ipv6h->hop_limit = IPV6_DEFAULT_HOPLIMIT;
- ipv6h->daddr = old_ipv6h->saddr;
- ipv6h->saddr = old_ipv6h->daddr;
-
- /* ICMPv6 */
- icmp6h->icmp6_type = ICMPV6_PKT_TOOBIG;
- icmp6h->icmp6_code = 0;
- icmp6h->icmp6_cksum = 0;
- icmp6h->icmp6_mtu = htonl(mtu);
-
- nskb->csum = csum_partial((u8 *)icmp6h, sizeof(struct icmp6hdr), 0);
- nskb->csum = skb_copy_and_csum_bits(skb, (u8 *)old_ipv6h - skb->data,
- payload, payload_length,
- nskb->csum);
- icmp6h->icmp6_cksum = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
- sizeof(struct icmp6hdr)
- + payload_length,
- ipv6h->nexthdr, nskb->csum);
-}
-#endif /* IPv6 */
-
-bool ovs_tnl_frag_needed(struct vport *vport,
- const struct tnl_mutable_config *mutable,
- struct sk_buff *skb, unsigned int mtu)
-{
- unsigned int eth_hdr_len = ETH_HLEN;
- unsigned int total_length = 0, header_length = 0, payload_length;
- struct ethhdr *eh, *old_eh = eth_hdr(skb);
- struct sk_buff *nskb;
-
- /* Sanity check */
- if (skb->protocol == htons(ETH_P_IP)) {
- if (mtu < IP_MIN_MTU)
- return false;
-
- if (!ipv4_should_icmp(skb))
- return true;
- }
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- else if (skb->protocol == htons(ETH_P_IPV6)) {
- if (mtu < IPV6_MIN_MTU)
- return false;
-
- /*
- * In theory we should do PMTUD on IPv6 multicast messages but
- * we don't have an address to send from so just fragment.
- */
- if (ipv6_addr_type(&ipv6_hdr(skb)->daddr) & IPV6_ADDR_MULTICAST)
- return false;
-
- if (!ipv6_should_icmp(skb))
- return true;
- }
-#endif
- else
- return false;
-
- /* Allocate */
- if (old_eh->h_proto == htons(ETH_P_8021Q))
- eth_hdr_len = VLAN_ETH_HLEN;
-
- payload_length = skb->len - eth_hdr_len;
- if (skb->protocol == htons(ETH_P_IP)) {
- header_length = sizeof(struct iphdr) + sizeof(struct icmphdr);
- total_length = min_t(unsigned int, header_length +
- payload_length, 576);
- }
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- else {
- header_length = sizeof(struct ipv6hdr) +
- sizeof(struct icmp6hdr);
- total_length = min_t(unsigned int, header_length +
- payload_length, IPV6_MIN_MTU);
- }
-#endif
-
- payload_length = total_length - header_length;
-
- nskb = dev_alloc_skb(NET_IP_ALIGN + eth_hdr_len + header_length +
- payload_length);
- if (!nskb)
- return false;
-
- skb_reserve(nskb, NET_IP_ALIGN);
-
- /* Ethernet / VLAN */
- eh = (struct ethhdr *)skb_put(nskb, eth_hdr_len);
- memcpy(eh->h_dest, old_eh->h_source, ETH_ALEN);
- memcpy(eh->h_source, mutable->eth_addr, ETH_ALEN);
- nskb->protocol = eh->h_proto = old_eh->h_proto;
- if (old_eh->h_proto == htons(ETH_P_8021Q)) {
- struct vlan_ethhdr *vh = (struct vlan_ethhdr *)eh;
-
- vh->h_vlan_TCI = vlan_eth_hdr(skb)->h_vlan_TCI;
- vh->h_vlan_encapsulated_proto = skb->protocol;
- } else
- vlan_set_tci(nskb, vlan_get_tci(skb));
- skb_reset_mac_header(nskb);
-
- /* Protocol */
- if (skb->protocol == htons(ETH_P_IP))
- ipv4_build_icmp(skb, nskb, mtu, payload_length);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- else
- ipv6_build_icmp(skb, nskb, mtu, payload_length);
-#endif
-
- if (unlikely(compute_ip_summed(nskb, false))) {
- kfree_skb(nskb);
- return false;
- }
-
- ovs_vport_receive(vport, nskb);
-
- return true;
-}
-
-static bool check_mtu(struct sk_buff *skb,
- struct vport *vport,
- const struct tnl_mutable_config *mutable,
- const struct rtable *rt, __be16 *frag_offp,
- int tunnel_hlen)
-{
- bool df_inherit;
- bool pmtud;
- __be16 frag_off;
- int mtu = 0;
- unsigned int packet_length = skb->len - ETH_HLEN;
-
- if (OVS_CB(skb)->tun_key->ipv4_dst) {
- df_inherit = false;
- pmtud = false;
- frag_off = OVS_CB(skb)->tun_key->tun_flags & OVS_TNL_F_DONT_FRAGMENT ?
- htons(IP_DF) : 0;
- } else {
- df_inherit = mutable->flags & TNL_F_DF_INHERIT;
- pmtud = mutable->flags & TNL_F_PMTUD;
- frag_off = mutable->flags & TNL_F_DF_DEFAULT ? htons(IP_DF) : 0;
- }
-
- /* Allow for one level of tagging in the packet length. */
- if (!vlan_tx_tag_present(skb) &&
- eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
- packet_length -= VLAN_HLEN;
-
- if (pmtud) {
- int vlan_header = 0;
-
- /* The tag needs to go in packet regardless of where it
- * currently is, so subtract it from the MTU.
- */
- if (vlan_tx_tag_present(skb) ||
- eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
- vlan_header = VLAN_HLEN;
-
- mtu = dst_mtu(&rt_dst(rt))
- - ETH_HLEN
- - tunnel_hlen
- - vlan_header;
- }
-
- if (skb->protocol == htons(ETH_P_IP)) {
- struct iphdr *iph = ip_hdr(skb);
-
- if (df_inherit)
- frag_off = iph->frag_off & htons(IP_DF);
-
- if (pmtud && iph->frag_off & htons(IP_DF)) {
- mtu = max(mtu, IP_MIN_MTU);
-
- if (packet_length > mtu &&
- ovs_tnl_frag_needed(vport, mutable, skb, mtu))
- return false;
- }
- }
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- else if (skb->protocol == htons(ETH_P_IPV6)) {
- /* IPv6 requires end hosts to do fragmentation
- * if the packet is above the minimum MTU.
- */
- if (df_inherit && packet_length > IPV6_MIN_MTU)
- frag_off = htons(IP_DF);
-
- if (pmtud) {
- mtu = max(mtu, IPV6_MIN_MTU);
-
- if (packet_length > mtu &&
- ovs_tnl_frag_needed(vport, mutable, skb, mtu))
- return false;
- }
- }
-#endif
-
- *frag_offp = frag_off;
- return true;
-}
-
static struct rtable *find_route(struct net *net,
__be32 *saddr, __be32 daddr, u8 ipproto,
u8 tos)
struct ovs_key_ipv4_tunnel tun_key;
int sent_len = 0;
int tunnel_hlen;
- __be16 frag_off = 0;
+ __be16 frag_off;
__be32 daddr;
__be32 saddr;
u8 ttl;
saddr = OVS_CB(skb)->tun_key->ipv4_src;
tos = OVS_CB(skb)->tun_key->ipv4_tos;
ttl = OVS_CB(skb)->tun_key->ipv4_ttl;
+ frag_off = OVS_CB(skb)->tun_key->tun_flags &
+ OVS_TNL_F_DONT_FRAGMENT ? htons(IP_DF) : 0;
} else {
u8 inner_tos;
daddr = mutable->key.daddr;
#endif
}
+ frag_off = mutable->flags & TNL_F_DF_DEFAULT ? htons(IP_DF) : 0;
}
/* Route lookup */
goto err_free_rt;
}
- /* MTU */
- if (unlikely(!check_mtu(skb, vport, mutable, rt, &frag_off, tunnel_hlen))) {
- err = VPORT_E_TX_DROPPED;
- goto err_free_rt;
- }
-
/* TTL Fixup. */
if (!OVS_CB(skb)->tun_key->ipv4_dst) {
if (!(mutable->flags & TNL_F_TTL_INHERIT)) {
goto error_free_vport;
}
- random_ether_addr(mutable->eth_addr);
-
get_random_bytes(&initial_frag_id, sizeof(int));
atomic_set(&tnl_vport->frag_id, initial_frag_id);
goto error;
}
- /* Copy fields whose values should be retained. */
- mutable->seq = old_mutable->seq + 1;
- memcpy(mutable->eth_addr, old_mutable->eth_addr, ETH_ALEN);
-
/* Parse the others configured by userspace. */
err = tnl_set_config(ovs_dp_get_net(vport->dp), options, tnl_vport->tnl_ops,
vport, mutable);
call_rcu(&tnl_vport->rcu, free_port_rcu);
}
-int ovs_tnl_set_addr(struct vport *vport, const unsigned char *addr)
-{
- struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
- struct tnl_mutable_config *old_mutable, *mutable;
-
- old_mutable = rtnl_dereference(tnl_vport->mutable);
- mutable = kmemdup(old_mutable, sizeof(struct tnl_mutable_config), GFP_KERNEL);
- if (!mutable)
- return -ENOMEM;
-
- old_mutable->mlink = 0;
-
- memcpy(mutable->eth_addr, addr, ETH_ALEN);
- assign_config_rcu(vport, mutable);
-
- return 0;
-}
-
/*
 * ovs_tnl_get_name - return the name recorded for a tunnel vport.
 * @vport: vport whose tunnel-private data is looked up via tnl_vport_priv().
 *
 * Returns the 'name' member of the tunnel-private structure directly;
 * nothing is copied or allocated here.
 */
const char *ovs_tnl_get_name(const struct vport *vport)
{
const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
return tnl_vport->name;
}
-const unsigned char *ovs_tnl_get_addr(const struct vport *vport)
-{
- const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
- return rcu_dereference_rtnl(tnl_vport->mutable)->eth_addr;
-}
-
void ovs_tnl_free_linked_skbs(struct sk_buff *skb)
{
while (skb) {