#include "datapath.h"
#include "table.h"
#include "tunnel.h"
+#include "vlan.h"
#include "vport.h"
#include "vport-generic.h"
#include "vport-internal_dev.h"
* Modifies 'target' to store the rcu_dereferenced pointer that was used to do
* the comparison.
*/
-static int port_cmp(const struct tbl_node *node, void *target)
+static int port_cmp(const struct tbl_node *node, void *target, int unused)
{
const struct tnl_vport *tnl_vport = tnl_vport_table_cast(node);
struct port_lookup_key *lookup = target;
struct tbl *new_table;
new_table = tbl_expand(cur_table);
- if (IS_ERR(new_table))
- return PTR_ERR(new_table);
-
- rcu_assign_pointer(port_table, new_table);
- tbl_deferred_destroy(cur_table, NULL);
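+ /* A full table (-ENOSPC from tbl_expand()) is not fatal: skip the
+ * expansion and fall through to insert into the current table. */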
+ if (IS_ERR(new_table)) {
+ if (PTR_ERR(new_table) != -ENOSPC)
+ return PTR_ERR(new_table);
+ } else {
+ rcu_assign_pointer(port_table, new_table);
+ tbl_deferred_destroy(cur_table, NULL);
+ }
}
err = tbl_insert(rtnl_dereference(port_table), &tnl_vport->tbl_node,
lookup.tunnel_type = tunnel_type & ~TNL_T_KEY_MATCH;
if (key_local_remote_ports) {
- tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
+ tbl_node = tbl_lookup(table, &lookup, sizeof(lookup),
+ port_hash(&lookup), port_cmp);
if (tbl_node)
goto found;
}
if (key_remote_ports) {
lookup.saddr = 0;
- tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
+ tbl_node = tbl_lookup(table, &lookup, sizeof(lookup),
+ port_hash(&lookup), port_cmp);
if (tbl_node)
goto found;
lookup.tunnel_type = tunnel_type & ~TNL_T_KEY_EXACT;
if (local_remote_ports) {
- tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
+ tbl_node = tbl_lookup(table, &lookup, sizeof(lookup),
+ port_hash(&lookup), port_cmp);
if (tbl_node)
goto found;
}
if (remote_ports) {
lookup.saddr = 0;
- tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
+ tbl_node = tbl_lookup(table, &lookup, sizeof(lookup),
+ port_hash(&lookup), port_cmp);
if (tbl_node)
goto found;
}
return tnl_vport_to_vport(tnl_vport_table_cast(tbl_node));
}
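+/* Propagate an ECN CE mark from the outer IP header (its ToS is now
+ * passed in by the caller) to the inner packet. */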
-static inline void ecn_decapsulate(struct sk_buff *skb)
+static void ecn_decapsulate(struct sk_buff *skb, u8 tos)
{
- /* This is accessing the outer IP header of the tunnel, which we've
- * already validated to be OK. skb->data is currently set to the start
- * of the inner Ethernet header, and we've validated ETH_HLEN.
- */
- if (unlikely(INET_ECN_is_ce(ip_hdr(skb)->tos))) {
+ if (unlikely(INET_ECN_is_ce(tos))) {
__be16 protocol = skb->protocol;
skb_set_network_header(skb, ETH_HLEN);
- if (skb->protocol == htons(ETH_P_8021Q)) {
+ if (protocol == htons(ETH_P_8021Q)) {
if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
return;
}
}
-/* Called with rcu_read_lock. */
-void tnl_rcv(struct vport *vport, struct sk_buff *skb)
+/**
+ * tnl_rcv - ingress point for generic tunnel code
+ *
+ * @vport: port this packet was received on
+ * @skb: received packet
+ * @tos: ToS from encapsulating IP packet, used to copy ECN bits
+ *
+ * Must be called with rcu_read_lock.
+ *
+ * Packets received by this function are in the following state:
+ * - skb->data points to the inner Ethernet header.
+ * - The inner Ethernet header is in the linear data area.
+ * - skb->csum does not include the inner Ethernet header.
+ * - The layer pointers are undefined.
+ */
+void tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos)
{
- /* Packets received by this function are in the following state:
- * - skb->data points to the inner Ethernet header.
- * - The inner Ethernet header is in the linear data area.
- * - skb->csum does not include the inner Ethernet header.
- * - The layer pointers point at the outer headers.
- */
+ struct ethhdr *eh;
- struct ethhdr *eh = (struct ethhdr *)skb->data;
+ skb_reset_mac_header(skb);
+ eh = eth_hdr(skb);
if (likely(ntohs(eh->h_proto) >= 1536))
skb->protocol = eh->h_proto;
skb_dst_drop(skb);
nf_reset(skb);
+ skb_clear_rxhash(skb);
secpath_reset(skb);
- ecn_decapsulate(skb);
- compute_ip_summed(skb, false);
+ ecn_decapsulate(skb, tos);
+ vlan_set_tci(skb, 0);
+
+ if (unlikely(compute_ip_summed(skb, false))) {
+ kfree_skb(skb);
+ return;
+ }
vport_receive(vport, skb);
}
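+/* A minimal sketch of a caller (illustrative only; hdr_len stands in
+ * for the protocol's encapsulation header length):
+ *
+ *   u8 tos = ip_hdr(skb)->tos;
+ *   __skb_pull(skb, hdr_len);
+ *   tnl_rcv(vport, skb, tos);
+ */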
vh->h_vlan_TCI = vlan_eth_hdr(skb)->h_vlan_TCI;
vh->h_vlan_encapsulated_proto = skb->protocol;
- }
+ } else
+ vlan_set_tci(nskb, vlan_get_tci(skb));
skb_reset_mac_header(nskb);
/* Protocol */
(TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION))
OVS_CB(nskb)->tun_id = flow_key;
- compute_ip_summed(nskb, false);
+ if (unlikely(compute_ip_summed(nskb, false))) {
+ kfree_skb(nskb);
+ return false;
+ }
+
vport_receive(vport, nskb);
return true;
const struct tnl_mutable_config *mutable,
const struct rtable *rt, __be16 *frag_offp)
{
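+ /* Pick the DF policy up front: TNL_F_DF_DEFAULT forces DF on the
+ * outer header, TNL_F_DF_INHERIT copies it from the inner packet. */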
+ bool df_inherit = mutable->flags & TNL_F_DF_INHERIT;
bool pmtud = mutable->flags & TNL_F_PMTUD;
- __be16 frag_off = 0;
+ __be16 frag_off = mutable->flags & TNL_F_DF_DEFAULT ? htons(IP_DF) : 0;
int mtu = 0;
+ unsigned int packet_length = skb->len - ETH_HLEN;
+
+ /* Allow for one level of tagging in the packet length. */
+ if (!vlan_tx_tag_present(skb) &&
+ eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
+ packet_length -= VLAN_HLEN;
if (pmtud) {
- frag_off = htons(IP_DF);
+ int vlan_header = 0;
+
+ /* The tag needs to go in the packet regardless of where it
+ * currently is, so subtract it from the MTU.
+ */
+ if (vlan_tx_tag_present(skb) ||
+ eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
+ vlan_header = VLAN_HLEN;
mtu = dst_mtu(&rt_dst(rt))
- ETH_HLEN
- mutable->tunnel_hlen
- - (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ?
- VLAN_HLEN : 0);
+ - vlan_header;
}
if (skb->protocol == htons(ETH_P_IP)) {
struct iphdr *iph = ip_hdr(skb);
- frag_off |= iph->frag_off & htons(IP_DF);
+ if (df_inherit)
+ frag_off = iph->frag_off & htons(IP_DF);
if (pmtud && iph->frag_off & htons(IP_DF)) {
mtu = max(mtu, IP_MIN_MTU);
- if (ntohs(iph->tot_len) > mtu &&
+ if (packet_length > mtu &&
tnl_frag_needed(vport, mutable, skb, mtu,
OVS_CB(skb)->tun_id))
return false;
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
else if (skb->protocol == htons(ETH_P_IPV6)) {
- unsigned int packet_length = skb->len - ETH_HLEN
- - (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ?
- VLAN_HLEN : 0);
-
- /* IPv6 requires PMTUD if the packet is above the minimum MTU. */
- if (packet_length > IPV6_MIN_MTU)
+ /* IPv6 requires end hosts to do fragmentation
+ * if the packet is above the minimum MTU.
+ */
+ if (df_inherit && packet_length > IPV6_MIN_MTU)
frag_off = htons(IP_DF);
if (pmtud) {
iph->saddr = rt->rt_src;
iph->ttl = mutable->ttl;
if (!iph->ttl)
- iph->ttl = dst_metric(&rt_dst(rt), RTAX_HOPLIMIT);
+ iph->ttl = ip4_dst_hoplimit(&rt_dst(rt));
tnl_vport->tnl_ops->build_header(vport, mutable, iph + 1);
}
struct sk_buff *skb;
bool is_frag;
int err;
+ int flow_key_len;
dst_vport = internal_dev_get_vport(rt_dst(rt).dev);
if (!dst_vport)
__skb_put(skb, cache->len);
memcpy(skb->data, get_cached_header(cache), cache->len);
- err = flow_extract(skb, dst_vport->port_no, &flow_key, &is_frag);
+ err = flow_extract(skb, dst_vport->port_no, &flow_key,
+ &flow_key_len, &is_frag);
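+ /* The skb is a scratch packet built from the cached header, so
+ * freeing it is not a drop; use consume_skb(). */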
- kfree_skb(skb);
+ consume_skb(skb);
if (err || is_frag)
goto done;
flow_node = tbl_lookup(rcu_dereference(dst_vport->dp->table),
- &flow_key, flow_hash(&flow_key),
+ &flow_key, flow_key_len,
+ flow_hash(&flow_key, flow_key_len),
flow_cmp);
if (flow_node) {
struct sw_flow *flow = flow_cast(flow_node);
return cur_cache->rt;
} else {
struct rtable *rt;
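+ /* ip_route_output_key() changed in 2.6.39: it now takes a struct
+ * flowi4 and returns the route (or an ERR_PTR) directly. */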
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39)
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = mutable->daddr,
.saddr = mutable->saddr,
if (unlikely(ip_route_output_key(&init_net, &rt, &fl)))
return NULL;
+#else
+ struct flowi4 fl = { .daddr = mutable->daddr,
+ .saddr = mutable->saddr,
+ .flowi4_tos = tos,
+ .flowi4_proto = tnl_vport->tnl_ops->ipproto };
+
+ rt = ip_route_output_key(&init_net, &fl);
+ if (IS_ERR(rt))
+ return NULL;
+#endif
if (likely(tos == mutable->tos))
*cache = build_cache(vport, mutable, rt);
}
}
-static struct sk_buff *check_headroom(struct sk_buff *skb, int headroom)
-{
- if (skb_headroom(skb) < headroom || skb_header_cloned(skb)) {
- struct sk_buff *nskb = skb_realloc_headroom(skb, headroom + 16);
- if (unlikely(!nskb)) {
- kfree_skb(skb);
- return ERR_PTR(-ENOMEM);
- }
-
- set_skb_csum_bits(skb, nskb);
-
- if (skb->sk)
- skb_set_owner_w(nskb, skb->sk);
-
- kfree_skb(skb);
- return nskb;
- }
-
- return skb;
-}
-
static inline bool need_linearize(const struct sk_buff *skb)
{
int i;
* change them from underneath us and we can skip the linearization.
*/
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- if (unlikely(page_count(skb_shinfo(skb)->frags[0].page) > 1))
+ if (unlikely(page_count(skb_shinfo(skb)->frags[i].page) > 1))
return true;
return false;
int min_headroom;
int err;
- forward_ip_summed(skb);
-
- err = vswitch_skb_checksum_setup(skb);
- if (unlikely(err))
- goto error_free;
-
min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
- + mutable->tunnel_hlen;
+ + mutable->tunnel_hlen
+ + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
+
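+ /* Make sure there is room for the link level, tunnel and (possible)
+ * VLAN headers, and that the existing headers are writable. */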
+ if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
+ int head_delta = SKB_DATA_ALIGN(min_headroom -
+ skb_headroom(skb) +
+ 16);
+ err = pskb_expand_head(skb, max_t(int, head_delta, 0),
+ 0, GFP_ATOMIC);
+ if (unlikely(err))
+ goto error_free;
+ }
+
+ forward_ip_summed(skb, true);
if (skb_is_gso(skb)) {
struct sk_buff *nskb;
- /*
- * If we are doing GSO on a pskb it is better to make sure that
- * the headroom is correct now. We will only have to copy the
- * portion in the linear data area and GSO will preserve
- * headroom when it creates the segments. This is particularly
- * beneficial on Xen where we get a lot of GSO pskbs.
- * Conversely, we avoid copying if it is just to get our own
- * writable clone because GSO will do the copy for us.
- */
- if (skb_headroom(skb) < min_headroom) {
- skb = check_headroom(skb, min_headroom);
- if (IS_ERR(skb)) {
- err = PTR_ERR(skb);
- goto error;
- }
- }
-
nskb = skb_gso_segment(skb, 0);
- kfree_skb(skb);
if (IS_ERR(nskb)) {
+ kfree_skb(skb);
err = PTR_ERR(nskb);
goto error;
}
+ consume_skb(skb);
skb = nskb;
- } else {
- skb = check_headroom(skb, min_headroom);
- if (IS_ERR(skb)) {
- err = PTR_ERR(skb);
- goto error;
- }
-
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- /*
- * Pages aren't locked and could change at any time.
- * If this happens after we compute the checksum, the
- * checksum will be wrong. We linearize now to avoid
- * this problem.
- */
- if (unlikely(need_linearize(skb))) {
- err = __skb_linearize(skb);
- if (unlikely(err))
- goto error_free;
- }
-
- err = skb_checksum_help(skb);
+ } else if (get_ip_summed(skb) == OVS_CSUM_PARTIAL) {
+ /* Pages aren't locked and could change at any time.
+ * If this happens after we compute the checksum, the
+ * checksum will be wrong. We linearize now to avoid
+ * this problem.
+ */
+ if (unlikely(need_linearize(skb))) {
+ err = __skb_linearize(skb);
if (unlikely(err))
goto error_free;
- } else if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->ip_summed = CHECKSUM_NONE;
+ }
+
+ err = skb_checksum_help(skb);
+ if (unlikely(err))
+ goto error_free;
}
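+ /* Any remaining checksum work was completed above (by segmentation
+ * or skb_checksum_help()), so clear the offload state before
+ * encapsulating. */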
+ set_ip_summed(skb, OVS_CSUM_NONE);
+
return skb;
error_free:
const struct tnl_mutable_config *mutable)
{
int sent_len;
- int err;
sent_len = 0;
while (skb) {
struct sk_buff *next = skb->next;
int frag_len = skb->len - mutable->tunnel_hlen;
+ int err;
skb->next = NULL;
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
err = ip_local_out(skb);
- if (likely(net_xmit_eval(err) == 0))
- sent_len += frag_len;
- else {
- skb = next;
- goto free_frags;
- }
-
skb = next;
+ if (unlikely(net_xmit_eval(err)))
+ goto free_frags;
+ sent_len += frag_len;
}
return sent_len;
u8 tos;
/* Validate the protocol headers before we try to use them. */
- if (skb->protocol == htons(ETH_P_8021Q)) {
+ if (skb->protocol == htons(ETH_P_8021Q) &&
+ !vlan_tx_tag_present(skb)) {
if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
goto error_free;
nf_reset(skb);
secpath_reset(skb);
skb_dst_drop(skb);
+ skb_clear_rxhash(skb);
/* Offloading */
skb = handle_offloads(skb, mutable, rt);
/* TTL */
ttl = mutable->ttl;
if (!ttl)
- ttl = dst_metric(&rt_dst(rt), RTAX_HOPLIMIT);
+ ttl = ip4_dst_hoplimit(&rt_dst(rt));
if (mutable->flags & TNL_F_TTL_INHERIT) {
if (skb->protocol == htons(ETH_P_IP))
struct sk_buff *next_skb = skb->next;
skb->next = NULL;
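+ /* If the VLAN tag is still in the accelerated field, push it into
+ * the packet data before prepending the outer headers. */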
+ if (unlikely(vlan_deaccel_tag(skb)))
+ goto next;
+
if (likely(cache)) {
skb_push(skb, cache->len);
memcpy(skb->data, get_cached_header(cache), cache->len);
ip_send_check(iph);
if (cache_vport) {
+ if (unlikely(compute_ip_summed(skb, true))) {
+ kfree_skb(skb);
+ goto next;
+ }
+
OVS_CB(skb)->flow = cache->flow;
- compute_ip_summed(skb, true);
vport_receive(cache_vport, skb);
sent_len += orig_len;
} else {
error_free:
tnl_free_linked_skbs(skb);
error:
- dst_release(unattached_dst);
vport_record_error(vport, err);
out:
+ dst_release(unattached_dst);
return sent_len;
}
if (a[ODP_TUNNEL_ATTR_TTL])
mutable->ttl = nla_get_u8(a[ODP_TUNNEL_ATTR_TTL]);
- mutable->tunnel_hlen = tnl_ops->hdr_len(mutable);
- if (mutable->tunnel_hlen < 0)
- return mutable->tunnel_hlen;
-
- mutable->tunnel_hlen += sizeof(struct iphdr);
-
mutable->tunnel_type = tnl_ops->tunnel_type;
if (!a[ODP_TUNNEL_ATTR_IN_KEY]) {
mutable->tunnel_type |= TNL_T_KEY_MATCH;
else
mutable->out_key = nla_get_be64(a[ODP_TUNNEL_ATTR_OUT_KEY]);
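+ /* hdr_len() looks at the key configuration, so the header length
+ * can only be computed once the options above have been parsed. */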
+ mutable->tunnel_hlen = tnl_ops->hdr_len(mutable);
+ if (mutable->tunnel_hlen < 0)
+ return mutable->tunnel_hlen;
+
+ mutable->tunnel_hlen += sizeof(struct iphdr);
+
old_vport = tnl_find_port(mutable->saddr, mutable->daddr,
mutable->in_key, mutable->tunnel_type,
&old_mutable);
void tnl_free_linked_skbs(struct sk_buff *skb)
{
- if (unlikely(!skb))
- return;
-
while (skb) {
struct sk_buff *next = skb->next;
kfree_skb(skb);