X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=datapath%2Factions.c;h=bef7d108c3e7b1ef61f53824bbd426bbea925a22;hb=a0bc29a541fc7dc6e20137d5558e2094d614e6ab;hp=a037e4320f218adcfbd9f5df29094ed5b6d7d1d0;hpb=a14bc59fb8f27db193d74662dc9c5cb8237177ef;p=sliver-openvswitch.git diff --git a/datapath/actions.c b/datapath/actions.c index a037e4320..bef7d108c 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -1,6 +1,6 @@ /* * Distributed under the terms of the GNU GPL version 2. - * Copyright (c) 2007, 2008, 2009 Nicira Networks. + * Copyright (c) 2007, 2008, 2009, 2010 Nicira Networks. * * Significant portions of this file may be copied from parts of the Linux * kernel, by Linus Torvalds and others. @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include "datapath.h" @@ -22,12 +23,28 @@ #include "actions.h" #include "openvswitch/datapath-protocol.h" -struct sk_buff * -make_writable(struct sk_buff *skb, gfp_t gfp) +static struct sk_buff * +make_writable(struct sk_buff *skb, unsigned min_headroom, gfp_t gfp) { if (skb_shared(skb) || skb_cloned(skb)) { - struct sk_buff *nskb = skb_copy(skb, gfp); + struct sk_buff *nskb; + unsigned headroom = max(min_headroom, skb_headroom(skb)); + + nskb = skb_copy_expand(skb, headroom, skb_tailroom(skb), gfp); if (nskb) { +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) + /* Before 2.6.24 these fields were not copied when + * doing an skb_copy_expand. */ + nskb->ip_summed = skb->ip_summed; + nskb->csum = skb->csum; +#endif +#if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID) + /* These fields are copied in skb_clone but not in + * skb_copy or related functions. We need to manually + * copy them over here. */ + nskb->proto_data_valid = skb->proto_data_valid; + nskb->proto_csum_blank = skb->proto_csum_blank; +#endif kfree_skb(skb); return nskb; } @@ -48,11 +65,14 @@ vlan_pull_tag(struct sk_buff *skb) struct vlan_ethhdr *vh = vlan_eth_hdr(skb); struct ethhdr *eh; - /* Verify we were given a vlan packet */ if (vh->h_vlan_proto != htons(ETH_P_8021Q)) return skb; + if (OVS_CB(skb)->ip_summed == OVS_CSUM_COMPLETE) + skb->csum = csum_sub(skb->csum, csum_partial(skb->data + + ETH_HLEN, VLAN_HLEN, 0)); + memmove(skb->data + VLAN_HLEN, skb->data, 2 * VLAN_ETH_ALEN); eh = (struct ethhdr *)skb_pull(skb, VLAN_HLEN); @@ -74,20 +94,30 @@ modify_vlan_tci(struct datapath *dp, struct sk_buff *skb, if (a->type == ODPAT_SET_VLAN_VID) { tci = ntohs(a->vlan_vid.vlan_vid); mask = VLAN_VID_MASK; - key->dl_vlan = htons(tci & mask); + key->dl_vlan = a->vlan_vid.vlan_vid; } else { - tci = a->vlan_pcp.vlan_pcp << 13; + tci = a->vlan_pcp.vlan_pcp << VLAN_PCP_SHIFT; mask = VLAN_PCP_MASK; + key->dl_vlan_pcp = a->vlan_pcp.vlan_pcp; } - skb = make_writable(skb, gfp); + skb = make_writable(skb, VLAN_HLEN, gfp); if (!skb) return ERR_PTR(-ENOMEM); if (skb->protocol == htons(ETH_P_8021Q)) { /* Modify vlan id, but maintain other TCI values */ struct vlan_ethhdr *vh = vlan_eth_hdr(skb); + __be16 old_tci = vh->h_vlan_TCI; + vh->h_vlan_TCI = htons((ntohs(vh->h_vlan_TCI) & ~mask) | tci); + + if (OVS_CB(skb)->ip_summed == OVS_CSUM_COMPLETE) { + __be16 diff[] = { ~old_tci, vh->h_vlan_TCI }; + + skb->csum = ~csum_partial((char *)diff, sizeof(diff), + ~skb->csum); + } } else { /* Add vlan header */ @@ -96,7 +126,7 @@ modify_vlan_tci(struct datapath *dp, struct sk_buff *skb, * when we send the packet out on the wire, and it will fail at * that point because skb_checksum_setup() will not look inside * an 802.1Q header. */ - skb_checksum_setup(skb); + vswitch_skb_checksum_setup(skb); /* GSO is not implemented for packets with an 802.1Q header, so * we have to do segmentation before we add that header. @@ -127,6 +157,9 @@ modify_vlan_tci(struct datapath *dp, struct sk_buff *skb, segs->next = NULL; + /* GSO can change the checksum type so update.*/ + compute_ip_summed(segs, true); + segs = __vlan_put_tag(segs, tci); err = -ENOMEM; if (segs) { @@ -150,6 +183,7 @@ modify_vlan_tci(struct datapath *dp, struct sk_buff *skb, } while (segs->next); skb = segs; + compute_ip_summed(skb, true); } /* The hardware-accelerated version of vlan_put_tag() works @@ -160,6 +194,12 @@ modify_vlan_tci(struct datapath *dp, struct sk_buff *skb, skb = __vlan_put_tag(skb, tci); if (!skb) return ERR_PTR(-ENOMEM); + + /* GSO doesn't fix up the hardware computed checksum so this + * will only be hit in the non-GSO case. */ + if (OVS_CB(skb)->ip_summed == OVS_CSUM_COMPLETE) + skb->csum = csum_add(skb->csum, csum_partial(skb->data + + ETH_HLEN, VLAN_HLEN, 0)); } return skb; @@ -168,7 +208,7 @@ modify_vlan_tci(struct datapath *dp, struct sk_buff *skb, static struct sk_buff *strip_vlan(struct sk_buff *skb, struct odp_flow_key *key, gfp_t gfp) { - skb = make_writable(skb, gfp); + skb = make_writable(skb, 0, gfp); if (skb) { vlan_pull_tag(skb); key->dl_vlan = htons(ODP_VLAN_NONE); @@ -177,14 +217,20 @@ static struct sk_buff *strip_vlan(struct sk_buff *skb, } static struct sk_buff *set_dl_addr(struct sk_buff *skb, + struct odp_flow_key *key, const struct odp_action_dl_addr *a, gfp_t gfp) { - skb = make_writable(skb, gfp); + skb = make_writable(skb, 0, gfp); if (skb) { struct ethhdr *eh = eth_hdr(skb); - memcpy(a->type == ODPAT_SET_DL_SRC ? eh->h_source : eh->h_dest, - a->dl_addr, ETH_ALEN); + if (a->type == ODPAT_SET_DL_SRC) { + memcpy(eh->h_source, a->dl_addr, ETH_ALEN); + memcpy(key->dl_src, a->dl_addr, ETH_ALEN); + } else { + memcpy(eh->h_dest, a->dl_addr, ETH_ALEN); + memcpy(key->dl_dst, a->dl_addr, ETH_ALEN); + } } return skb; } @@ -197,10 +243,11 @@ static void update_csum(__sum16 *sum, struct sk_buff *skb, __be32 from, __be32 to, int pseudohdr) { __be32 diff[] = { ~from, to }; - if (skb->ip_summed != CHECKSUM_PARTIAL) { + + if (OVS_CB(skb)->ip_summed != OVS_CSUM_PARTIAL) { *sum = csum_fold(csum_partial((char *)diff, sizeof(diff), ~csum_unfold(*sum))); - if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) + if (OVS_CB(skb)->ip_summed == OVS_CSUM_COMPLETE && pseudohdr) skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); } else if (pseudohdr) @@ -216,7 +263,7 @@ static struct sk_buff *set_nw_addr(struct sk_buff *skb, if (key->dl_type != htons(ETH_P_IP)) return skb; - skb = make_writable(skb, gfp); + skb = make_writable(skb, 0, gfp); if (skb) { struct iphdr *nh = ip_hdr(skb); u32 *f = a->type == ODPAT_SET_NW_SRC ? &nh->saddr : &nh->daddr; @@ -232,6 +279,36 @@ static struct sk_buff *set_nw_addr(struct sk_buff *skb, } update_csum(&nh->check, skb, old, new, 0); *f = new; + + if (a->type == ODPAT_SET_NW_SRC) + key->nw_src = a->nw_addr; + else + key->nw_dst = a->nw_addr; + } + return skb; +} + +static struct sk_buff *set_nw_tos(struct sk_buff *skb, + struct odp_flow_key *key, + const struct odp_action_nw_tos *a, + gfp_t gfp) +{ + if (key->dl_type != htons(ETH_P_IP)) + return skb; + + skb = make_writable(skb, 0, gfp); + if (skb) { + struct iphdr *nh = ip_hdr(skb); + u8 *f = &nh->tos; + u8 old = *f; + u8 new; + + /* Set the DSCP bits and preserve the ECN bits. */ + new = a->nw_tos | (nh->tos & INET_ECN_MASK); + update_csum(&nh->check, skb, htons((uint16_t)old), + htons((uint16_t)new), 0); + *f = new; + key->nw_tos = a->nw_tos; } return skb; } @@ -253,15 +330,19 @@ set_tp_port(struct sk_buff *skb, struct odp_flow_key *key, else return skb; - skb = make_writable(skb, gfp); + skb = make_writable(skb, 0, gfp); if (skb) { struct udphdr *th = udp_hdr(skb); u16 *f = a->type == ODPAT_SET_TP_SRC ? &th->source : &th->dest; u16 old = *f; u16 new = a->tp_port; - update_csum((u16*)((u8*)skb->data + check_ofs), - skb, old, new, 1); + update_csum((u16*)(skb_transport_header(skb) + check_ofs), + skb, old, new, 0); *f = new; + if (a->type == ODPAT_SET_TP_SRC) + key->tp_src = a->tp_port; + else + key->tp_dst = a->tp_port; } return skb; } @@ -286,6 +367,7 @@ int dp_xmit_skb(struct sk_buff *skb) return -E2BIG; } + forward_ip_summed(skb); dev_queue_xmit(skb); return len; @@ -307,7 +389,7 @@ do_output(struct datapath *dp, struct sk_buff *skb, int out_port) dev = skb->dev = p->dev; if (is_dp_dev(dev)) dp_dev_recv(dev, skb); - else + else dp_xmit_skb(skb); return; @@ -350,6 +432,28 @@ output_control(struct datapath *dp, struct sk_buff *skb, u32 arg, gfp_t gfp) return dp_output_control(dp, skb, _ODPL_ACTION_NR, arg); } +/* Send a copy of this packet up to the sFlow agent, along with extra + * information about what happened to it. */ +static void sflow_sample(struct datapath *dp, struct sk_buff *skb, + const union odp_action *a, int n_actions, + gfp_t gfp, struct net_bridge_port *nbp) +{ + struct odp_sflow_sample_header *hdr; + unsigned int actlen = n_actions * sizeof(union odp_action); + unsigned int hdrlen = sizeof(struct odp_sflow_sample_header); + struct sk_buff *nskb; + + nskb = skb_copy_expand(skb, actlen + hdrlen, 0, gfp); + if (!nskb) + return; + + memcpy(__skb_push(nskb, actlen), a, actlen); + hdr = (struct odp_sflow_sample_header*)__skb_push(nskb, hdrlen); + hdr->n_actions = n_actions; + hdr->sample_pool = atomic_read(&nbp->sflow_pool); + dp_output_control(dp, nskb, _ODPL_SFLOW_NR, 0); +} + /* Execute a list of actions against 'skb'. */ int execute_actions(struct datapath *dp, struct sk_buff *skb, struct odp_flow_key *key, @@ -361,7 +465,18 @@ int execute_actions(struct datapath *dp, struct sk_buff *skb, * then freeing the original skbuff is wasteful. So the following code * is slightly obscure just to avoid that. */ int prev_port = -1; - int err = 0; + int err; + + if (dp->sflow_probability) { + struct net_bridge_port *p = skb->dev->br_port; + if (p) { + atomic_inc(&p->sflow_pool); + if (dp->sflow_probability == UINT_MAX || + net_random() < dp->sflow_probability) + sflow_sample(dp, skb, a, n_actions, gfp, p); + } + } + for (; n_actions > 0; a++, n_actions--) { WARN_ON_ONCE(skb_shared(skb)); if (prev_port != -1) { @@ -400,7 +515,7 @@ int execute_actions(struct datapath *dp, struct sk_buff *skb, case ODPAT_SET_DL_SRC: case ODPAT_SET_DL_DST: - skb = set_dl_addr(skb, &a->dl_addr, gfp); + skb = set_dl_addr(skb, key, &a->dl_addr, gfp); break; case ODPAT_SET_NW_SRC: @@ -408,6 +523,10 @@ int execute_actions(struct datapath *dp, struct sk_buff *skb, skb = set_nw_addr(skb, key, &a->nw_addr, gfp); break; + case ODPAT_SET_NW_TOS: + skb = set_nw_tos(skb, key, &a->nw_tos, gfp); + break; + case ODPAT_SET_TP_SRC: case ODPAT_SET_TP_DST: skb = set_tp_port(skb, key, &a->tp_port, gfp); @@ -420,5 +539,5 @@ int execute_actions(struct datapath *dp, struct sk_buff *skb, do_output(dp, skb, prev_port); else kfree_skb(skb); - return err; + return 0; }