X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=datapath%2Factions.c;h=fccacf217cfc2473e5b00a89313a51844aeabd1b;hb=26c3f94a553be66dd037159148f30b47ef2c6f4b;hp=8a3e8abb549e1302bdbed626bd889728bf2854f5;hpb=39fb08818bbd9c438dbf23caa89937c663451b5a;p=sliver-openvswitch.git diff --git a/datapath/actions.c b/datapath/actions.c index 8a3e8abb5..fccacf217 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -1,6 +1,6 @@ /* * Distributed under the terms of the GNU GPL version 2. - * Copyright (c) 2007, 2008, 2009 Nicira Networks. + * Copyright (c) 2007, 2008, 2009, 2010 Nicira Networks. * * Significant portions of this file may be copied from parts of the Linux * kernel, by Linus Torvalds and others. @@ -22,12 +22,28 @@ #include "actions.h" #include "openvswitch/datapath-protocol.h" -struct sk_buff * -make_writable(struct sk_buff *skb, gfp_t gfp) +static struct sk_buff * +make_writable(struct sk_buff *skb, unsigned min_headroom, gfp_t gfp) { if (skb_shared(skb) || skb_cloned(skb)) { - struct sk_buff *nskb = skb_copy(skb, gfp); + struct sk_buff *nskb; + unsigned headroom = max(min_headroom, skb_headroom(skb)); + + nskb = skb_copy_expand(skb, headroom, skb_tailroom(skb), gfp); if (nskb) { +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) + /* Before 2.6.24 these fields were not copied when + * doing an skb_copy_expand. */ + nskb->ip_summed = skb->ip_summed; + nskb->csum = skb->csum; +#endif +#if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID) + /* These fields are copied in skb_clone but not in + * skb_copy or related functions. We need to manually + * copy them over here. */ + nskb->proto_data_valid = skb->proto_data_valid; + nskb->proto_csum_blank = skb->proto_csum_blank; +#endif kfree_skb(skb); return nskb; } @@ -80,7 +96,7 @@ modify_vlan_tci(struct datapath *dp, struct sk_buff *skb, mask = VLAN_PCP_MASK; } - skb = make_writable(skb, gfp); + skb = make_writable(skb, VLAN_HLEN, gfp); if (!skb) return ERR_PTR(-ENOMEM); @@ -168,7 +184,7 @@ modify_vlan_tci(struct datapath *dp, struct sk_buff *skb, static struct sk_buff *strip_vlan(struct sk_buff *skb, struct odp_flow_key *key, gfp_t gfp) { - skb = make_writable(skb, gfp); + skb = make_writable(skb, 0, gfp); if (skb) { vlan_pull_tag(skb); key->dl_vlan = htons(ODP_VLAN_NONE); @@ -180,7 +196,7 @@ static struct sk_buff *set_dl_addr(struct sk_buff *skb, const struct odp_action_dl_addr *a, gfp_t gfp) { - skb = make_writable(skb, gfp); + skb = make_writable(skb, 0, gfp); if (skb) { struct ethhdr *eh = eth_hdr(skb); memcpy(a->type == ODPAT_SET_DL_SRC ? eh->h_source : eh->h_dest, @@ -197,10 +213,43 @@ static void update_csum(__sum16 *sum, struct sk_buff *skb, __be32 from, __be32 to, int pseudohdr) { __be32 diff[] = { ~from, to }; - if (skb->ip_summed != CHECKSUM_PARTIAL) { + +/* On older kernels, CHECKSUM_PARTIAL and CHECKSUM_COMPLETE are both defined + * as CHECKSUM_HW. However, we can make some inferences so that we can update + * the checksums appropriately. */ + enum { + CSUM_PARTIAL, /* Partial checksum, skb->csum undefined. */ + CSUM_PACKET, /* In-packet checksum, skb->csum undefined. */ + CSUM_COMPLETE, /* In-packet checksum, skb->csum valid. */ + } csum_type; + + csum_type = CSUM_PACKET; +#ifndef CHECKSUM_HW + /* Newer kernel, just map between kernel types and ours. */ + if (skb->ip_summed == CHECKSUM_PARTIAL) + csum_type = CSUM_PARTIAL; + else if (skb->ip_summed == CHECKSUM_COMPLETE) + csum_type = CSUM_COMPLETE; +#else + /* In theory this could be either CHECKSUM_PARTIAL or CHECKSUM_COMPLETE. 
+	 * However, we should only get CHECKSUM_PARTIAL packets from Xen, which
+	 * uses some special fields to represent this (see below). Since we
+	 * can only make one type work, pick the one that actually happens in
+	 * practice. */
+	if (skb->ip_summed == CHECKSUM_HW)
+		csum_type = CSUM_COMPLETE;
+#endif
+#if defined(CONFIG_XEN) && defined(HAVE_PROTO_DATA_VALID)
+	/* Xen has a special way of representing CHECKSUM_PARTIAL on older
+	 * kernels. */
+	if (skb->proto_csum_blank)
+		csum_type = CSUM_PARTIAL;
+#endif
+
+	if (csum_type != CSUM_PARTIAL) {
 		*sum = csum_fold(csum_partial((char *)diff, sizeof(diff),
 				~csum_unfold(*sum)));
-		if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
+		if (csum_type == CSUM_COMPLETE && pseudohdr)
 			skb->csum = ~csum_partial((char *)diff, sizeof(diff),
 						~skb->csum);
 	} else if (pseudohdr)
@@ -216,7 +265,7 @@ static struct sk_buff *set_nw_addr(struct sk_buff *skb,
 	if (key->dl_type != htons(ETH_P_IP))
 		return skb;
 
-	skb = make_writable(skb, gfp);
+	skb = make_writable(skb, 0, gfp);
 	if (skb) {
 		struct iphdr *nh = ip_hdr(skb);
 		u32 *f = a->type == ODPAT_SET_NW_SRC ? &nh->saddr : &nh->daddr;
@@ -236,6 +285,30 @@ static struct sk_buff *set_nw_addr(struct sk_buff *skb,
 	return skb;
 }
 
+static struct sk_buff *set_nw_tos(struct sk_buff *skb,
+				  struct odp_flow_key *key,
+				  const struct odp_action_nw_tos *a,
+				  gfp_t gfp)
+{
+	if (key->dl_type != htons(ETH_P_IP))
+		return skb;
+
+	skb = make_writable(skb, 0, gfp);
+	if (skb) {
+		struct iphdr *nh = ip_hdr(skb);
+		u8 *f = &nh->tos;
+		u8 old = *f;
+
+		/* We only set the lower 6 bits. */
+		u8 new = (a->nw_tos & 0x3f) | (nh->tos & 0xc0);
+
+		update_csum(&nh->check, skb, htons((uint16_t)old),
+			    htons((uint16_t)new), 0);
+		*f = new;
+	}
+	return skb;
+}
+
 static struct sk_buff *
 set_tp_port(struct sk_buff *skb, struct odp_flow_key *key,
 	    const struct odp_action_tp_port *a,
@@ -253,14 +326,14 @@ set_tp_port(struct sk_buff *skb, struct odp_flow_key *key,
 	else
 		return skb;
 
-	skb = make_writable(skb, gfp);
+	skb = make_writable(skb, 0, gfp);
 	if (skb) {
 		struct udphdr *th = udp_hdr(skb);
 		u16 *f = a->type == ODPAT_SET_TP_SRC ? &th->source : &th->dest;
 		u16 old = *f;
 		u16 new = a->tp_port;
-		update_csum((u16*)((u8*)skb->data + check_ofs),
-				skb, old, new, 1);
+		update_csum((u16*)(skb_transport_header(skb) + check_ofs),
+				skb, old, new, 0);
 		*f = new;
 	}
 	return skb;
@@ -286,6 +359,7 @@ int dp_xmit_skb(struct sk_buff *skb)
 		return -E2BIG;
 	}
 
+	forward_ip_summed(skb);
 	dev_queue_xmit(skb);
 
 	return len;
@@ -307,7 +381,7 @@ do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
 	dev = skb->dev = p->dev;
 	if (is_dp_dev(dev))
 		dp_dev_recv(dev, skb);
-	else 
+	else
 		dp_xmit_skb(skb);
 	return;
 
@@ -350,6 +424,28 @@ output_control(struct datapath *dp, struct sk_buff *skb, u32 arg, gfp_t gfp)
 	return dp_output_control(dp, skb, _ODPL_ACTION_NR, arg);
 }
 
+/* Send a copy of this packet up to the sFlow agent, along with extra
+ * information about what happened to it. */
+static void sflow_sample(struct datapath *dp, struct sk_buff *skb,
+			 const union odp_action *a, int n_actions,
+			 gfp_t gfp, struct net_bridge_port *nbp)
+{
+	struct odp_sflow_sample_header *hdr;
+	unsigned int actlen = n_actions * sizeof(union odp_action);
+	unsigned int hdrlen = sizeof(struct odp_sflow_sample_header);
+	struct sk_buff *nskb;
+
+	nskb = skb_copy_expand(skb, actlen + hdrlen, 0, gfp);
+	if (!nskb)
+		return;
+
+	memcpy(__skb_push(nskb, actlen), a, actlen);
+	hdr = (struct odp_sflow_sample_header*)__skb_push(nskb, hdrlen);
+	hdr->n_actions = n_actions;
+	hdr->sample_pool = atomic_read(&nbp->sflow_pool);
+	dp_output_control(dp, nskb, _ODPL_SFLOW_NR, 0);
+}
+
 /* Execute a list of actions against 'skb'. */
 int execute_actions(struct datapath *dp, struct sk_buff *skb,
 		    struct odp_flow_key *key,
@@ -362,6 +458,17 @@ int execute_actions(struct datapath *dp, struct sk_buff *skb,
 	 * is slightly obscure just to avoid that. */
 	int prev_port = -1;
 	int err;
+
+	if (dp->sflow_probability) {
+		struct net_bridge_port *p = skb->dev->br_port;
+		if (p) {
+			atomic_inc(&p->sflow_pool);
+			if (dp->sflow_probability == UINT_MAX ||
+			    net_random() < dp->sflow_probability)
+				sflow_sample(dp, skb, a, n_actions, gfp, p);
+		}
+	}
+
 	for (; n_actions > 0; a++, n_actions--) {
 		WARN_ON_ONCE(skb_shared(skb));
 		if (prev_port != -1) {
@@ -408,6 +515,10 @@ int execute_actions(struct datapath *dp, struct sk_buff *skb,
 			skb = set_nw_addr(skb, key, &a->nw_addr, gfp);
 			break;
 
+		case ODPAT_SET_NW_TOS:
+			skb = set_nw_tos(skb, key, &a->nw_tos, gfp);
+			break;
+
 		case ODPAT_SET_TP_SRC:
 		case ODPAT_SET_TP_DST:
 			skb = set_tp_port(skb, key, &a->tp_port, gfp);
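Note on make_writable(): the new min_headroom argument means that when a shared or cloned skb has to be copied before it is modified, the copy is made with enough headroom up front; modify_vlan_tci() passes VLAN_HLEN so a tag can later be pushed into the copy without another reallocation. The userspace sketch below shows the same copy-on-write-with-headroom idea in isolation; struct buf and buf_make_writable() are invented for illustration and are not skb or Open vSwitch API.

#include <stdlib.h>
#include <string.h>

/* Toy refcounted packet buffer: 'headroom' is the gap between the start of
 * the allocation and the first packet byte. */
struct buf {
	unsigned char *head;	/* start of the allocation */
	unsigned char *data;	/* first packet byte */
	size_t len;		/* packet length */
	size_t headroom;	/* data - head */
	int refcnt;
};

/* Return a buffer that is safe to modify, copying it if it is shared.
 * As in make_writable(), the copy keeps at least 'min_headroom' bytes of
 * headroom so that a header (e.g. a VLAN tag) can be pushed later. */
static struct buf *buf_make_writable(struct buf *b, size_t min_headroom)
{
	struct buf *n;
	size_t headroom;

	if (b->refcnt == 1)
		return b;	/* already private, nothing to copy */

	headroom = b->headroom > min_headroom ? b->headroom : min_headroom;
	n = malloc(sizeof *n);
	if (!n)
		return NULL;
	n->head = malloc(headroom + b->len);
	if (!n->head) {
		free(n);
		return NULL;
	}
	n->data = n->head + headroom;
	n->len = b->len;
	n->headroom = headroom;
	n->refcnt = 1;
	memcpy(n->data, b->data, b->len);
	b->refcnt--;		/* drop our reference; others still hold it */
	return n;
}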
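Note on the checksum handling: set_nw_addr(), set_nw_tos() and set_tp_port() all go through update_csum(), which folds the pair { ~from, to } into the existing checksum instead of recomputing it over the whole header; that is the RFC 1624 incremental update HC' = ~(~HC + ~m + m'). Below is a standalone sketch of the same arithmetic for a single 16-bit field; fold() and csum_update16() are illustrative names, not kernel API.

#include <stdint.h>

/* Fold a 32-bit accumulator back into a 16-bit ones'-complement sum. */
static uint16_t fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

/* RFC 1624 incremental update, HC' = ~(~HC + ~m + m').  'check', 'old' and
 * 'new' must all be in the same (network) byte order, as in update_csum(). */
static uint16_t csum_update16(uint16_t check, uint16_t old, uint16_t new)
{
	uint32_t sum = (uint16_t)~check;

	sum += (uint16_t)~old;
	sum += new;
	return (uint16_t)~fold(sum);
}

For a 32-bit field such as an IPv4 address the same update is applied to both 16-bit halves, which is what summing the two-element diff[] array in a single csum_partial() call achieves.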
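Note on the sFlow sampling in execute_actions(): the comparison against UINT_MAX and the net_random() < dp->sflow_probability test imply that sflow_probability is a sampling probability scaled to the full 32-bit range (UINT_MAX meaning "sample every packet"), while sflow_pool counts every packet that was eligible for sampling so the collector can scale its estimates. A minimal userspace sketch of that decision follows, with an invented xorshift generator standing in for net_random(); rate_to_probability() and maybe_sample() are illustrative names only.

#include <stdint.h>

/* Convert a "1 packet in N" sampling rate into the threshold form implied by
 * the datapath code: UINT32_MAX samples everything, 0 disables sampling. */
static uint32_t rate_to_probability(uint32_t n)
{
	if (n == 0)
		return 0;
	if (n == 1)
		return UINT32_MAX;
	return UINT32_MAX / n;
}

/* Cheap PRNG for illustration only; net_random() plays this role in-kernel. */
static uint32_t prng32(void)
{
	static uint32_t x = 0x6b8b4567;	/* arbitrary fixed seed */

	x ^= x << 13;
	x ^= x >> 17;
	x ^= x << 5;
	return x;
}

/* Mirror of the test in execute_actions(): count the packet in the sample
 * pool, then decide whether to sample it. */
static int maybe_sample(uint32_t probability, uint64_t *sample_pool)
{
	if (!probability)
		return 0;
	(*sample_pool)++;
	return probability == UINT32_MAX || prng32() < probability;
}

With rate_to_probability(100), for example, roughly one packet in a hundred passes the test, matching how sampled copies are then queued to the _ODPL_SFLOW_NR channel in the patch.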