X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=datapath%2Fvport-capwap.c;h=05a099d639076a7b6c250377cf5c289a104685ac;hb=e0edde6fee279cdbbf3c179f5f50adaf0c7c7f1e;hp=65f1f1bdda515564e8e34ffd814e49a7bfa98306;hpb=f915f1a8ca180828983ef22cf2fd21b8f010b972;p=sliver-openvswitch.git diff --git a/datapath/vport-capwap.c b/datapath/vport-capwap.c index 65f1f1bdd..05a099d63 100644 --- a/datapath/vport-capwap.c +++ b/datapath/vport-capwap.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010, 2011 Nicira Networks. + * Copyright (c) 2007-2012 Nicira, Inc. * Distributed under the terms of the GNU GPL version 2. * * Significant portions of this file may be copied from parts of the Linux @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -23,6 +24,7 @@ #include #include +#include "datapath.h" #include "tunnel.h" #include "vport.h" #include "vport-generic.h" @@ -32,7 +34,7 @@ #define CAPWAP_FRAG_TIMEOUT (30 * HZ) #define CAPWAP_FRAG_MAX_MEM (256 * 1024) -#define CAPWAP_FRAG_PRUNE_MEM (192 *1024) +#define CAPWAP_FRAG_PRUNE_MEM (192 * 1024) #define CAPWAP_FRAG_SECRET_INTERVAL (10 * 60 * HZ) /* @@ -42,22 +44,65 @@ * statically create them and we can do very fast parsing by checking all 12 * fields in one go. */ -#define CAPWAP_BEGIN_HLEN __cpu_to_be32(0x00100000) -#define CAPWAP_BEGIN_WBID __cpu_to_be32(0x00000200) -#define CAPWAP_BEGIN_FRAG __cpu_to_be32(0x00000080) -#define CAPWAP_BEGIN_LAST __cpu_to_be32(0x00000040) - -#define NO_FRAG_HDR (CAPWAP_BEGIN_HLEN | CAPWAP_BEGIN_WBID) -#define FRAG_HDR (NO_FRAG_HDR | CAPWAP_BEGIN_FRAG) -#define FRAG_LAST_HDR (FRAG_HDR | CAPWAP_BEGIN_LAST) +#define CAPWAP_PREAMBLE_MASK __cpu_to_be32(0xFF000000) +#define CAPWAP_HLEN_SHIFT 17 +#define CAPWAP_HLEN_MASK __cpu_to_be32(0x00F80000) +#define CAPWAP_RID_MASK __cpu_to_be32(0x0007C000) +#define CAPWAP_WBID_MASK __cpu_to_be32(0x00003E00) +#define CAPWAP_F_MASK __cpu_to_be32(0x000001FF) + +#define CAPWAP_F_FRAG __cpu_to_be32(0x00000080) +#define CAPWAP_F_LASTFRAG __cpu_to_be32(0x00000040) +#define CAPWAP_F_WSI __cpu_to_be32(0x00000020) +#define CAPWAP_F_RMAC __cpu_to_be32(0x00000010) + +#define CAPWAP_RMAC_LEN 4 + +/* Standard CAPWAP looks for a WBID value of 2. + * When we insert WSI field, use WBID value of 30, which has been + * proposed for all "experimental" usage - users with no reserved WBID value + * of their own. +*/ +#define CAPWAP_WBID_30 __cpu_to_be32(0x00003C00) +#define CAPWAP_WBID_2 __cpu_to_be32(0x00000200) + +#define FRAG_HDR (CAPWAP_F_FRAG) +#define FRAG_LAST_HDR (FRAG_HDR | CAPWAP_F_LASTFRAG) + +/* Keyed packet, WBID 30, and length long enough to include WSI key */ +#define CAPWAP_KEYED (CAPWAP_WBID_30 | CAPWAP_F_WSI | htonl(20 << CAPWAP_HLEN_SHIFT)) +/* A backward-compatible packet, WBID 2 and length of 2 words (no WSI fields) */ +#define CAPWAP_NO_WSI (CAPWAP_WBID_2 | htonl(8 << CAPWAP_HLEN_SHIFT)) + +/* Mask for all parts of header that must be 0. */ +#define CAPWAP_ZERO_MASK (CAPWAP_PREAMBLE_MASK | \ + (CAPWAP_F_MASK ^ (CAPWAP_F_WSI | CAPWAP_F_FRAG | CAPWAP_F_LASTFRAG | CAPWAP_F_RMAC))) struct capwaphdr { __be32 begin; __be16 frag_id; + /* low 3 bits of frag_off are reserved */ __be16 frag_off; }; -static inline struct capwaphdr *capwap_hdr(const struct sk_buff *skb) +/* + * We use the WSI field to hold additional tunnel data. + * The first eight bits store the size of the wsi data in bytes. + */ +struct capwaphdr_wsi { + u8 wsi_len; + u8 flags; + __be16 reserved_padding; +}; + +struct capwaphdr_wsi_key { + __be64 key; +}; + +/* Flag indicating a 64bit key is stored in WSI data field */ +#define CAPWAP_WSI_F_KEY64 0x80 + +static struct capwaphdr *capwap_hdr(const struct sk_buff *skb) { return (struct capwaphdr *)(udp_hdr(skb) + 1); } @@ -70,7 +115,11 @@ static inline struct capwaphdr *capwap_hdr(const struct sk_buff *skb) */ #define FRAG_OFF_MASK (~0x7U) -#define CAPWAP_HLEN (sizeof(struct udphdr) + sizeof(struct capwaphdr)) +/* + * The minimum header length. The header may be longer if the optional + * WSI field is used. + */ +#define CAPWAP_MIN_HLEN (sizeof(struct udphdr) + sizeof(struct capwaphdr)) struct frag_match { __be32 saddr; @@ -89,9 +138,7 @@ struct frag_skb_cb { #define FRAG_CB(skb) ((struct frag_skb_cb *)(skb)->cb) static struct sk_buff *fragment(struct sk_buff *, const struct vport *, - struct dst_entry *); -static void defrag_init(void); -static void defrag_exit(void); + struct dst_entry *dst, unsigned int hlen); static struct sk_buff *defrag(struct sk_buff *, bool frag_last); static void capwap_frag_init(struct inet_frag_queue *, void *match); @@ -107,28 +154,22 @@ static struct inet_frags frag_state = { .frag_expire = capwap_frag_expire, .secret_interval = CAPWAP_FRAG_SECRET_INTERVAL, }; -static struct netns_frags frag_netns_state = { - .timeout = CAPWAP_FRAG_TIMEOUT, - .high_thresh = CAPWAP_FRAG_MAX_MEM, - .low_thresh = CAPWAP_FRAG_PRUNE_MEM, -}; - -static struct socket *capwap_rcv_socket; static int capwap_hdr_len(const struct tnl_mutable_config *mutable) { + int size = CAPWAP_MIN_HLEN; + /* CAPWAP has no checksums. */ if (mutable->flags & TNL_F_CSUM) return -EINVAL; - /* CAPWAP has no keys, so check that the configuration for keys is the - * default if no key-specific attributes are used. - */ - if ((mutable->flags & (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION)) != - (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION)) - return -EINVAL; + /* if keys are specified, then add WSI field */ + if (mutable->out_key || (mutable->flags & TNL_F_OUT_KEY_ACTION)) { + size += sizeof(struct capwaphdr_wsi) + + sizeof(struct capwaphdr_wsi_key); + } - return CAPWAP_HLEN; + return size; } static void capwap_build_header(const struct vport *vport, @@ -142,9 +183,28 @@ static void capwap_build_header(const struct vport *vport, udph->dest = htons(CAPWAP_DST_PORT); udph->check = 0; - cwh->begin = NO_FRAG_HDR; cwh->frag_id = 0; cwh->frag_off = 0; + + if (mutable->out_key || (mutable->flags & TNL_F_OUT_KEY_ACTION)) { + struct capwaphdr_wsi *wsi = (struct capwaphdr_wsi *)(cwh + 1); + + cwh->begin = CAPWAP_KEYED; + + /* -1 for wsi_len byte, not included in length as per spec */ + wsi->wsi_len = sizeof(struct capwaphdr_wsi) - 1 + + sizeof(struct capwaphdr_wsi_key); + wsi->flags = CAPWAP_WSI_F_KEY64; + wsi->reserved_padding = 0; + + if (mutable->out_key) { + struct capwaphdr_wsi_key *opt = (struct capwaphdr_wsi_key *)(wsi + 1); + opt->key = mutable->out_key; + } + } else { + /* make packet readable by old capwap code */ + cwh->begin = CAPWAP_NO_WSI; + } } static struct sk_buff *capwap_update_header(const struct vport *vport, @@ -154,31 +214,100 @@ static struct sk_buff *capwap_update_header(const struct vport *vport, { struct udphdr *udph = udp_hdr(skb); + if (mutable->flags & TNL_F_OUT_KEY_ACTION) { + /* first field in WSI is key */ + struct capwaphdr *cwh = (struct capwaphdr *)(udph + 1); + struct capwaphdr_wsi *wsi = (struct capwaphdr_wsi *)(cwh + 1); + struct capwaphdr_wsi_key *opt = (struct capwaphdr_wsi_key *)(wsi + 1); + + opt->key = OVS_CB(skb)->tun_id; + } + udph->len = htons(skb->len - skb_transport_offset(skb)); - if (unlikely(skb->len - skb_network_offset(skb) > dst_mtu(dst))) - skb = fragment(skb, vport, dst); + if (unlikely(skb->len - skb_network_offset(skb) > dst_mtu(dst))) { + unsigned int hlen = skb_transport_offset(skb) + capwap_hdr_len(mutable); + skb = fragment(skb, vport, dst, hlen); + } return skb; } -static inline struct sk_buff *process_capwap_proto(struct sk_buff *skb) +static int process_capwap_wsi(struct sk_buff *skb, __be64 *key) { struct capwaphdr *cwh = capwap_hdr(skb); + struct capwaphdr_wsi *wsi; + int hdr_len; + int rmac_len = 0; + int wsi_len; - if (likely(cwh->begin == NO_FRAG_HDR)) - return skb; - else if (cwh->begin == FRAG_HDR) - return defrag(skb, false); - else if (cwh->begin == FRAG_LAST_HDR) - return defrag(skb, true); - else { - if (net_ratelimit()) - pr_warn("unparsable packet receive on capwap socket\n"); + if (((cwh->begin & CAPWAP_WBID_MASK) != CAPWAP_WBID_30)) + return 0; - kfree_skb(skb); - return NULL; + if (cwh->begin & CAPWAP_F_RMAC) + rmac_len = CAPWAP_RMAC_LEN; + + hdr_len = ntohl(cwh->begin & CAPWAP_HLEN_MASK) >> CAPWAP_HLEN_SHIFT; + + if (unlikely(sizeof(struct capwaphdr) + rmac_len + sizeof(struct capwaphdr_wsi) > hdr_len)) + return -EINVAL; + + /* read wsi header to find out how big it really is */ + wsi = (struct capwaphdr_wsi *)((u8 *)(cwh + 1) + rmac_len); + /* +1 for length byte not included in wsi_len */ + wsi_len = 1 + wsi->wsi_len; + + if (unlikely(sizeof(struct capwaphdr) + rmac_len + wsi_len != hdr_len)) + return -EINVAL; + + wsi_len -= sizeof(struct capwaphdr_wsi); + + if (wsi->flags & CAPWAP_WSI_F_KEY64) { + struct capwaphdr_wsi_key *opt; + + if (unlikely(wsi_len < sizeof(struct capwaphdr_wsi_key))) + return -EINVAL; + + opt = (struct capwaphdr_wsi_key *)(wsi + 1); + *key = opt->key; } + + return 0; +} + +static struct sk_buff *process_capwap_proto(struct sk_buff *skb, __be64 *key) +{ + struct capwaphdr *cwh = capwap_hdr(skb); + int hdr_len = sizeof(struct udphdr); + + if (unlikely((cwh->begin & CAPWAP_ZERO_MASK) != 0)) + goto error; + + hdr_len += ntohl(cwh->begin & CAPWAP_HLEN_MASK) >> CAPWAP_HLEN_SHIFT; + if (unlikely(hdr_len < CAPWAP_MIN_HLEN)) + goto error; + + if (unlikely(!pskb_may_pull(skb, hdr_len + ETH_HLEN))) + goto error; + + cwh = capwap_hdr(skb); + __skb_pull(skb, hdr_len); + skb_postpull_rcsum(skb, skb_transport_header(skb), hdr_len + ETH_HLEN); + + if (cwh->begin & CAPWAP_F_FRAG) { + skb = defrag(skb, (__force bool)(cwh->begin & CAPWAP_F_LASTFRAG)); + if (!skb) + return NULL; + cwh = capwap_hdr(skb); + } + + if ((cwh->begin & CAPWAP_F_WSI) && process_capwap_wsi(skb, key)) + goto error; + + return skb; +error: + kfree_skb(skb); + return NULL; } /* Called with rcu_read_lock and BH disabled. */ @@ -187,26 +316,29 @@ static int capwap_rcv(struct sock *sk, struct sk_buff *skb) struct vport *vport; const struct tnl_mutable_config *mutable; struct iphdr *iph; + __be64 key = 0; - if (unlikely(!pskb_may_pull(skb, CAPWAP_HLEN + ETH_HLEN))) + if (unlikely(!pskb_may_pull(skb, CAPWAP_MIN_HLEN + ETH_HLEN))) goto error; - __skb_pull(skb, CAPWAP_HLEN); - skb_postpull_rcsum(skb, skb_transport_header(skb), CAPWAP_HLEN + ETH_HLEN); - - skb = process_capwap_proto(skb); + skb = process_capwap_proto(skb, &key); if (unlikely(!skb)) goto out; iph = ip_hdr(skb); - vport = tnl_find_port(iph->daddr, iph->saddr, 0, - TNL_T_PROTO_CAPWAP | TNL_T_KEY_EXACT, &mutable); + vport = ovs_tnl_find_port(sock_net(sk), iph->daddr, iph->saddr, key, + TNL_T_PROTO_CAPWAP, &mutable); if (unlikely(!vport)) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); goto error; } - tnl_rcv(vport, skb); + if (mutable->flags & TNL_F_IN_KEY_MATCH) + OVS_CB(skb)->tun_id = key; + else + OVS_CB(skb)->tun_id = 0; + + ovs_tnl_rcv(vport, skb, iph->tos); goto out; error: @@ -223,49 +355,105 @@ static const struct tnl_ops capwap_tnl_ops = { .update_header = capwap_update_header, }; -static struct vport *capwap_create(const struct vport_parms *parms) +static inline struct capwap_net *ovs_get_capwap_net(struct net *net) { - return tnl_create(parms, &capwap_vport_ops, &capwap_tnl_ops); + struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + return &ovs_net->vport_net.capwap; } -/* Random value. Irrelevant as long as it's not 0 since we set the handler. */ +/* Arbitrary value. Irrelevant as long as it's not 0 since we set the handler. */ #define UDP_ENCAP_CAPWAP 10 -static int capwap_init(void) +static int init_socket(struct net *net) { int err; + struct capwap_net *capwap_net = ovs_get_capwap_net(net); struct sockaddr_in sin; - err = sock_create(AF_INET, SOCK_DGRAM, 0, &capwap_rcv_socket); + if (capwap_net->n_tunnels) { + capwap_net->n_tunnels++; + return 0; + } + + err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, + &capwap_net->capwap_rcv_socket); if (err) goto error; + /* release net ref. */ + sk_change_net(capwap_net->capwap_rcv_socket->sk, net); + sin.sin_family = AF_INET; sin.sin_addr.s_addr = htonl(INADDR_ANY); sin.sin_port = htons(CAPWAP_DST_PORT); - err = kernel_bind(capwap_rcv_socket, (struct sockaddr *)&sin, + err = kernel_bind(capwap_net->capwap_rcv_socket, + (struct sockaddr *)&sin, sizeof(struct sockaddr_in)); if (err) goto error_sock; - udp_sk(capwap_rcv_socket->sk)->encap_type = UDP_ENCAP_CAPWAP; - udp_sk(capwap_rcv_socket->sk)->encap_rcv = capwap_rcv; + udp_sk(capwap_net->capwap_rcv_socket->sk)->encap_type = UDP_ENCAP_CAPWAP; + udp_sk(capwap_net->capwap_rcv_socket->sk)->encap_rcv = capwap_rcv; + + capwap_net->frag_state.timeout = CAPWAP_FRAG_TIMEOUT; + capwap_net->frag_state.high_thresh = CAPWAP_FRAG_MAX_MEM; + capwap_net->frag_state.low_thresh = CAPWAP_FRAG_PRUNE_MEM; - defrag_init(); + inet_frags_init_net(&capwap_net->frag_state); + capwap_net->n_tunnels++; return 0; error_sock: - sock_release(capwap_rcv_socket); + sk_release_kernel(capwap_net->capwap_rcv_socket->sk); error: - pr_warn("cannot register capwap protocol handler\n"); + pr_warn("cannot register capwap protocol handler : %d\n", err); return err; } +static void release_socket(struct net *net) +{ + struct capwap_net *capwap_net = ovs_get_capwap_net(net); + + capwap_net->n_tunnels--; + if (capwap_net->n_tunnels) + return; + + inet_frags_exit_net(&capwap_net->frag_state, &frag_state); + sk_release_kernel(capwap_net->capwap_rcv_socket->sk); +} + +static struct vport *capwap_create(const struct vport_parms *parms) +{ + struct vport *vport; + int err; + + err = init_socket(ovs_dp_get_net(parms->dp)); + if (err) + return ERR_PTR(err); + + vport = ovs_tnl_create(parms, &ovs_capwap_vport_ops, &capwap_tnl_ops); + if (IS_ERR(vport)) + release_socket(ovs_dp_get_net(parms->dp)); + + return vport; +} + +static void capwap_destroy(struct vport *vport) +{ + ovs_tnl_destroy(vport); + release_socket(ovs_dp_get_net(vport->dp)); +} + +static int capwap_init(void) +{ + inet_frags_init(&frag_state); + return 0; +} + static void capwap_exit(void) { - defrag_exit(); - sock_release(capwap_rcv_socket); + inet_frags_fini(&frag_state); } static void copy_skb_metadata(struct sk_buff *from, struct sk_buff *to) @@ -290,10 +478,9 @@ static void copy_skb_metadata(struct sk_buff *from, struct sk_buff *to) } static struct sk_buff *fragment(struct sk_buff *skb, const struct vport *vport, - struct dst_entry *dst) + struct dst_entry *dst, unsigned int hlen) { struct tnl_vport *tnl_vport = tnl_vport_priv(vport); - unsigned int hlen = skb_transport_offset(skb) + CAPWAP_HLEN; unsigned int headroom; unsigned int max_frame_len = dst_mtu(dst) + skb_network_offset(skb); struct sk_buff *result = NULL, *list_cur = NULL; @@ -352,9 +539,9 @@ static struct sk_buff *fragment(struct sk_buff *skb, const struct vport *vport, cwh = capwap_hdr(skb2); if (remaining > frag_size) - cwh->begin = FRAG_HDR; + cwh->begin |= FRAG_HDR; else - cwh->begin = FRAG_LAST_HDR; + cwh->begin |= FRAG_LAST_HDR; cwh->frag_id = frag_id; cwh->frag_off = htons(offset); @@ -368,18 +555,18 @@ static struct sk_buff *fragment(struct sk_buff *skb, const struct vport *vport, remaining -= frag_size; } - goto out; + consume_skb(skb); + return result; error: - tnl_free_linked_skbs(result); -out: + ovs_tnl_free_linked_skbs(result); kfree_skb(skb); - return result; + return NULL; } /* All of the following functions relate to fragmentation reassembly. */ -static inline struct frag_queue *ifq_cast(struct inet_frag_queue *ifq) +static struct frag_queue *ifq_cast(struct inet_frag_queue *ifq) { return container_of(ifq, struct frag_queue, ifq); } @@ -391,13 +578,14 @@ static u32 frag_hash(struct frag_match *match) frag_state.rnd) & (INETFRAGS_HASHSZ - 1); } -static struct frag_queue *queue_find(struct frag_match *match) +static struct frag_queue *queue_find(struct netns_frags *ns_frag_state, + struct frag_match *match) { struct inet_frag_queue *ifq; read_lock(&frag_state.lock); - ifq = inet_frag_find(&frag_netns_state, &frag_state, match, frag_hash(match)); + ifq = inet_frag_find(ns_frag_state, &frag_state, match, frag_hash(match)); if (!ifq) return NULL; @@ -572,19 +760,21 @@ static struct sk_buff *defrag(struct sk_buff *skb, bool frag_last) { struct iphdr *iph = ip_hdr(skb); struct capwaphdr *cwh = capwap_hdr(skb); + struct capwap_net *capwap_net = ovs_get_capwap_net(dev_net(skb->dev)); + struct netns_frags *ns_frag_state = &capwap_net->frag_state; struct frag_match match; u16 frag_off; struct frag_queue *fq; - if (atomic_read(&frag_netns_state.mem) > frag_netns_state.high_thresh) - inet_frag_evictor(&frag_netns_state, &frag_state); + if (atomic_read(&ns_frag_state->mem) > ns_frag_state->high_thresh) + inet_frag_evictor(ns_frag_state, &frag_state); match.daddr = iph->daddr; match.saddr = iph->saddr; match.id = cwh->frag_id; frag_off = ntohs(cwh->frag_off) & FRAG_OFF_MASK; - fq = queue_find(&match); + fq = queue_find(ns_frag_state, &match); if (fq) { spin_lock(&fq->ifq.lock); skb = frag_queue(fq, skb, frag_off, frag_last); @@ -599,18 +789,6 @@ static struct sk_buff *defrag(struct sk_buff *skb, bool frag_last) return NULL; } -static void defrag_init(void) -{ - inet_frags_init(&frag_state); - inet_frags_init_net(&frag_netns_state); -} - -static void defrag_exit(void) -{ - inet_frags_exit_net(&frag_netns_state, &frag_state); - inet_frags_fini(&frag_state); -} - static void capwap_frag_init(struct inet_frag_queue *ifq, void *match_) { struct frag_match *match = match_; @@ -647,22 +825,23 @@ static void capwap_frag_expire(unsigned long ifq) inet_frag_put(&fq->ifq, &frag_state); } -const struct vport_ops capwap_vport_ops = { - .type = ODP_VPORT_TYPE_CAPWAP, - .flags = VPORT_F_GEN_STATS, +const struct vport_ops ovs_capwap_vport_ops = { + .type = OVS_VPORT_TYPE_CAPWAP, + .flags = VPORT_F_TUN_ID, .init = capwap_init, .exit = capwap_exit, .create = capwap_create, - .destroy = tnl_destroy, - .set_addr = tnl_set_addr, - .get_name = tnl_get_name, - .get_addr = tnl_get_addr, - .get_options = tnl_get_options, - .set_options = tnl_set_options, - .get_dev_flags = vport_gen_get_dev_flags, - .is_running = vport_gen_is_running, - .get_operstate = vport_gen_get_operstate, - .send = tnl_send, + .destroy = capwap_destroy, + .set_addr = ovs_tnl_set_addr, + .get_name = ovs_tnl_get_name, + .get_addr = ovs_tnl_get_addr, + .get_options = ovs_tnl_get_options, + .set_options = ovs_tnl_set_options, + .get_dev_flags = ovs_vport_gen_get_dev_flags, + .is_running = ovs_vport_gen_is_running, + .get_operstate = ovs_vport_gen_get_operstate, + .send = ovs_tnl_send, }; - -#endif /* Linux kernel >= 2.6.26 */ +#else +#warning CAPWAP tunneling will not be available on kernels before 2.6.26 +#endif /* Linux kernel < 2.6.26 */