fedora core 6 1.2949 + vserver 2.2.0
[linux-2.6.git] / net / ipv6 / ipcomp6.c
index b357db4..511730b 100644 (file)
  *  The decompression of IP datagram MUST be done after the reassembly, 
  *  AH/ESP processing.
  */
-#include <linux/config.h>
 #include <linux/module.h>
-#include <net/inet_ecn.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
 #include <net/ipcomp.h>
 #include <asm/scatterlist.h>
+#include <asm/semaphore.h>
 #include <linux/crypto.h>
+#include <linux/err.h>
 #include <linux/pfkeyv2.h>
 #include <linux/random.h>
+#include <linux/percpu.h>
+#include <linux/smp.h>
+#include <linux/list.h>
+#include <linux/vmalloc.h>
+#include <linux/rtnetlink.h>
 #include <net/icmp.h>
 #include <net/ipv6.h>
+#include <net/protocol.h>
 #include <linux/ipv6.h>
 #include <linux/icmpv6.h>
+#include <linux/mutex.h>
 
-static int ipcomp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
+struct ipcomp6_tfms {
+       struct list_head list;          /* entry in ipcomp6_tfms_list */
+       struct crypto_comp **tfms;      /* per-CPU table of transforms, obtained from alloc_percpu() */
+       int users;                      /* reference count; the set is torn down when it drops to 0 */
+};
+
+static DEFINE_MUTEX(ipcomp6_resource_mutex);   /* taken around scratch/tfm allocation and release */
+static void **ipcomp6_scratches;               /* per-CPU table of vmalloc'ed IPCOMP_SCRATCH_SIZE buffers */
+static int ipcomp6_scratch_users;              /* reference count for ipcomp6_scratches */
+static LIST_HEAD(ipcomp6_tfms_list);           /* all struct ipcomp6_tfms currently allocated */
+
+/*
+ * ipcomp6_input - strip the IPComp header and decompress the payload in place.
+ *
+ * The skb is linearised, the data following the IPComp header is
+ * decompressed into this CPU's scratch buffer using this CPU's transform,
+ * and the result is copied back over the skb data once the tail has been
+ * grown to fit.
+ *
+ * Returns the next-header value taken from the IPComp header on success,
+ * -ENOMEM if the skb cannot be linearised, -EINVAL if decompression fails
+ * or yields less than the compressed length plus the IPComp header size,
+ * or the error returned by pskb_expand_head().
+ */
+static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-       int err = 0;
-       u8 nexthdr = 0;
-       u8 *prevhdr;
-       int hdr_len = skb->h.raw - skb->nh.raw;
-       unsigned char *tmp_hdr = NULL;
+       int err = -ENOMEM;
+       u8 nexthdr;
        struct ipv6hdr *iph;
+       struct ipv6_comp_hdr *ipch;
        int plen, dlen;
        struct ipcomp_data *ipcd = x->data;
-       u8 *start, *scratch = ipcd->scratch;
+       u8 *start, *scratch;
+       struct crypto_comp *tfm;
+       int cpu;
 
-       if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
-               skb_linearize(skb, GFP_ATOMIC) != 0) {
-               err = -ENOMEM;
+       if (skb_linearize_cow(skb))
                goto out;
-       }
 
        skb->ip_summed = CHECKSUM_NONE;
 
        /* Remove ipcomp header and decompress original payload */
        iph = skb->nh.ipv6h;
-       tmp_hdr = kmalloc(hdr_len, GFP_ATOMIC);
-       if (!tmp_hdr)
-               goto out;
-       memcpy(tmp_hdr, iph, hdr_len);
-       nexthdr = *(u8 *)skb->data;
-       skb_pull(skb, sizeof(struct ipv6_comp_hdr)); 
-       skb->nh.raw += sizeof(struct ipv6_comp_hdr);
-       memcpy(skb->nh.raw, tmp_hdr, hdr_len);
-       iph = skb->nh.ipv6h;
-       iph->payload_len = htons(ntohs(iph->payload_len) - sizeof(struct ipv6_comp_hdr));
-       skb->h.raw = skb->data;
+       ipch = (void *)skb->data;
+       /*
+        * Save the next header now: pskb_expand_head() below reallocates
+        * the skb head, after which ipch no longer points at valid data.
+        */
+       nexthdr = ipch->nexthdr;
+       skb->h.raw = skb->nh.raw + sizeof(*ipch);
+       __skb_pull(skb, sizeof(*ipch));
 
        /* decompression */
        plen = skb->len;
        dlen = IPCOMP_SCRATCH_SIZE;
        start = skb->data;
 
-       err = crypto_comp_decompress(ipcd->tfm, start, plen, scratch, &dlen);
+       /* get_cpu() keeps us on this CPU while its scratch buffer is in use */
+       cpu = get_cpu();
+       scratch = *per_cpu_ptr(ipcomp6_scratches, cpu);
+       tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+
+       err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
        if (err) {
                err = -EINVAL;
-               goto out;
+               goto out_put_cpu;
        }
 
        if (dlen < (plen + sizeof(struct ipv6_comp_hdr))) {
                err = -EINVAL;
-               goto out;
+               goto out_put_cpu;
        }
 
        err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC);
        if (err) {
-               goto out;
+               goto out_put_cpu;
        }
 
-       skb_put(skb, dlen - plen);
+       skb->truesize += dlen - plen;
+       __skb_put(skb, dlen - plen);
        memcpy(skb->data, scratch, dlen);
+       err = nexthdr;
 
-       iph = skb->nh.ipv6h;
-       iph->payload_len = htons(skb->len);
-       
-       ip6_find_1stfragopt(skb, &prevhdr);
-       *prevhdr = nexthdr;
+out_put_cpu:
+       put_cpu();
 out:
-       if (tmp_hdr)
-               kfree(tmp_hdr);
-       if (err)
-               goto error_out;
-       return nexthdr;
-error_out:
        return err;
 }
 
-static int ipcomp6_output(struct sk_buff **pskb)
+/*
+ * ipcomp6_output - compress the payload that follows the headers in place.
+ *
+ * The packet is left untouched when the payload is shorter than the
+ * algorithm threshold, when the skb cannot be linearised, when compression
+ * fails, or when the compressed data plus the IPComp header would not be
+ * smaller than the original payload.  Otherwise the compressed data
+ * replaces the payload, an IPComp header is written in front of it, and
+ * the byte at skb->nh.raw is saved into that header and overwritten with
+ * IPPROTO_COMP.
+ *
+ * Always returns 0.
+ */
+static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
 {
        int err;
-       struct dst_entry *dst = (*pskb)->dst;
-       struct xfrm_state *x = dst->xfrm;
-       struct ipv6hdr *tmp_iph = NULL, *iph, *top_iph;
-       int hdr_len = 0;
+       struct ipv6hdr *top_iph;
+       int hdr_len;
        struct ipv6_comp_hdr *ipch;
        struct ipcomp_data *ipcd = x->data;
-       u8 *prevhdr;
-       u8 nexthdr = 0;
        int plen, dlen;
-       u8 *start, *scratch = ipcd->scratch;
+       u8 *start, *scratch;
+       struct crypto_comp *tfm;
+       int cpu;
 
-       if ((*pskb)->ip_summed == CHECKSUM_HW) {
-               err = skb_checksum_help(pskb, 0);
-               if (err)
-                       goto error_nolock;
-       }
-
-       spin_lock_bh(&x->lock);
-
-       err = xfrm_check_output(x, *pskb, AF_INET6);
-       if (err)
-               goto error;
-
-       if (x->props.mode) {
-               hdr_len = sizeof(struct ipv6hdr);
-               nexthdr = IPPROTO_IPV6;
-               iph = (*pskb)->nh.ipv6h;
-               top_iph = (struct ipv6hdr *)skb_push(*pskb, sizeof(struct ipv6hdr));
-               top_iph->version = 6;
-               top_iph->priority = iph->priority;
-               top_iph->flow_lbl[0] = iph->flow_lbl[0];
-               top_iph->flow_lbl[1] = iph->flow_lbl[1];
-               top_iph->flow_lbl[2] = iph->flow_lbl[2];
-               top_iph->nexthdr = IPPROTO_IPV6; /* initial */
-               top_iph->payload_len = htons((*pskb)->len - sizeof(struct ipv6hdr));
-               top_iph->hop_limit = iph->hop_limit;
-               memcpy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr, sizeof(struct in6_addr));
-               memcpy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr, sizeof(struct in6_addr));
-               (*pskb)->nh.raw = (*pskb)->data; /* == top_iph */
-               (*pskb)->h.raw = (*pskb)->nh.raw + hdr_len;
-       } else {
-               hdr_len = ip6_find_1stfragopt(*pskb, &prevhdr);
-               nexthdr = *prevhdr;
-       }
+       hdr_len = skb->h.raw - skb->data;
 
        /* check whether datagram len is larger than threshold */
-       if (((*pskb)->len - hdr_len) < ipcd->threshold) {
+       if ((skb->len - hdr_len) < ipcd->threshold) {
                goto out_ok;
        }
 
-       if ((skb_is_nonlinear(*pskb) || skb_cloned(*pskb)) &&
-               skb_linearize(*pskb, GFP_ATOMIC) != 0) {
-               err = -ENOMEM;
-               goto error;
-       }
+       /* failure to linearise is not fatal: the packet goes out uncompressed */
+       if (skb_linearize_cow(skb))
+               goto out_ok;
 
        /* compression */
-       plen = (*pskb)->len - hdr_len;
+       plen = skb->len - hdr_len;
        dlen = IPCOMP_SCRATCH_SIZE;
-       start = (*pskb)->data + hdr_len;
+       start = skb->h.raw;
 
-       err = crypto_comp_compress(ipcd->tfm, start, plen, scratch, &dlen);
-       if (err) {
-               goto error;
-       }
-       if ((dlen + sizeof(struct ipv6_comp_hdr)) >= plen) {
+       /* get_cpu() keeps us on this CPU while its scratch buffer is in use */
+       cpu = get_cpu();
+       scratch = *per_cpu_ptr(ipcomp6_scratches, cpu);
+       tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+
+       err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
+       if (err || (dlen + sizeof(struct ipv6_comp_hdr)) >= plen) {
+               put_cpu();
                goto out_ok;
        }
-       memcpy(start, scratch, dlen);
-       pskb_trim(*pskb, hdr_len+dlen);
+       /* leave room for the IPComp header in front of the compressed data */
+       memcpy(start + sizeof(struct ipv6_comp_hdr), scratch, dlen);
+       put_cpu();
+       pskb_trim(skb, hdr_len + dlen + sizeof(struct ipv6_comp_hdr));
 
        /* insert ipcomp header and replace datagram */
-       tmp_iph = kmalloc(hdr_len, GFP_ATOMIC);
-       if (!tmp_iph) {
-               err = -ENOMEM;
-               goto error;
-       }
-       memcpy(tmp_iph, (*pskb)->nh.raw, hdr_len);
-       top_iph = (struct ipv6hdr*)skb_push(*pskb, sizeof(struct ipv6_comp_hdr));
-       memcpy(top_iph, tmp_iph, hdr_len);
-       kfree(tmp_iph);
-
-       if (x->props.mode && (x->props.flags & XFRM_STATE_NOECN))
-               IP6_ECN_clear(top_iph);
-       top_iph->payload_len = htons((*pskb)->len - sizeof(struct ipv6hdr));
-       (*pskb)->nh.raw = (*pskb)->data; /* top_iph */
-       ip6_find_1stfragopt(*pskb, &prevhdr); 
-       *prevhdr = IPPROTO_COMP;
-
-       ipch = (struct ipv6_comp_hdr *)((unsigned char *)top_iph + hdr_len);
-       ipch->nexthdr = nexthdr;
+       top_iph = (struct ipv6hdr *)skb->data;
+
+       top_iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+
+       ipch = (struct ipv6_comp_hdr *)start;
+       ipch->nexthdr = *skb->nh.raw;
        ipch->flags = 0;
        ipch->cpi = htons((u16 )ntohl(x->id.spi));
+       *skb->nh.raw = IPPROTO_COMP;
 
-       (*pskb)->h.raw = (unsigned char*)ipch;
 out_ok:
-       x->curlft.bytes += (*pskb)->len;
-       x->curlft.packets++;
-       spin_unlock_bh(&x->lock);
-
-       if (((*pskb)->dst = dst_pop(dst)) == NULL) {
-               err = -EHOSTUNREACH;
-               goto error_nolock;
-       }
-       err = NET_XMIT_BYPASS;
-
-out_exit:
-       return err;
-error:
-       spin_unlock_bh(&x->lock);
-error_nolock:
-       kfree_skb(*pskb);
-       goto out_exit;
+       return 0;
 }
 
+/*
+ * ipcomp6_err - ICMPv6 error handler for IPComp.
+ *
+ * Only ICMPV6_DEST_UNREACH and ICMPV6_PKT_TOOBIG are considered.  The CPI
+ * found at skb->data + offset is widened to a network-order SPI, the
+ * matching state is looked up by the packet's destination address, and a
+ * debug line is logged when one exists.  Nothing else is done with the
+ * state; the reference taken by the lookup is dropped again.
+ */
 static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-                               int type, int code, int offset, __u32 info)
+                               int type, int code, int offset, __be32 info)
 {
-       u32 spi;
+       __be32 spi;
        struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
        struct ipv6_comp_hdr *ipcomph = (struct ipv6_comp_hdr*)(skb->data+offset);
        struct xfrm_state *x;
 
-       if (type != ICMPV6_DEST_UNREACH || type != ICMPV6_PKT_TOOBIG)
+       if (type != ICMPV6_DEST_UNREACH && type != ICMPV6_PKT_TOOBIG)
                return;
 
-       spi = ntohl(ntohs(ipcomph->cpi));
+       spi = htonl(ntohs(ipcomph->cpi));
        x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6);
        if (!x)
                return;
 
-       printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/"
-                       "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
-                       spi, NIP6(iph->daddr));
+       /* spi is in network byte order; convert so the log shows the real value */
+       printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/" NIP6_FMT "\n",
+                       ntohl(spi), NIP6(iph->daddr));
        xfrm_state_put(x);
 }
 
+/*
+ * ipcomp6_tunnel_create - build a companion IPPROTO_IPV6 state for @x.
+ *
+ * The new state copies @x's addresses and selector, takes a freshly
+ * allocated tunnel SPI for @x's source address, and uses BEET mode when
+ * @x does and tunnel mode otherwise.
+ *
+ * Returns the initialised state with tunnel_users set to 1, or NULL when
+ * the allocation, the SPI allocation or xfrm_init_state() fails.
+ */
+static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
+{
+       struct xfrm_state *t;
+
+       t = xfrm_state_alloc();
+       if (!t)
+               return NULL;
+
+       t->id.proto = IPPROTO_IPV6;
+       t->id.spi = xfrm6_tunnel_alloc_spi((xfrm_address_t *)&x->props.saddr);
+       if (!t->id.spi)
+               goto discard;
+
+       memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr));
+       memcpy(&t->sel, &x->sel, sizeof(t->sel));
+       t->props.family = AF_INET6;
+       t->props.mode = (x->props.mode == XFRM_MODE_BEET) ?
+                               XFRM_MODE_BEET : XFRM_MODE_TUNNEL;
+       memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr));
+
+       if (xfrm_init_state(t))
+               goto discard;
+
+       atomic_set(&t->tunnel_users, 1);
+       return t;
+
+discard:
+       /* mark the half-built state dead before dropping our reference */
+       t->km.state = XFRM_STATE_DEAD;
+       xfrm_state_put(t);
+       return NULL;
+}
+
+/*
+ * ipcomp6_tunnel_attach - bind @x to an IPPROTO_IPV6 tunnel state.
+ *
+ * An existing state is looked up through the tunnel SPI registered for
+ * @x's source address; when none is found a new one is created, inserted
+ * and given an extra hold.  The chosen state is stored in x->tunnel and
+ * its tunnel_users count is incremented.
+ *
+ * Returns 0 on success or -EINVAL when a new tunnel state cannot be created.
+ */
+static int ipcomp6_tunnel_attach(struct xfrm_state *x)
+{
+       struct xfrm_state *tunnel = NULL;
+       __be32 spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&x->props.saddr);
+
+       if (spi)
+               tunnel = xfrm_state_lookup((xfrm_address_t *)&x->id.daddr,
+                                          spi, IPPROTO_IPV6, AF_INET6);
+
+       if (!tunnel) {
+               tunnel = ipcomp6_tunnel_create(x);
+               if (!tunnel)
+                       return -EINVAL;
+
+               xfrm_state_insert(tunnel);
+               xfrm_state_hold(tunnel);
+       }
+
+       x->tunnel = tunnel;
+       atomic_inc(&tunnel->tunnel_users);
+       return 0;
+}
+
+/*
+ * ipcomp6_free_scratches - drop one reference on the shared scratch buffers.
+ *
+ * When the last user goes away every per-CPU buffer and the per-CPU table
+ * itself are freed.  The callers visible in this file hold
+ * ipcomp6_resource_mutex around the call.
+ */
+static void ipcomp6_free_scratches(void)
+{
+       int i;
+       void **scratches;
+
+       if (--ipcomp6_scratch_users)
+               return;
+
+       scratches = ipcomp6_scratches;
+       if (!scratches)
+               return;
+
+       /*
+        * Forget the table before freeing it.  Otherwise a later
+        * ipcomp6_alloc_scratches() whose alloc_percpu() fails leaves the
+        * stale pointer in place and the error path frees it a second time.
+        */
+       ipcomp6_scratches = NULL;
+
+       for_each_possible_cpu(i) {
+               void *scratch = *per_cpu_ptr(scratches, i);
+
+               vfree(scratch);
+       }
+
+       free_percpu(scratches);
+}
+
+/*
+ * ipcomp6_alloc_scratches - take a reference on the shared scratch buffers.
+ *
+ * The first user allocates the per-CPU table and one IPCOMP_SCRATCH_SIZE
+ * buffer for every possible CPU; later users simply get the existing
+ * table.  The user count is bumped even when allocation fails, so the
+ * caller must still balance the call with ipcomp6_free_scratches().
+ *
+ * Returns the per-CPU table, or NULL on allocation failure.
+ */
+static void **ipcomp6_alloc_scratches(void)
+{
+       void **table;
+       int cpu;
+
+       if (ipcomp6_scratch_users++ != 0)
+               return ipcomp6_scratches;
+
+       table = alloc_percpu(void *);
+       if (table == NULL)
+               return NULL;
+
+       /* publish the table first so a partial allocation can be cleaned up */
+       ipcomp6_scratches = table;
+
+       for_each_possible_cpu(cpu) {
+               void *buf = vmalloc(IPCOMP_SCRATCH_SIZE);
+
+               if (buf == NULL)
+                       return NULL;
+               *per_cpu_ptr(table, cpu) = buf;
+       }
+
+       return table;
+}
+
+/*
+ * ipcomp6_free_tfms - drop one reference on a per-CPU transform set.
+ *
+ * @tfms is looked up in ipcomp6_tfms_list.  When its last user goes away
+ * the list entry is removed and freed, then every per-CPU transform and
+ * the per-CPU table are released.  @tfms may be NULL when called from the
+ * allocation error path; in that case only the list entry is freed.
+ */
+static void ipcomp6_free_tfms(struct crypto_comp **tfms)
+{
+       struct ipcomp6_tfms *pos;
+       struct ipcomp6_tfms *entry = NULL;
+       int cpu;
+
+       list_for_each_entry(pos, &ipcomp6_tfms_list, list) {
+               if (pos->tfms == tfms) {
+                       entry = pos;
+                       break;
+               }
+       }
+
+       /*
+        * The loop cursor is never NULL, even when nothing matched, so the
+        * match is tracked separately.  Bail out on a miss rather than
+        * touch a bogus entry computed from the list head.
+        */
+       BUG_TRAP(entry);
+       if (!entry)
+               return;
+
+       if (--entry->users)
+               return;
+
+       list_del(&entry->list);
+       kfree(entry);
+
+       if (!tfms)
+               return;
+
+       for_each_possible_cpu(cpu) {
+               struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu);
+               crypto_free_comp(tfm);
+       }
+       free_percpu(tfms);
+}
+
+/*
+ * ipcomp6_alloc_tfms - get a per-CPU set of transforms for @alg_name.
+ *
+ * An existing set on ipcomp6_tfms_list whose algorithm name matches is
+ * reused and its user count bumped.  Otherwise a new list entry is added
+ * and one transform is allocated for every possible CPU.
+ *
+ * Returns the per-CPU table, or NULL when any allocation fails (everything
+ * allocated so far is released through ipcomp6_free_tfms()).
+ */
+static struct crypto_comp **ipcomp6_alloc_tfms(const char *alg_name)
+{
+       struct ipcomp6_tfms *pos;
+       struct crypto_comp **tfms;
+       int cpu;
+
+       /* This can be any valid CPU ID so we don't need locking. */
+       cpu = raw_smp_processor_id();
+
+       list_for_each_entry(pos, &ipcomp6_tfms_list, list) {
+               struct crypto_comp *tfm;
+
+               tfms = pos->tfms;
+               tfm = *per_cpu_ptr(tfms, cpu);
+
+               if (!strcmp(crypto_comp_name(tfm), alg_name)) {
+                       pos->users++;
+                       return tfms;
+               }
+       }
+
+       pos = kmalloc(sizeof(*pos), GFP_KERNEL);
+       if (!pos)
+               return NULL;
+
+       /* link the entry first so the error path can find and free it */
+       pos->users = 1;
+       INIT_LIST_HEAD(&pos->list);
+       list_add(&pos->list, &ipcomp6_tfms_list);
+
+       pos->tfms = tfms = alloc_percpu(struct crypto_comp *);
+       if (!tfms)
+               goto error;
+
+       for_each_possible_cpu(cpu) {
+               struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0,
+                                                           CRYPTO_ALG_ASYNC);
+               /*
+                * crypto_alloc_comp() reports failure with an ERR_PTR()
+                * value, so a plain NULL test would store the error
+                * pointer in the table and report success.
+                */
+               if (!tfm || IS_ERR(tfm))
+                       goto error;
+               *per_cpu_ptr(tfms, cpu) = tfm;
+       }
+
+       return tfms;
+
+error:
+       ipcomp6_free_tfms(tfms);
+       return NULL;
+}
+
 static void ipcomp6_free_data(struct ipcomp_data *ipcd)
 {
-       if (ipcd->tfm)
-               crypto_free_tfm(ipcd->tfm);
-       if (ipcd->scratch)
-               kfree(ipcd->scratch);
+       if (ipcd->tfms)         /* still NULL if no transform set was ever obtained */
+               ipcomp6_free_tfms(ipcd->tfms);
+       ipcomp6_free_scratches();       /* unconditionally drops one scratch-buffer reference */
 }
 
 static void ipcomp6_destroy(struct xfrm_state *x)
@@ -271,11 +392,16 @@ static void ipcomp6_destroy(struct xfrm_state *x)
        struct ipcomp_data *ipcd = x->data;
        if (!ipcd)
                return;
+       xfrm_state_delete_tunnel(x);
+       mutex_lock(&ipcomp6_resource_mutex);
        ipcomp6_free_data(ipcd);
+       mutex_unlock(&ipcomp6_resource_mutex);
        kfree(ipcd);
+
+       xfrm6_tunnel_free_spi((xfrm_address_t *)&x->props.saddr);
 }
 
-static int ipcomp6_init_state(struct xfrm_state *x, void *args)
+static int ipcomp6_init_state(struct xfrm_state *x)
 {
        int err;
        struct ipcomp_data *ipcd;
@@ -285,36 +411,46 @@ static int ipcomp6_init_state(struct xfrm_state *x, void *args)
        if (!x->calg)
                goto out;
 
+       if (x->encap)
+               goto out;
+
        err = -ENOMEM;
-       ipcd = kmalloc(sizeof(*ipcd), GFP_KERNEL);
+       ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL);
        if (!ipcd)
-               goto error;
+               goto out;
 
-       memset(ipcd, 0, sizeof(*ipcd));
-       x->props.header_len = sizeof(struct ipv6_comp_hdr);
-       if (x->props.mode)
+       x->props.header_len = 0;
+       if (x->props.mode == XFRM_MODE_TUNNEL)
                x->props.header_len += sizeof(struct ipv6hdr);
        
-       ipcd->scratch = kmalloc(IPCOMP_SCRATCH_SIZE, GFP_KERNEL);
-       if (!ipcd->scratch)
+       mutex_lock(&ipcomp6_resource_mutex);
+       if (!ipcomp6_alloc_scratches())
                goto error;
 
-       ipcd->tfm = crypto_alloc_tfm(x->calg->alg_name, 0);
-       if (!ipcd->tfm)
+       ipcd->tfms = ipcomp6_alloc_tfms(x->calg->alg_name);
+       if (!ipcd->tfms)
                goto error;
+       mutex_unlock(&ipcomp6_resource_mutex);
+
+       if (x->props.mode == XFRM_MODE_TUNNEL) {
+               err = ipcomp6_tunnel_attach(x);
+               if (err)
+                       goto error_tunnel;
+       }
 
-       calg_desc = xfrm_calg_get_byname(x->calg->alg_name);
+       calg_desc = xfrm_calg_get_byname(x->calg->alg_name, 0);
        BUG_ON(!calg_desc);
        ipcd->threshold = calg_desc->uinfo.comp.threshold;
        x->data = ipcd;
        err = 0;
 out:
        return err;
+error_tunnel:
+       mutex_lock(&ipcomp6_resource_mutex);
 error:
-       if (ipcd) {
-               ipcomp6_free_data(ipcd);
-               kfree(ipcd);
-       }
+       ipcomp6_free_data(ipcd);
+       mutex_unlock(&ipcomp6_resource_mutex);
+       kfree(ipcd);
 
        goto out;
 }
@@ -328,6 +464,7 @@ static struct xfrm_type ipcomp6_type =
        .destructor     = ipcomp6_destroy,
        .input          = ipcomp6_input,
        .output         = ipcomp6_output,
+       .hdr_offset     = xfrm6_find_1stfragopt,
 };
 
 static struct inet6_protocol ipcomp6_protocol =