fedora core 6 1.2949 + vserver 2.2.0
[linux-2.6.git] / net / ipv4 / ipcomp.c
index 2a5a7df..3839b70 100644 (file)
  *   - Compression stats.
  *   - Adaptive compression.
  */
-#include <linux/config.h>
 #include <linux/module.h>
 #include <asm/scatterlist.h>
+#include <asm/semaphore.h>
 #include <linux/crypto.h>
+#include <linux/err.h>
 #include <linux/pfkeyv2.h>
-#include <net/inet_ecn.h>
+#include <linux/percpu.h>
+#include <linux/smp.h>
+#include <linux/list.h>
+#include <linux/vmalloc.h>
+#include <linux/rtnetlink.h>
+#include <linux/mutex.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
 #include <net/icmp.h>
 #include <net/ipcomp.h>
+#include <net/protocol.h>
+
+struct ipcomp_tfms {                   /* one shared set of per-CPU compression transforms */
+       struct list_head list;          /* entry in ipcomp_tfms_list */
+       struct crypto_comp **tfms;      /* alloc_percpu() array: one crypto_comp pointer per CPU */
+       int users;                      /* holders of this set; it is torn down when this drops to 0 */
+};
+
+static DEFINE_MUTEX(ipcomp_resource_mutex);    /* taken around the scratch/tfm alloc and free calls in the init and destroy paths */
+static void **ipcomp_scratches;                /* per-CPU pointers to vmalloc()ed buffers of IPCOMP_SCRATCH_SIZE bytes */
+static int ipcomp_scratch_users;               /* outstanding ipcomp_alloc_scratches() calls not yet released */
+static LIST_HEAD(ipcomp_tfms_list);            /* every live struct ipcomp_tfms */
 
 static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
 {
        int err, plen, dlen;
-       struct iphdr *iph;
        struct ipcomp_data *ipcd = x->data;
-       u8 *start, *scratch = ipcd->scratch;
+       u8 *start, *scratch;
+       struct crypto_comp *tfm;
+       int cpu;
        
        plen = skb->len;
        dlen = IPCOMP_SCRATCH_SIZE;
        start = skb->data;
 
-       err = crypto_comp_decompress(ipcd->tfm, start, plen, scratch, &dlen);
+       cpu = get_cpu();
+       scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
+       tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+
+       err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
        if (err)
                goto out;
 
@@ -48,45 +70,31 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
        if (err)
                goto out;
                
-       skb_put(skb, dlen - plen);
+       skb->truesize += dlen - plen;
+       __skb_put(skb, dlen - plen);
        memcpy(skb->data, scratch, dlen);
-       iph = skb->nh.iph;
-       iph->tot_len = htons(dlen + iph->ihl * 4);
 out:   
+       put_cpu();
        return err;
 }
 
-static int ipcomp_input(struct xfrm_state *x,
-                        struct xfrm_decap_state *decap, struct sk_buff *skb)
+static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-       u8 nexthdr;
-       int err = 0;
+       int err = -ENOMEM;
        struct iphdr *iph;
-       union {
-               struct iphdr    iph;
-               char            buf[60];
-       } tmp_iph;
-
+       struct ip_comp_hdr *ipch;
 
-       if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
-           skb_linearize(skb, GFP_ATOMIC) != 0) {
-               err = -ENOMEM;
+       if (skb_linearize_cow(skb))
                goto out;
-       }
 
        skb->ip_summed = CHECKSUM_NONE;
 
        /* Remove ipcomp header and decompress original payload */      
        iph = skb->nh.iph;
-       memcpy(&tmp_iph, iph, iph->ihl * 4);
-       nexthdr = *(u8 *)skb->data;
-       skb_pull(skb, sizeof(struct ip_comp_hdr));
-       skb->nh.raw += sizeof(struct ip_comp_hdr);
-       memcpy(skb->nh.raw, &tmp_iph, tmp_iph.iph.ihl * 4);
-       iph = skb->nh.iph;
-       iph->tot_len = htons(ntohs(iph->tot_len) - sizeof(struct ip_comp_hdr));
-       iph->protocol = nexthdr;
-       skb->h.raw = skb->data;
+       ipch = (void *)skb->data;
+       iph->protocol = ipch->nexthdr;
+       skb->h.raw = skb->nh.raw + sizeof(*ipch);
+       __skb_pull(skb, sizeof(*ipch));
        err = ipcomp_decompress(x, skb);
 
 out:   
@@ -98,14 +106,20 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)
        int err, plen, dlen, ihlen;
        struct iphdr *iph = skb->nh.iph;
        struct ipcomp_data *ipcd = x->data;
-       u8 *start, *scratch = ipcd->scratch;
+       u8 *start, *scratch;
+       struct crypto_comp *tfm;
+       int cpu;
        
        ihlen = iph->ihl * 4;
        plen = skb->len - ihlen;
        dlen = IPCOMP_SCRATCH_SIZE;
        start = skb->data + ihlen;
 
-       err = crypto_comp_compress(ipcd->tfm, start, plen, scratch, &dlen);
+       cpu = get_cpu();
+       scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
+       tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+
+       err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
        if (err)
                goto out;
 
@@ -114,137 +128,62 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)
                goto out;
        }
        
-       memcpy(start, scratch, dlen);
-       pskb_trim(skb, ihlen + dlen);
+       memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen);
+       put_cpu();
+
+       pskb_trim(skb, ihlen + dlen + sizeof(struct ip_comp_hdr));
+       return 0;
        
 out:   
+       put_cpu();
        return err;
 }
 
-static void ipcomp_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb)
-{
-       struct dst_entry *dst = skb->dst;
-       struct iphdr *iph, *top_iph;
-
-       iph = skb->nh.iph;
-       top_iph = (struct iphdr *)skb_push(skb, sizeof(struct iphdr));
-       top_iph->ihl = 5;
-       top_iph->version = 4;
-       top_iph->tos = iph->tos;
-       top_iph->tot_len = htons(skb->len);
-       if (!(iph->frag_off&htons(IP_DF)))
-               __ip_select_ident(top_iph, dst, 0);
-       top_iph->ttl = iph->ttl;
-       top_iph->check = 0;
-       top_iph->saddr = x->props.saddr.a4;
-       top_iph->daddr = x->id.daddr.a4;
-       top_iph->frag_off = iph->frag_off&~htons(IP_MF|IP_OFFSET);
-       memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
-       skb->nh.raw = skb->data;
-}
-
-static int ipcomp_output(struct sk_buff *skb)
+static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb)    /* compress the payload and insert the IPComp header when worthwhile; both exits return 0 */
 {
        int err;
-       struct dst_entry *dst = skb->dst;
-       struct xfrm_state *x = dst->xfrm;
-       struct iphdr *iph, *top_iph;
+       struct iphdr *iph;
        struct ip_comp_hdr *ipch;
        struct ipcomp_data *ipcd = x->data;
-       union {
-               struct iphdr    iph;
-               char            buf[60];
-       } tmp_iph;
        int hdr_len = 0;
 
-       if (skb->ip_summed == CHECKSUM_HW && skb_checksum_help(skb) == NULL) {
-               err = -EINVAL;
-               goto error_nolock;
-       }
-
-       spin_lock_bh(&x->lock);
-       err = xfrm_check_output(x, skb, AF_INET);
-       if (err)
-               goto error;
-
-       /* Don't bother compressing */
-       if (!x->props.mode) {
-               iph = skb->nh.iph;
-               hdr_len = iph->ihl * 4;
-       }
+       iph = skb->nh.iph;
+       iph->tot_len = htons(skb->len);         /* uncompressed length; rewritten below once compression succeeds */
+       hdr_len = iph->ihl * 4;
        if ((skb->len - hdr_len) < ipcd->threshold) {
-               if (x->props.mode) {
-                       ipcomp_tunnel_encap(x, skb);
-                       iph = skb->nh.iph;
-                       iph->protocol = IPPROTO_IPIP;
-                       ip_send_check(iph);
-               }
+               /* Don't bother compressing */
                goto out_ok;
        }
 
-       if (x->props.mode) 
-               ipcomp_tunnel_encap(x, skb);
-
-       if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
-           skb_linearize(skb, GFP_ATOMIC) != 0) {
-               err = -ENOMEM;
-               goto error;
-       }
+       if (skb_linearize_cow(skb))             /* linearize failed: the packet goes out uncompressed, not dropped */
+               goto out_ok;
        
        err = ipcomp_compress(x, skb);
+       iph = skb->nh.iph;                      /* re-read the header pointer after linearize/compress; NOTE(review): presumably because the buffer may have moved - confirm */
+
        if (err) {
-               if (err == -EMSGSIZE) {
-                       if (x->props.mode) {
-                               iph = skb->nh.iph;
-                               iph->protocol = IPPROTO_IPIP;
-                               ip_send_check(iph);
-                       }
-                       goto out_ok;
-               }
-               goto error;
+               goto out_ok;                    /* any compression error: fall back to sending uncompressed */
        }
 
        /* Install ipcomp header, convert into ipcomp datagram. */
-       iph = skb->nh.iph;
-       memcpy(&tmp_iph, iph, iph->ihl * 4);
-       top_iph = (struct iphdr *)skb_push(skb, sizeof(struct ip_comp_hdr));
-       memcpy(top_iph, &tmp_iph, iph->ihl * 4);
-       iph = top_iph;
-       if (x->props.mode && (x->props.flags & XFRM_STATE_NOECN))
-               IP_ECN_clear(iph);
        iph->tot_len = htons(skb->len);
-       iph->protocol = IPPROTO_COMP;
-       iph->check = 0;
        ipch = (struct ip_comp_hdr *)((char *)iph + iph->ihl * 4);
-       ipch->nexthdr = x->props.mode ? IPPROTO_IPIP : tmp_iph.iph.protocol;
+       ipch->nexthdr = iph->protocol;          /* save the inner protocol before it is overwritten below */
        ipch->flags = 0;
        ipch->cpi = htons((u16 )ntohl(x->id.spi));
+       iph->protocol = IPPROTO_COMP;
        ip_send_check(iph);
-       skb->nh.raw = skb->data;
+       return 0;
 
 out_ok:
-       x->curlft.bytes += skb->len;
-       x->curlft.packets++;
-       spin_unlock_bh(&x->lock);
-       
-       if ((skb->dst = dst_pop(dst)) == NULL) {
-               err = -EHOSTUNREACH;
-               goto error_nolock;
-       }
-       err = NET_XMIT_BYPASS;
-
-out_exit:
-       return err;
-error:
-       spin_unlock_bh(&x->lock);
-error_nolock:
-       kfree_skb(skb);
-       goto out_exit;
+       if (x->props.mode == XFRM_MODE_TUNNEL)  /* uncompressed exit: the checksum is refreshed in tunnel mode only */
+               ip_send_check(iph);
+       return 0;
 }
 
 static void ipcomp4_err(struct sk_buff *skb, u32 info)
 {
-       u32 spi;
+       __be32 spi;
        struct iphdr *iph = (struct iphdr *)skb->data;
        struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
        struct xfrm_state *x;
@@ -253,13 +192,13 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
            skb->h.icmph->code != ICMP_FRAG_NEEDED)
                return;
 
-       spi = ntohl(ntohs(ipch->cpi));
+       spi = htonl(ntohs(ipch->cpi));
        x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr,
                              spi, IPPROTO_COMP, AF_INET);
        if (!x)
                return;
-       NETDEBUG(printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%u.%u.%u.%u\n",
-              spi, NIPQUAD(iph->daddr)));
+       NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%u.%u.%u.%u\n",
+                spi, NIPQUAD(iph->daddr));
        xfrm_state_put(x);
 }
 
@@ -267,6 +206,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
 static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
 {
        struct xfrm_state *t;
+       u8 mode = XFRM_MODE_TUNNEL;
        
        t = xfrm_state_alloc();
        if (t == NULL)
@@ -277,18 +217,15 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
        t->id.daddr.a4 = x->id.daddr.a4;
        memcpy(&t->sel, &x->sel, sizeof(t->sel));
        t->props.family = AF_INET;
-       t->props.mode = 1;
+       if (x->props.mode == XFRM_MODE_BEET)
+               mode = x->props.mode;
+       t->props.mode = mode;
        t->props.saddr.a4 = x->props.saddr.a4;
        t->props.flags = x->props.flags;
-       
-       t->type = xfrm_get_type(IPPROTO_IPIP, t->props.family);
-       if (t->type == NULL)
-               goto error;
-               
-       if (t->type->init_state(t, NULL))
+
+       if (xfrm_init_state(t))
                goto error;
 
-       t->km.state = XFRM_STATE_VALID;
        atomic_set(&t->tunnel_users, 1);
 out:
        return t;
@@ -301,7 +238,7 @@ error:
 }
 
 /*
- * Must be protected by xfrm_cfg_sem.  State and tunnel user references are
+ * Must be protected by xfrm_cfg_mutex.  State and tunnel user references are
  * always incremented on success.
  */
 static int ipcomp_tunnel_attach(struct xfrm_state *x)
@@ -326,12 +263,129 @@ out:
        return err;
 }
 
+/*
+ * Drop one user of the shared per-CPU scratch buffers and release the
+ * buffers once the last user is gone.
+ *
+ * The callers visible in this file hold ipcomp_resource_mutex.
+ */
+static void ipcomp_free_scratches(void)
+{
+       int i;
+       void **scratches;
+
+       if (--ipcomp_scratch_users)
+               return;
+
+       scratches = ipcomp_scratches;
+       if (!scratches)
+               return;
+
+       /*
+        * Forget the pointer before freeing it.  A later first-user
+        * ipcomp_alloc_scratches() only overwrites the global after its
+        * alloc_percpu() succeeds, so a stale value left here would be
+        * freed a second time if that allocation failed.
+        */
+       ipcomp_scratches = NULL;
+
+       for_each_possible_cpu(i)
+               vfree(*per_cpu_ptr(scratches, i));
+
+       free_percpu(scratches);
+}
+
+/*
+ * Take one user reference on the shared per-CPU scratch buffers, creating
+ * them for the first user.
+ *
+ * Returns the per-CPU pointer array, or NULL if an allocation failed.  The
+ * user count is incremented even on failure, so the caller still has to
+ * balance the call with ipcomp_free_scratches().
+ */
+static void **ipcomp_alloc_scratches(void)
+{
+       void **bufs;
+       int cpu;
+
+       /* Later users simply share whatever the first user set up. */
+       if (ipcomp_scratch_users++ != 0)
+               return ipcomp_scratches;
+
+       bufs = alloc_percpu(void *);
+       if (bufs == NULL)
+               return NULL;
+
+       /* Publish before filling so a partial fill can be unwound by the free path. */
+       ipcomp_scratches = bufs;
+
+       for_each_possible_cpu(cpu) {
+               void *buf = vmalloc(IPCOMP_SCRATCH_SIZE);
+
+               if (buf == NULL)
+                       return NULL;
+               *per_cpu_ptr(bufs, cpu) = buf;
+       }
+
+       return bufs;
+}
+
+/*
+ * Drop one user of the per-CPU transform set @tfms and destroy the set
+ * when no users remain.
+ *
+ * @tfms may be NULL: ipcomp_alloc_tfms() calls this from its error path
+ * after alloc_percpu() failed, in which case only the list entry is freed.
+ */
+static void ipcomp_free_tfms(struct crypto_comp **tfms)
+{
+       struct ipcomp_tfms *pos;
+       int found = 0;
+       int cpu;
+
+       list_for_each_entry(pos, &ipcomp_tfms_list, list) {
+               if (pos->tfms == tfms) {
+                       found = 1;
+                       break;
+               }
+       }
+
+       /*
+        * list_for_each_entry() never leaves its cursor NULL; after a full
+        * walk the cursor is computed from the list head and is not a real
+        * entry.  Record the match explicitly and leave the cursor alone
+        * when nothing matched.
+        */
+       BUG_TRAP(found);
+       if (!found)
+               return;
+
+       if (--pos->users)
+               return;
+
+       list_del(&pos->list);
+       kfree(pos);
+
+       if (!tfms)
+               return;
+
+       for_each_possible_cpu(cpu) {
+               struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu);
+               crypto_free_comp(tfm);
+       }
+       free_percpu(tfms);
+}
+
+/*
+ * Get the per-CPU set of compression transforms for @alg_name.
+ *
+ * A set already on ipcomp_tfms_list for the same algorithm is shared and
+ * its user count bumped; otherwise a new set is created with one transform
+ * per possible CPU.  Returns NULL if any allocation fails.
+ */
+static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name)
+{
+       struct ipcomp_tfms *pos;
+       struct crypto_comp **tfms;
+       int cpu;
+
+       /* This can be any valid CPU ID so we don't need locking. */
+       cpu = raw_smp_processor_id();
+
+       list_for_each_entry(pos, &ipcomp_tfms_list, list) {
+               struct crypto_comp *tfm;
+
+               tfms = pos->tfms;
+               tfm = *per_cpu_ptr(tfms, cpu);
+
+               if (!strcmp(crypto_comp_name(tfm), alg_name)) {
+                       pos->users++;
+                       return tfms;
+               }
+       }
+
+       pos = kmalloc(sizeof(*pos), GFP_KERNEL);
+       if (!pos)
+               return NULL;
+
+       /*
+        * Link the entry in before filling it so that the error path can
+        * hand everything to ipcomp_free_tfms(), which looks it up by ->tfms.
+        */
+       pos->users = 1;
+       INIT_LIST_HEAD(&pos->list);
+       list_add(&pos->list, &ipcomp_tfms_list);
+
+       pos->tfms = tfms = alloc_percpu(struct crypto_comp *);
+       if (!tfms)
+               goto error;
+
+       for_each_possible_cpu(cpu) {
+               struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0,
+                                                           CRYPTO_ALG_ASYNC);
+               /*
+                * crypto_alloc_comp() reports failure as an ERR_PTR() value,
+                * not NULL, so a plain NULL test would store the error
+                * pointer as if it were a transform.
+                */
+               if (IS_ERR(tfm))
+                       goto error;
+               *per_cpu_ptr(tfms, cpu) = tfm;
+       }
+
+       return tfms;
+
+error:
+       ipcomp_free_tfms(tfms);
+       return NULL;
+}
+
 static void ipcomp_free_data(struct ipcomp_data *ipcd)
 {
-       if (ipcd->tfm)
-               crypto_free_tfm(ipcd->tfm);
-       if (ipcd->scratch)
-               kfree(ipcd->scratch);   
+       if (ipcd->tfms)         /* still unset if ipcomp_alloc_tfms() never succeeded for this state */
+               ipcomp_free_tfms(ipcd->tfms);   /* drop this state's user count on the shared tfm set */
+       ipcomp_free_scratches();        /* unconditional: drops one scratch-buffer user */
 }
 
 static void ipcomp_destroy(struct xfrm_state *x)
@@ -339,11 +393,14 @@ static void ipcomp_destroy(struct xfrm_state *x)
        struct ipcomp_data *ipcd = x->data;
        if (!ipcd)
                return;
+       xfrm_state_delete_tunnel(x);
+       mutex_lock(&ipcomp_resource_mutex);
        ipcomp_free_data(ipcd);
+       mutex_unlock(&ipcomp_resource_mutex);
        kfree(ipcd);
 }
 
-static int ipcomp_init_state(struct xfrm_state *x, void *args)
+static int ipcomp_init_state(struct xfrm_state *x)
 {
        int err;
        struct ipcomp_data *ipcd;
@@ -353,31 +410,34 @@ static int ipcomp_init_state(struct xfrm_state *x, void *args)
        if (!x->calg)
                goto out;
 
+       if (x->encap)
+               goto out;
+
        err = -ENOMEM;
-       ipcd = kmalloc(sizeof(*ipcd), GFP_KERNEL);
+       ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL);
        if (!ipcd)
-               goto error;
+               goto out;
 
-       memset(ipcd, 0, sizeof(*ipcd));
-       x->props.header_len = sizeof(struct ip_comp_hdr);
-       if (x->props.mode)
+       x->props.header_len = 0;
+       if (x->props.mode == XFRM_MODE_TUNNEL)
                x->props.header_len += sizeof(struct iphdr);
 
-       ipcd->scratch = kmalloc(IPCOMP_SCRATCH_SIZE, GFP_KERNEL);
-       if (!ipcd->scratch)
+       mutex_lock(&ipcomp_resource_mutex);
+       if (!ipcomp_alloc_scratches())
                goto error;
-       
-       ipcd->tfm = crypto_alloc_tfm(x->calg->alg_name, 0);
-       if (!ipcd->tfm)
+
+       ipcd->tfms = ipcomp_alloc_tfms(x->calg->alg_name);
+       if (!ipcd->tfms)
                goto error;
+       mutex_unlock(&ipcomp_resource_mutex);
 
-       if (x->props.mode) {
+       if (x->props.mode == XFRM_MODE_TUNNEL) {
                err = ipcomp_tunnel_attach(x);
                if (err)
-                       goto error;
+                       goto error_tunnel;
        }
 
-       calg_desc = xfrm_calg_get_byname(x->calg->alg_name);
+       calg_desc = xfrm_calg_get_byname(x->calg->alg_name, 0);
        BUG_ON(!calg_desc);
        ipcd->threshold = calg_desc->uinfo.comp.threshold;
        x->data = ipcd;
@@ -385,11 +445,12 @@ static int ipcomp_init_state(struct xfrm_state *x, void *args)
 out:
        return err;
 
+error_tunnel:
+       mutex_lock(&ipcomp_resource_mutex);
 error:
-       if (ipcd) {
-               ipcomp_free_data(ipcd);
-               kfree(ipcd);
-       }
+       ipcomp_free_data(ipcd);
+       mutex_unlock(&ipcomp_resource_mutex);
+       kfree(ipcd);
        goto out;
 }
 
@@ -403,7 +464,7 @@ static struct xfrm_type ipcomp_type = {
        .output         = ipcomp_output
 };
 
-static struct inet_protocol ipcomp4_protocol = {
+static struct net_protocol ipcomp4_protocol = {
        .handler        =       xfrm4_rcv,
        .err_handler    =       ipcomp4_err,
        .no_policy      =       1,