vserver 1.9.5.x5
[linux-2.6.git] / net / ipv4 / ipcomp.c
1 /*
2  * IP Payload Compression Protocol (IPComp) - RFC3173.
3  *
4  * Copyright (c) 2003 James Morris <jmorris@intercode.com.au>
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License as published by the Free
8  * Software Foundation; either version 2 of the License, or (at your option) 
9  * any later version.
10  *
11  * Todo:
12  *   - Tunable compression parameters.
13  *   - Compression stats.
14  *   - Adaptive compression.
15  */
16 #include <linux/config.h>
17 #include <linux/module.h>
18 #include <asm/scatterlist.h>
19 #include <asm/semaphore.h>
20 #include <linux/crypto.h>
21 #include <linux/pfkeyv2.h>
22 #include <linux/percpu.h>
23 #include <linux/smp.h>
24 #include <linux/list.h>
25 #include <linux/vmalloc.h>
26 #include <linux/rtnetlink.h>
27 #include <net/ip.h>
28 #include <net/xfrm.h>
29 #include <net/icmp.h>
30 #include <net/ipcomp.h>
31
32 struct ipcomp_tfms {
33         struct list_head list;
34         struct crypto_tfm **tfms;
35         int users;
36 };
37
/*
 * Shared resources.  The scratch buffers and the transform list are
 * allocated and released with ipcomp_resource_sem held (see
 * ipcomp_init_state() and ipcomp_destroy()).
 */
static DECLARE_MUTEX(ipcomp_resource_sem);

/* Registry of shared per-CPU transform sets (struct ipcomp_tfms). */
static LIST_HEAD(ipcomp_tfms_list);

/* Per-CPU pointers to IPCOMP_SCRATCH_SIZE byte work buffers. */
static void **ipcomp_scratches;

/* Number of holders of ipcomp_scratches. */
static int ipcomp_scratch_users;
42
43 static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
44 {
45         int err, plen, dlen;
46         struct iphdr *iph;
47         struct ipcomp_data *ipcd = x->data;
48         u8 *start, *scratch;
49         struct crypto_tfm *tfm;
50         int cpu;
51         
52         plen = skb->len;
53         dlen = IPCOMP_SCRATCH_SIZE;
54         start = skb->data;
55
56         cpu = get_cpu();
57         scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
58         tfm = *per_cpu_ptr(ipcd->tfms, cpu);
59
60         err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
61         if (err)
62                 goto out;
63
64         if (dlen < (plen + sizeof(struct ip_comp_hdr))) {
65                 err = -EINVAL;
66                 goto out;
67         }
68
69         err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC);
70         if (err)
71                 goto out;
72                 
73         skb_put(skb, dlen - plen);
74         memcpy(skb->data, scratch, dlen);
75         iph = skb->nh.iph;
76         iph->tot_len = htons(dlen + iph->ihl * 4);
77 out:    
78         put_cpu();
79         return err;
80 }
81
82 static int ipcomp_input(struct xfrm_state *x,
83                         struct xfrm_decap_state *decap, struct sk_buff *skb)
84 {
85         u8 nexthdr;
86         int err = 0;
87         struct iphdr *iph;
88         union {
89                 struct iphdr    iph;
90                 char            buf[60];
91         } tmp_iph;
92
93
94         if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
95             skb_linearize(skb, GFP_ATOMIC) != 0) {
96                 err = -ENOMEM;
97                 goto out;
98         }
99
100         skb->ip_summed = CHECKSUM_NONE;
101
102         /* Remove ipcomp header and decompress original payload */      
103         iph = skb->nh.iph;
104         memcpy(&tmp_iph, iph, iph->ihl * 4);
105         nexthdr = *(u8 *)skb->data;
106         skb_pull(skb, sizeof(struct ip_comp_hdr));
107         skb->nh.raw += sizeof(struct ip_comp_hdr);
108         memcpy(skb->nh.raw, &tmp_iph, tmp_iph.iph.ihl * 4);
109         iph = skb->nh.iph;
110         iph->tot_len = htons(ntohs(iph->tot_len) - sizeof(struct ip_comp_hdr));
111         iph->protocol = nexthdr;
112         skb->h.raw = skb->data;
113         err = ipcomp_decompress(x, skb);
114
115 out:    
116         return err;
117 }
118
119 static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)
120 {
121         int err, plen, dlen, ihlen;
122         struct iphdr *iph = skb->nh.iph;
123         struct ipcomp_data *ipcd = x->data;
124         u8 *start, *scratch;
125         struct crypto_tfm *tfm;
126         int cpu;
127         
128         ihlen = iph->ihl * 4;
129         plen = skb->len - ihlen;
130         dlen = IPCOMP_SCRATCH_SIZE;
131         start = skb->data + ihlen;
132
133         cpu = get_cpu();
134         scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
135         tfm = *per_cpu_ptr(ipcd->tfms, cpu);
136
137         err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
138         if (err)
139                 goto out;
140
141         if ((dlen + sizeof(struct ip_comp_hdr)) >= plen) {
142                 err = -EMSGSIZE;
143                 goto out;
144         }
145         
146         memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen);
147         put_cpu();
148
149         pskb_trim(skb, ihlen + dlen + sizeof(struct ip_comp_hdr));
150         return 0;
151         
152 out:    
153         put_cpu();
154         return err;
155 }
156
157 static int ipcomp_output(struct sk_buff *skb)
158 {
159         int err;
160         struct dst_entry *dst = skb->dst;
161         struct xfrm_state *x = dst->xfrm;
162         struct iphdr *iph;
163         struct ip_comp_hdr *ipch;
164         struct ipcomp_data *ipcd = x->data;
165         int hdr_len = 0;
166
167         iph = skb->nh.iph;
168         iph->tot_len = htons(skb->len);
169         hdr_len = iph->ihl * 4;
170         if ((skb->len - hdr_len) < ipcd->threshold) {
171                 /* Don't bother compressing */
172                 if (x->props.mode) {
173                         ip_send_check(iph);
174                 }
175                 goto out_ok;
176         }
177
178         if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
179             skb_linearize(skb, GFP_ATOMIC) != 0) {
180                 err = -ENOMEM;
181                 goto error;
182         }
183         
184         err = ipcomp_compress(x, skb);
185         if (err) {
186                 if (err == -EMSGSIZE) {
187                         if (x->props.mode) {
188                                 iph = skb->nh.iph;
189                                 ip_send_check(iph);
190                         }
191                         goto out_ok;
192                 }
193                 goto error;
194         }
195
196         /* Install ipcomp header, convert into ipcomp datagram. */
197         iph = skb->nh.iph;
198         iph->tot_len = htons(skb->len);
199         ipch = (struct ip_comp_hdr *)((char *)iph + iph->ihl * 4);
200         ipch->nexthdr = iph->protocol;
201         ipch->flags = 0;
202         ipch->cpi = htons((u16 )ntohl(x->id.spi));
203         iph->protocol = IPPROTO_COMP;
204         ip_send_check(iph);
205
206 out_ok:
207         err = 0;
208
209 error:
210         return err;
211 }
212
213 static void ipcomp4_err(struct sk_buff *skb, u32 info)
214 {
215         u32 spi;
216         struct iphdr *iph = (struct iphdr *)skb->data;
217         struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
218         struct xfrm_state *x;
219
220         if (skb->h.icmph->type != ICMP_DEST_UNREACH ||
221             skb->h.icmph->code != ICMP_FRAG_NEEDED)
222                 return;
223
224         spi = ntohl(ntohs(ipch->cpi));
225         x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr,
226                               spi, IPPROTO_COMP, AF_INET);
227         if (!x)
228                 return;
229         NETDEBUG(printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%u.%u.%u.%u\n",
230                spi, NIPQUAD(iph->daddr)));
231         xfrm_state_put(x);
232 }
233
234 /* We always hold one tunnel user reference to indicate a tunnel */ 
235 static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
236 {
237         struct xfrm_state *t;
238         
239         t = xfrm_state_alloc();
240         if (t == NULL)
241                 goto out;
242
243         t->id.proto = IPPROTO_IPIP;
244         t->id.spi = x->props.saddr.a4;
245         t->id.daddr.a4 = x->id.daddr.a4;
246         memcpy(&t->sel, &x->sel, sizeof(t->sel));
247         t->props.family = AF_INET;
248         t->props.mode = 1;
249         t->props.saddr.a4 = x->props.saddr.a4;
250         t->props.flags = x->props.flags;
251         
252         t->type = xfrm_get_type(IPPROTO_IPIP, t->props.family);
253         if (t->type == NULL)
254                 goto error;
255                 
256         if (t->type->init_state(t, NULL))
257                 goto error;
258
259         t->km.state = XFRM_STATE_VALID;
260         atomic_set(&t->tunnel_users, 1);
261 out:
262         return t;
263
264 error:
265         t->km.state = XFRM_STATE_DEAD;
266         xfrm_state_put(t);
267         t = NULL;
268         goto out;
269 }
270
271 /*
272  * Must be protected by xfrm_cfg_sem.  State and tunnel user references are
273  * always incremented on success.
274  */
275 static int ipcomp_tunnel_attach(struct xfrm_state *x)
276 {
277         int err = 0;
278         struct xfrm_state *t;
279
280         t = xfrm_state_lookup((xfrm_address_t *)&x->id.daddr.a4,
281                               x->props.saddr.a4, IPPROTO_IPIP, AF_INET);
282         if (!t) {
283                 t = ipcomp_tunnel_create(x);
284                 if (!t) {
285                         err = -EINVAL;
286                         goto out;
287                 }
288                 xfrm_state_insert(t);
289                 xfrm_state_hold(t);
290         }
291         x->tunnel = t;
292         atomic_inc(&t->tunnel_users);
293 out:
294         return err;
295 }
296
297 static void ipcomp_free_scratches(void)
298 {
299         int i;
300         void **scratches;
301
302         if (--ipcomp_scratch_users)
303                 return;
304
305         scratches = ipcomp_scratches;
306         if (!scratches)
307                 return;
308
309         for_each_cpu(i) {
310                 void *scratch = *per_cpu_ptr(scratches, i);
311                 if (scratch)
312                         vfree(scratch);
313         }
314
315         free_percpu(scratches);
316 }
317
318 static void **ipcomp_alloc_scratches(void)
319 {
320         int i;
321         void **scratches;
322
323         if (ipcomp_scratch_users++)
324                 return ipcomp_scratches;
325
326         scratches = alloc_percpu(void *);
327         if (!scratches)
328                 return NULL;
329
330         ipcomp_scratches = scratches;
331
332         for_each_cpu(i) {
333                 void *scratch = vmalloc(IPCOMP_SCRATCH_SIZE);
334                 if (!scratch)
335                         return NULL;
336                 *per_cpu_ptr(scratches, i) = scratch;
337         }
338
339         return scratches;
340 }
341
342 static void ipcomp_free_tfms(struct crypto_tfm **tfms)
343 {
344         struct ipcomp_tfms *pos;
345         int cpu;
346
347         list_for_each_entry(pos, &ipcomp_tfms_list, list) {
348                 if (pos->tfms == tfms)
349                         break;
350         }
351
352         BUG_TRAP(pos);
353
354         if (--pos->users)
355                 return;
356
357         list_del(&pos->list);
358         kfree(pos);
359
360         if (!tfms)
361                 return;
362
363         for_each_cpu(cpu) {
364                 struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu);
365                 if (tfm)
366                         crypto_free_tfm(tfm);
367         }
368         free_percpu(tfms);
369 }
370
371 static struct crypto_tfm **ipcomp_alloc_tfms(const char *alg_name)
372 {
373         struct ipcomp_tfms *pos;
374         struct crypto_tfm **tfms;
375         int cpu;
376
377         /* This can be any valid CPU ID so we don't need locking. */
378         cpu = smp_processor_id();
379
380         list_for_each_entry(pos, &ipcomp_tfms_list, list) {
381                 struct crypto_tfm *tfm;
382
383                 tfms = pos->tfms;
384                 tfm = *per_cpu_ptr(tfms, cpu);
385
386                 if (!strcmp(crypto_tfm_alg_name(tfm), alg_name)) {
387                         pos->users++;
388                         return tfms;
389                 }
390         }
391
392         pos = kmalloc(sizeof(*pos), GFP_KERNEL);
393         if (!pos)
394                 return NULL;
395
396         pos->users = 1;
397         INIT_LIST_HEAD(&pos->list);
398         list_add(&pos->list, &ipcomp_tfms_list);
399
400         pos->tfms = tfms = alloc_percpu(struct crypto_tfm *);
401         if (!tfms)
402                 goto error;
403
404         for_each_cpu(cpu) {
405                 struct crypto_tfm *tfm = crypto_alloc_tfm(alg_name, 0);
406                 if (!tfm)
407                         goto error;
408                 *per_cpu_ptr(tfms, cpu) = tfm;
409         }
410
411         return tfms;
412
413 error:
414         ipcomp_free_tfms(tfms);
415         return NULL;
416 }
417
418 static void ipcomp_free_data(struct ipcomp_data *ipcd)
419 {
420         if (ipcd->tfms)
421                 ipcomp_free_tfms(ipcd->tfms);
422         ipcomp_free_scratches();
423 }
424
425 static void ipcomp_destroy(struct xfrm_state *x)
426 {
427         struct ipcomp_data *ipcd = x->data;
428         if (!ipcd)
429                 return;
430         xfrm_state_delete_tunnel(x);
431         down(&ipcomp_resource_sem);
432         ipcomp_free_data(ipcd);
433         up(&ipcomp_resource_sem);
434         kfree(ipcd);
435 }
436
437 static int ipcomp_init_state(struct xfrm_state *x, void *args)
438 {
439         int err;
440         struct ipcomp_data *ipcd;
441         struct xfrm_algo_desc *calg_desc;
442
443         err = -EINVAL;
444         if (!x->calg)
445                 goto out;
446
447         if (x->encap)
448                 goto out;
449
450         err = -ENOMEM;
451         ipcd = kmalloc(sizeof(*ipcd), GFP_KERNEL);
452         if (!ipcd)
453                 goto out;
454
455         memset(ipcd, 0, sizeof(*ipcd));
456         x->props.header_len = 0;
457         if (x->props.mode)
458                 x->props.header_len += sizeof(struct iphdr);
459
460         down(&ipcomp_resource_sem);
461         if (!ipcomp_alloc_scratches())
462                 goto error;
463
464         ipcd->tfms = ipcomp_alloc_tfms(x->calg->alg_name);
465         if (!ipcd->tfms)
466                 goto error;
467         up(&ipcomp_resource_sem);
468
469         if (x->props.mode) {
470                 err = ipcomp_tunnel_attach(x);
471                 if (err)
472                         goto error_tunnel;
473         }
474
475         calg_desc = xfrm_calg_get_byname(x->calg->alg_name, 0);
476         BUG_ON(!calg_desc);
477         ipcd->threshold = calg_desc->uinfo.comp.threshold;
478         x->data = ipcd;
479         err = 0;
480 out:
481         return err;
482
483 error_tunnel:
484         down(&ipcomp_resource_sem);
485 error:
486         ipcomp_free_data(ipcd);
487         up(&ipcomp_resource_sem);
488         kfree(ipcd);
489         goto out;
490 }
491
492 static struct xfrm_type ipcomp_type = {
493         .description    = "IPCOMP4",
494         .owner          = THIS_MODULE,
495         .proto          = IPPROTO_COMP,
496         .init_state     = ipcomp_init_state,
497         .destructor     = ipcomp_destroy,
498         .input          = ipcomp_input,
499         .output         = ipcomp_output
500 };
501
502 static struct net_protocol ipcomp4_protocol = {
503         .handler        =       xfrm4_rcv,
504         .err_handler    =       ipcomp4_err,
505         .no_policy      =       1,
506 };
507
508 static int __init ipcomp4_init(void)
509 {
510         if (xfrm_register_type(&ipcomp_type, AF_INET) < 0) {
511                 printk(KERN_INFO "ipcomp init: can't add xfrm type\n");
512                 return -EAGAIN;
513         }
514         if (inet_add_protocol(&ipcomp4_protocol, IPPROTO_COMP) < 0) {
515                 printk(KERN_INFO "ipcomp init: can't add protocol\n");
516                 xfrm_unregister_type(&ipcomp_type, AF_INET);
517                 return -EAGAIN;
518         }
519         return 0;
520 }
521
522 static void __exit ipcomp4_fini(void)
523 {
524         if (inet_del_protocol(&ipcomp4_protocol, IPPROTO_COMP) < 0)
525                 printk(KERN_INFO "ip ipcomp close: can't remove protocol\n");
526         if (xfrm_unregister_type(&ipcomp_type, AF_INET) < 0)
527                 printk(KERN_INFO "ip ipcomp close: can't remove xfrm type\n");
528 }
529
530 module_init(ipcomp4_init);
531 module_exit(ipcomp4_fini);
532
533 MODULE_LICENSE("GPL");
534 MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) - RFC3173");
535 MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
536