Preparing this patch for the next series, which upgrades to the latest version of...
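In brief, linux-2.6-525-sknid-elevator.patch gives struct packet_type an sknid_elevator flag and adds a per-CPU sknid_elevator variable to net/core/dev.c that protocol handlers can set while they run from netif_receive_skb(). If a handler leaves a positive value there, netif_receive_skb() copies it into skb->skb_tag (a field assumed to come from earlier patches in this series) and delivers the skb a second time to every ptype_all handler that registered with sknid_elevator set.

The following is a minimal sketch of how a consumer of this interface might look. It is not part of the patch; every sknid_demo_* name and the sknid_mark_current_skb helper are hypothetical and shown only to illustrate the fields and the per-CPU variable introduced below.

/*
 * Illustrative sketch only -- NOT part of the patch below.  A hypothetical
 * module that opts in to the elevated (tagged) delivery, plus the helper a
 * protocol handler might call to mark the skb it is currently processing.
 */
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/if_ether.h>
#include <linux/percpu.h>

/* Defined and exported by net/core/dev.c in this patch. */
DECLARE_PER_CPU(int, sknid_elevator);

/* Registered on ptype_all, so this sees every packet; packets that a
 * protocol handler marked arrive a second time with skb->skb_tag set. */
static int sknid_demo_rcv(struct sk_buff *skb, struct net_device *dev,
                          struct packet_type *pt, struct net_device *orig_dev)
{
        if (skb->skb_tag)
                pr_debug("sknid demo: %s tagged %d\n", dev->name,
                         (int)skb->skb_tag);
        kfree_skb(skb);         /* deliver_skb() took a reference for us */
        return 0;
}

static struct packet_type sknid_demo_pt = {
        .type           = __constant_htons(ETH_P_ALL),
        .func           = sknid_demo_rcv,
        .sknid_elevator = 1,    /* ask for the tagged re-delivery */
};

/* A protocol handler, running from netif_receive_skb() on this CPU, would
 * mark the packet it is handling like this: */
static inline void sknid_mark_current_skb(int tag)
{
        __get_cpu_var(sknid_elevator) = tag;
}

static int __init sknid_demo_init(void)
{
        dev_add_pack(&sknid_demo_pt);
        return 0;
}

static void __exit sknid_demo_exit(void)
{
        dev_remove_pack(&sknid_demo_pt);
}

module_init(sknid_demo_init);
module_exit(sknid_demo_exit);
MODULE_LICENSE("GPL");

Note that an ETH_P_ALL handler also receives every packet through the normal ptype_all pass in netif_receive_skb(); the elevator only adds the second, tagged pass for packets a handler marked.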
diff -Nurb linux-2.6.27-524/include/linux/netdevice.h linux-2.6.27-525/include/linux/netdevice.h
--- linux-2.6.27-524/include/linux/netdevice.h  2008-10-09 18:13:53.000000000 -0400
+++ linux-2.6.27-525/include/linux/netdevice.h  2009-12-04 16:03:56.000000000 -0500
@@ -857,6 +857,7 @@
 struct packet_type {
        __be16                  type;   /* This is really htons(ether_type). */
        struct net_device       *dev;   /* NULL is wildcarded here           */
+       unsigned char           sknid_elevator;
        int                     (*func) (struct sk_buff *,
                                         struct net_device *,
                                         struct packet_type *,
diff -Nurb linux-2.6.27-524/net/core/dev.c linux-2.6.27-525/net/core/dev.c
--- linux-2.6.27-524/net/core/dev.c     2009-12-04 16:03:48.000000000 -0500
+++ linux-2.6.27-525/net/core/dev.c     2009-12-04 16:05:48.000000000 -0500
@@ -99,6 +99,8 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/stat.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
 #include <linux/if_bridge.h>
 #include <linux/if_macvlan.h>
 #include <net/dst.h>
@@ -1318,7 +1320,7 @@
                if ((ptype->dev == dev || !ptype->dev) &&
                    (ptype->af_packet_priv == NULL ||
                     (struct sock *)ptype->af_packet_priv != skb->sk)) {
-                       struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
+                       struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        if (!skb2)
                                break;
 
@@ -2170,6 +2172,10 @@
        rcu_read_unlock();
 }
 
+/* The code already makes the assumption that packet handlers run
+ * sequentially on the same CPU. -Sapan */
+DEFINE_PER_CPU(int, sknid_elevator) = 0;
+
 /**
  *     netif_receive_skb - process receive buffer from network
  *     @skb: buffer to process
@@ -2191,8 +2197,11 @@
        struct net_device *orig_dev;
        struct net_device *null_or_orig;
        int ret = NET_RX_DROP;
+       int *cur_elevator = &__get_cpu_var(sknid_elevator);
        __be16 type;
 
+       *cur_elevator = 0;
+
        if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
                return NET_RX_SUCCESS;
 
@@ -2272,7 +2281,27 @@
        }
 
        if (pt_prev) {
+               /* At this point, cur_elevator may be -2 or a positive value
+                * if a previous protocol handler marked it. */
+               if (*cur_elevator) {
+                       atomic_inc(&skb->users);
+               }
+
                ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+
+               if ((*cur_elevator) > 0) {
+                       skb->skb_tag = *cur_elevator;
+                       list_for_each_entry_rcu(ptype, &ptype_all, list) {
+                               if ((!ptype->dev || ptype->dev == skb->dev) && (ptype->sknid_elevator)) {
+                                       ret = deliver_skb(skb, ptype, orig_dev);
+                               }
+                       }
+               }
+
+               if (*cur_elevator) {
+                       /* a protocol handler marked this packet */
+                       kfree_skb(skb);
+               }
        } else {
                kfree_skb(skb);
                /* Jamal, now you will not able to escape explaining
@@ -4895,6 +4924,7 @@
 EXPORT_SYMBOL(net_enable_timestamp);
 EXPORT_SYMBOL(net_disable_timestamp);
 EXPORT_SYMBOL(dev_get_flags);
+EXPORT_PER_CPU_SYMBOL(sknid_elevator);
 
 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
 EXPORT_SYMBOL(br_handle_frame_hook);
92 diff -Nurb linux-2.6.27-524/net/core/skbuff.c.orig linux-2.6.27-525/net/core/skbuff.c.orig
93 --- linux-2.6.27-524/net/core/skbuff.c.orig     2009-12-04 16:03:47.000000000 -0500
94 +++ linux-2.6.27-525/net/core/skbuff.c.orig     1969-12-31 19:00:00.000000000 -0500
95 @@ -1,2594 +0,0 @@
96 -/*
97 - *     Routines having to do with the 'struct sk_buff' memory handlers.
98 - *
99 - *     Authors:        Alan Cox <iiitac@pyr.swan.ac.uk>
100 - *                     Florian La Roche <rzsfl@rz.uni-sb.de>
101 - *
102 - *     Fixes:
103 - *             Alan Cox        :       Fixed the worst of the load
104 - *                                     balancer bugs.
105 - *             Dave Platt      :       Interrupt stacking fix.
106 - *     Richard Kooijman        :       Timestamp fixes.
107 - *             Alan Cox        :       Changed buffer format.
108 - *             Alan Cox        :       destructor hook for AF_UNIX etc.
109 - *             Linus Torvalds  :       Better skb_clone.
110 - *             Alan Cox        :       Added skb_copy.
111 - *             Alan Cox        :       Added all the changed routines Linus
112 - *                                     only put in the headers
113 - *             Ray VanTassle   :       Fixed --skb->lock in free
114 - *             Alan Cox        :       skb_copy copy arp field
115 - *             Andi Kleen      :       slabified it.
116 - *             Robert Olsson   :       Removed skb_head_pool
117 - *
118 - *     NOTE:
119 - *             The __skb_ routines should be called with interrupts
120 - *     disabled, or you better be *real* sure that the operation is atomic
121 - *     with respect to whatever list is being frobbed (e.g. via lock_sock()
122 - *     or via disabling bottom half handlers, etc).
123 - *
124 - *     This program is free software; you can redistribute it and/or
125 - *     modify it under the terms of the GNU General Public License
126 - *     as published by the Free Software Foundation; either version
127 - *     2 of the License, or (at your option) any later version.
128 - */
129 -
130 -/*
131 - *     The functions in this file will not compile correctly with gcc 2.4.x
132 - */
133 -
134 -#include <linux/module.h>
135 -#include <linux/types.h>
136 -#include <linux/kernel.h>
137 -#include <linux/mm.h>
138 -#include <linux/interrupt.h>
139 -#include <linux/in.h>
140 -#include <linux/inet.h>
141 -#include <linux/slab.h>
142 -#include <linux/netdevice.h>
143 -#ifdef CONFIG_NET_CLS_ACT
144 -#include <net/pkt_sched.h>
145 -#endif
146 -#include <linux/string.h>
147 -#include <linux/skbuff.h>
148 -#include <linux/splice.h>
149 -#include <linux/cache.h>
150 -#include <linux/rtnetlink.h>
151 -#include <linux/init.h>
152 -#include <linux/scatterlist.h>
153 -
154 -#include <net/protocol.h>
155 -#include <net/dst.h>
156 -#include <net/sock.h>
157 -#include <net/checksum.h>
158 -#include <net/xfrm.h>
159 -
160 -#include <asm/uaccess.h>
161 -#include <asm/system.h>
162 -
163 -#include "kmap_skb.h"
164 -
165 -static struct kmem_cache *skbuff_head_cache __read_mostly;
166 -static struct kmem_cache *skbuff_fclone_cache __read_mostly;
167 -
168 -static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
169 -                                 struct pipe_buffer *buf)
170 -{
171 -       put_page(buf->page);
172 -}
173 -
174 -static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
175 -                               struct pipe_buffer *buf)
176 -{
177 -       get_page(buf->page);
178 -}
179 -
180 -static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
181 -                              struct pipe_buffer *buf)
182 -{
183 -       return 1;
184 -}
185 -
186 -
187 -/* Pipe buffer operations for a socket. */
188 -static struct pipe_buf_operations sock_pipe_buf_ops = {
189 -       .can_merge = 0,
190 -       .map = generic_pipe_buf_map,
191 -       .unmap = generic_pipe_buf_unmap,
192 -       .confirm = generic_pipe_buf_confirm,
193 -       .release = sock_pipe_buf_release,
194 -       .steal = sock_pipe_buf_steal,
195 -       .get = sock_pipe_buf_get,
196 -};
197 -
198 -/*
199 - *     Keep out-of-line to prevent kernel bloat.
200 - *     __builtin_return_address is not used because it is not always
201 - *     reliable.
202 - */
203 -
204 -/**
205 - *     skb_over_panic  -       private function
206 - *     @skb: buffer
207 - *     @sz: size
208 - *     @here: address
209 - *
210 - *     Out of line support code for skb_put(). Not user callable.
211 - */
212 -void skb_over_panic(struct sk_buff *skb, int sz, void *here)
213 -{
214 -       printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
215 -                         "data:%p tail:%#lx end:%#lx dev:%s\n",
216 -              here, skb->len, sz, skb->head, skb->data,
217 -              (unsigned long)skb->tail, (unsigned long)skb->end,
218 -              skb->dev ? skb->dev->name : "<NULL>");
219 -       BUG();
220 -}
221 -
222 -/**
223 - *     skb_under_panic -       private function
224 - *     @skb: buffer
225 - *     @sz: size
226 - *     @here: address
227 - *
228 - *     Out of line support code for skb_push(). Not user callable.
229 - */
230 -
231 -void skb_under_panic(struct sk_buff *skb, int sz, void *here)
232 -{
233 -       printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
234 -                         "data:%p tail:%#lx end:%#lx dev:%s\n",
235 -              here, skb->len, sz, skb->head, skb->data,
236 -              (unsigned long)skb->tail, (unsigned long)skb->end,
237 -              skb->dev ? skb->dev->name : "<NULL>");
238 -       BUG();
239 -}
240 -
241 -/*     Allocate a new skbuff. We do this ourselves so we can fill in a few
242 - *     'private' fields and also do memory statistics to find all the
243 - *     [BEEP] leaks.
244 - *
245 - */
246 -
247 -/**
248 - *     __alloc_skb     -       allocate a network buffer
249 - *     @size: size to allocate
250 - *     @gfp_mask: allocation mask
251 - *     @fclone: allocate from fclone cache instead of head cache
252 - *             and allocate a cloned (child) skb
253 - *     @node: numa node to allocate memory on
254 - *
255 - *     Allocate a new &sk_buff. The returned buffer has no headroom and a
256 - *     tail room of size bytes. The object has a reference count of one.
257 - *     The return is the buffer. On a failure the return is %NULL.
258 - *
259 - *     Buffers may only be allocated from interrupts using a @gfp_mask of
260 - *     %GFP_ATOMIC.
261 - */
262 -struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
263 -                           int fclone, int node)
264 -{
265 -       struct kmem_cache *cache;
266 -       struct skb_shared_info *shinfo;
267 -       struct sk_buff *skb;
268 -       u8 *data;
269 -
270 -       cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
271 -
272 -       /* Get the HEAD */
273 -       skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
274 -       if (!skb)
275 -               goto out;
276 -
277 -       size = SKB_DATA_ALIGN(size);
278 -       data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
279 -                       gfp_mask, node);
280 -       if (!data)
281 -               goto nodata;
282 -
283 -       /*
284 -        * Only clear those fields we need to clear, not those that we will
285 -        * actually initialise below. Hence, don't put any more fields after
286 -        * the tail pointer in struct sk_buff!
287 -        */
288 -       memset(skb, 0, offsetof(struct sk_buff, tail));
289 -       skb->truesize = size + sizeof(struct sk_buff);
290 -       atomic_set(&skb->users, 1);
291 -       skb->head = data;
292 -       skb->data = data;
293 -       skb_reset_tail_pointer(skb);
294 -       skb->end = skb->tail + size;
295 -       /* make sure we initialize shinfo sequentially */
296 -       shinfo = skb_shinfo(skb);
297 -       atomic_set(&shinfo->dataref, 1);
298 -       shinfo->nr_frags  = 0;
299 -       shinfo->gso_size = 0;
300 -       shinfo->gso_segs = 0;
301 -       shinfo->gso_type = 0;
302 -       shinfo->ip6_frag_id = 0;
303 -       shinfo->frag_list = NULL;
304 -
305 -       if (fclone) {
306 -               struct sk_buff *child = skb + 1;
307 -               atomic_t *fclone_ref = (atomic_t *) (child + 1);
308 -
309 -               skb->fclone = SKB_FCLONE_ORIG;
310 -               atomic_set(fclone_ref, 1);
311 -
312 -               child->fclone = SKB_FCLONE_UNAVAILABLE;
313 -       }
314 -out:
315 -       return skb;
316 -nodata:
317 -       kmem_cache_free(cache, skb);
318 -       skb = NULL;
319 -       goto out;
320 -}
321 -
322 -/**
323 - *     __netdev_alloc_skb - allocate an skbuff for rx on a specific device
324 - *     @dev: network device to receive on
325 - *     @length: length to allocate
326 - *     @gfp_mask: get_free_pages mask, passed to alloc_skb
327 - *
328 - *     Allocate a new &sk_buff and assign it a usage count of one. The
329 - *     buffer has unspecified headroom built in. Users should allocate
330 - *     the headroom they think they need without accounting for the
331 - *     built in space. The built in space is used for optimisations.
332 - *
333 - *     %NULL is returned if there is no free memory.
334 - */
335 -struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
336 -               unsigned int length, gfp_t gfp_mask)
337 -{
338 -       int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
339 -       struct sk_buff *skb;
340 -
341 -       skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node);
342 -       if (likely(skb)) {
343 -               skb_reserve(skb, NET_SKB_PAD);
344 -               skb->dev = dev;
345 -       }
346 -       return skb;
347 -}
348 -
349 -/**
350 - *     dev_alloc_skb - allocate an skbuff for receiving
351 - *     @length: length to allocate
352 - *
353 - *     Allocate a new &sk_buff and assign it a usage count of one. The
354 - *     buffer has unspecified headroom built in. Users should allocate
355 - *     the headroom they think they need without accounting for the
356 - *     built in space. The built in space is used for optimisations.
357 - *
358 - *     %NULL is returned if there is no free memory. Although this function
359 - *     allocates memory it can be called from an interrupt.
360 - */
361 -struct sk_buff *dev_alloc_skb(unsigned int length)
362 -{
363 -       /*
364 -        * There is more code here than it seems:
365 -        * __dev_alloc_skb is an inline
366 -        */
367 -       return __dev_alloc_skb(length, GFP_ATOMIC);
368 -}
369 -EXPORT_SYMBOL(dev_alloc_skb);
370 -
371 -static void skb_drop_list(struct sk_buff **listp)
372 -{
373 -       struct sk_buff *list = *listp;
374 -
375 -       *listp = NULL;
376 -
377 -       do {
378 -               struct sk_buff *this = list;
379 -               list = list->next;
380 -               kfree_skb(this);
381 -       } while (list);
382 -}
383 -
384 -static inline void skb_drop_fraglist(struct sk_buff *skb)
385 -{
386 -       skb_drop_list(&skb_shinfo(skb)->frag_list);
387 -}
388 -
389 -static void skb_clone_fraglist(struct sk_buff *skb)
390 -{
391 -       struct sk_buff *list;
392 -
393 -       for (list = skb_shinfo(skb)->frag_list; list; list = list->next)
394 -               skb_get(list);
395 -}
396 -
397 -static void skb_release_data(struct sk_buff *skb)
398 -{
399 -       if (!skb->cloned ||
400 -           !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
401 -                              &skb_shinfo(skb)->dataref)) {
402 -               if (skb_shinfo(skb)->nr_frags) {
403 -                       int i;
404 -                       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
405 -                               put_page(skb_shinfo(skb)->frags[i].page);
406 -               }
407 -
408 -               if (skb_shinfo(skb)->frag_list)
409 -                       skb_drop_fraglist(skb);
410 -
411 -               kfree(skb->head);
412 -       }
413 -}
414 -
415 -/*
416 - *     Free an skbuff by memory without cleaning the state.
417 - */
418 -static void kfree_skbmem(struct sk_buff *skb)
419 -{
420 -       struct sk_buff *other;
421 -       atomic_t *fclone_ref;
422 -
423 -       switch (skb->fclone) {
424 -       case SKB_FCLONE_UNAVAILABLE:
425 -               kmem_cache_free(skbuff_head_cache, skb);
426 -               break;
427 -
428 -       case SKB_FCLONE_ORIG:
429 -               fclone_ref = (atomic_t *) (skb + 2);
430 -               if (atomic_dec_and_test(fclone_ref))
431 -                       kmem_cache_free(skbuff_fclone_cache, skb);
432 -               break;
433 -
434 -       case SKB_FCLONE_CLONE:
435 -               fclone_ref = (atomic_t *) (skb + 1);
436 -               other = skb - 1;
437 -
438 -               /* The clone portion is available for
439 -                * fast-cloning again.
440 -                */
441 -               skb->fclone = SKB_FCLONE_UNAVAILABLE;
442 -
443 -               if (atomic_dec_and_test(fclone_ref))
444 -                       kmem_cache_free(skbuff_fclone_cache, other);
445 -               break;
446 -       }
447 -}
448 -
449 -/* Free everything but the sk_buff shell. */
450 -static void skb_release_all(struct sk_buff *skb)
451 -{
452 -       dst_release(skb->dst);
453 -#ifdef CONFIG_XFRM
454 -       secpath_put(skb->sp);
455 -#endif
456 -       if (skb->destructor) {
457 -               WARN_ON(in_irq());
458 -               skb->destructor(skb);
459 -       }
460 -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
461 -       nf_conntrack_put(skb->nfct);
462 -       nf_conntrack_put_reasm(skb->nfct_reasm);
463 -#endif
464 -#ifdef CONFIG_BRIDGE_NETFILTER
465 -       nf_bridge_put(skb->nf_bridge);
466 -#endif
467 -/* XXX: IS this still necessary? - JHS */
468 -#ifdef CONFIG_NET_SCHED
469 -       skb->tc_index = 0;
470 -#ifdef CONFIG_NET_CLS_ACT
471 -       skb->tc_verd = 0;
472 -#endif
473 -#endif
474 -       skb_release_data(skb);
475 -}
476 -
477 -/**
478 - *     __kfree_skb - private function
479 - *     @skb: buffer
480 - *
481 - *     Free an sk_buff. Release anything attached to the buffer.
482 - *     Clean the state. This is an internal helper function. Users should
483 - *     always call kfree_skb
484 - */
485 -
486 -void __kfree_skb(struct sk_buff *skb)
487 -{
488 -       skb_release_all(skb);
489 -       kfree_skbmem(skb);
490 -}
491 -
492 -/**
493 - *     kfree_skb - free an sk_buff
494 - *     @skb: buffer to free
495 - *
496 - *     Drop a reference to the buffer and free it if the usage count has
497 - *     hit zero.
498 - */
499 -void kfree_skb(struct sk_buff *skb)
500 -{
501 -       if (unlikely(!skb))
502 -               return;
503 -       if (likely(atomic_read(&skb->users) == 1))
504 -               smp_rmb();
505 -       else if (likely(!atomic_dec_and_test(&skb->users)))
506 -               return;
507 -       __kfree_skb(skb);
508 -}
509 -
510 -static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
511 -{
512 -       new->tstamp             = old->tstamp;
513 -       new->dev                = old->dev;
514 -       new->transport_header   = old->transport_header;
515 -       new->network_header     = old->network_header;
516 -       new->mac_header         = old->mac_header;
517 -       new->dst                = dst_clone(old->dst);
518 -#ifdef CONFIG_INET
519 -       new->sp                 = secpath_get(old->sp);
520 -#endif
521 -       memcpy(new->cb, old->cb, sizeof(old->cb));
522 -       new->csum_start         = old->csum_start;
523 -       new->csum_offset        = old->csum_offset;
524 -       new->local_df           = old->local_df;
525 -       new->pkt_type           = old->pkt_type;
526 -       new->ip_summed          = old->ip_summed;
527 -       skb_copy_queue_mapping(new, old);
528 -       new->priority           = old->priority;
529 -#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
530 -       new->ipvs_property      = old->ipvs_property;
531 -#endif
532 -       new->protocol           = old->protocol;
533 -       new->mark               = old->mark;
534 -       __nf_copy(new, old);
535 -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
536 -    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
537 -       new->nf_trace           = old->nf_trace;
538 -#endif
539 -#ifdef CONFIG_NET_SCHED
540 -       new->tc_index           = old->tc_index;
541 -#ifdef CONFIG_NET_CLS_ACT
542 -       new->tc_verd            = old->tc_verd;
543 -#endif
544 -#endif
545 -       new->vlan_tci           = old->vlan_tci;
546 -
547 -       skb_copy_secmark(new, old);
548 -}
549 -
550 -static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
551 -{
552 -#define C(x) n->x = skb->x
553 -
554 -       n->next = n->prev = NULL;
555 -       n->sk = NULL;
556 -       __copy_skb_header(n, skb);
557 -
558 -       C(len);
559 -       C(data_len);
560 -       C(mac_len);
561 -       n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
562 -       n->cloned = 1;
563 -       n->nohdr = 0;
564 -       n->destructor = NULL;
565 -       C(iif);
566 -       C(tail);
567 -       C(end);
568 -       C(head);
569 -       C(data);
570 -       C(truesize);
571 -#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE)
572 -       C(do_not_encrypt);
573 -#endif
574 -       atomic_set(&n->users, 1);
575 -
576 -       atomic_inc(&(skb_shinfo(skb)->dataref));
577 -       skb->cloned = 1;
578 -
579 -       return n;
580 -#undef C
581 -}
582 -
583 -/**
584 - *     skb_morph       -       morph one skb into another
585 - *     @dst: the skb to receive the contents
586 - *     @src: the skb to supply the contents
587 - *
588 - *     This is identical to skb_clone except that the target skb is
589 - *     supplied by the user.
590 - *
591 - *     The target skb is returned upon exit.
592 - */
593 -struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
594 -{
595 -       skb_release_all(dst);
596 -       return __skb_clone(dst, src);
597 -}
598 -EXPORT_SYMBOL_GPL(skb_morph);
599 -
600 -/**
601 - *     skb_clone       -       duplicate an sk_buff
602 - *     @skb: buffer to clone
603 - *     @gfp_mask: allocation priority
604 - *
605 - *     Duplicate an &sk_buff. The new one is not owned by a socket. Both
606 - *     copies share the same packet data but not structure. The new
607 - *     buffer has a reference count of 1. If the allocation fails the
608 - *     function returns %NULL otherwise the new buffer is returned.
609 - *
610 - *     If this function is called from an interrupt gfp_mask() must be
611 - *     %GFP_ATOMIC.
612 - */
613 -
614 -struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
615 -{
616 -       struct sk_buff *n;
617 -
618 -       n = skb + 1;
619 -       if (skb->fclone == SKB_FCLONE_ORIG &&
620 -           n->fclone == SKB_FCLONE_UNAVAILABLE) {
621 -               atomic_t *fclone_ref = (atomic_t *) (n + 1);
622 -               n->fclone = SKB_FCLONE_CLONE;
623 -               atomic_inc(fclone_ref);
624 -       } else {
625 -               n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
626 -               if (!n)
627 -                       return NULL;
628 -               n->fclone = SKB_FCLONE_UNAVAILABLE;
629 -       }
630 -
631 -       return __skb_clone(n, skb);
632 -}
633 -
634 -static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
635 -{
636 -#ifndef NET_SKBUFF_DATA_USES_OFFSET
637 -       /*
638 -        *      Shift between the two data areas in bytes
639 -        */
640 -       unsigned long offset = new->data - old->data;
641 -#endif
642 -
643 -       __copy_skb_header(new, old);
644 -
645 -#ifndef NET_SKBUFF_DATA_USES_OFFSET
646 -       /* {transport,network,mac}_header are relative to skb->head */
647 -       new->transport_header += offset;
648 -       new->network_header   += offset;
649 -       new->mac_header       += offset;
650 -#endif
651 -       skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
652 -       skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
653 -       skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
654 -}
655 -
656 -/**
657 - *     skb_copy        -       create private copy of an sk_buff
658 - *     @skb: buffer to copy
659 - *     @gfp_mask: allocation priority
660 - *
661 - *     Make a copy of both an &sk_buff and its data. This is used when the
662 - *     caller wishes to modify the data and needs a private copy of the
663 - *     data to alter. Returns %NULL on failure or the pointer to the buffer
664 - *     on success. The returned buffer has a reference count of 1.
665 - *
666 - *     As by-product this function converts non-linear &sk_buff to linear
667 - *     one, so that &sk_buff becomes completely private and caller is allowed
668 - *     to modify all the data of returned buffer. This means that this
669 - *     function is not recommended for use in circumstances when only
670 - *     header is going to be modified. Use pskb_copy() instead.
671 - */
672 -
673 -struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
674 -{
675 -       int headerlen = skb->data - skb->head;
676 -       /*
677 -        *      Allocate the copy buffer
678 -        */
679 -       struct sk_buff *n;
680 -#ifdef NET_SKBUFF_DATA_USES_OFFSET
681 -       n = alloc_skb(skb->end + skb->data_len, gfp_mask);
682 -#else
683 -       n = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
684 -#endif
685 -       if (!n)
686 -               return NULL;
687 -
688 -       /* Set the data pointer */
689 -       skb_reserve(n, headerlen);
690 -       /* Set the tail pointer and length */
691 -       skb_put(n, skb->len);
692 -
693 -       if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
694 -               BUG();
695 -
696 -       copy_skb_header(n, skb);
697 -       return n;
698 -}
699 -
700 -
701 -/**
702 - *     pskb_copy       -       create copy of an sk_buff with private head.
703 - *     @skb: buffer to copy
704 - *     @gfp_mask: allocation priority
705 - *
706 - *     Make a copy of both an &sk_buff and part of its data, located
707 - *     in header. Fragmented data remain shared. This is used when
708 - *     the caller wishes to modify only header of &sk_buff and needs
709 - *     private copy of the header to alter. Returns %NULL on failure
710 - *     or the pointer to the buffer on success.
711 - *     The returned buffer has a reference count of 1.
712 - */
713 -
714 -struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
715 -{
716 -       /*
717 -        *      Allocate the copy buffer
718 -        */
719 -       struct sk_buff *n;
720 -#ifdef NET_SKBUFF_DATA_USES_OFFSET
721 -       n = alloc_skb(skb->end, gfp_mask);
722 -#else
723 -       n = alloc_skb(skb->end - skb->head, gfp_mask);
724 -#endif
725 -       if (!n)
726 -               goto out;
727 -
728 -       /* Set the data pointer */
729 -       skb_reserve(n, skb->data - skb->head);
730 -       /* Set the tail pointer and length */
731 -       skb_put(n, skb_headlen(skb));
732 -       /* Copy the bytes */
733 -       skb_copy_from_linear_data(skb, n->data, n->len);
734 -
735 -       n->truesize += skb->data_len;
736 -       n->data_len  = skb->data_len;
737 -       n->len       = skb->len;
738 -
739 -       if (skb_shinfo(skb)->nr_frags) {
740 -               int i;
741 -
742 -               for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
743 -                       skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
744 -                       get_page(skb_shinfo(n)->frags[i].page);
745 -               }
746 -               skb_shinfo(n)->nr_frags = i;
747 -       }
748 -
749 -       if (skb_shinfo(skb)->frag_list) {
750 -               skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
751 -               skb_clone_fraglist(n);
752 -       }
753 -
754 -       copy_skb_header(n, skb);
755 -out:
756 -       return n;
757 -}
758 -
759 -/**
760 - *     pskb_expand_head - reallocate header of &sk_buff
761 - *     @skb: buffer to reallocate
762 - *     @nhead: room to add at head
763 - *     @ntail: room to add at tail
764 - *     @gfp_mask: allocation priority
765 - *
766 - *     Expands (or creates identical copy, if &nhead and &ntail are zero)
767 - *     header of skb. &sk_buff itself is not changed. &sk_buff MUST have
768 - *     reference count of 1. Returns zero in the case of success or error,
769 - *     if expansion failed. In the last case, &sk_buff is not changed.
770 - *
771 - *     All the pointers pointing into skb header may change and must be
772 - *     reloaded after call to this function.
773 - */
774 -
775 -int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
776 -                    gfp_t gfp_mask)
777 -{
778 -       int i;
779 -       u8 *data;
780 -#ifdef NET_SKBUFF_DATA_USES_OFFSET
781 -       int size = nhead + skb->end + ntail;
782 -#else
783 -       int size = nhead + (skb->end - skb->head) + ntail;
784 -#endif
785 -       long off;
786 -
787 -       if (skb_shared(skb))
788 -               BUG();
789 -
790 -       size = SKB_DATA_ALIGN(size);
791 -
792 -       data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
793 -       if (!data)
794 -               goto nodata;
795 -
796 -       /* Copy only real data... and, alas, header. This should be
797 -        * optimized for the cases when header is void. */
798 -#ifdef NET_SKBUFF_DATA_USES_OFFSET
799 -       memcpy(data + nhead, skb->head, skb->tail);
800 -#else
801 -       memcpy(data + nhead, skb->head, skb->tail - skb->head);
802 -#endif
803 -       memcpy(data + size, skb_end_pointer(skb),
804 -              sizeof(struct skb_shared_info));
805 -
806 -       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
807 -               get_page(skb_shinfo(skb)->frags[i].page);
808 -
809 -       if (skb_shinfo(skb)->frag_list)
810 -               skb_clone_fraglist(skb);
811 -
812 -       skb_release_data(skb);
813 -
814 -       off = (data + nhead) - skb->head;
815 -
816 -       skb->head     = data;
817 -       skb->data    += off;
818 -#ifdef NET_SKBUFF_DATA_USES_OFFSET
819 -       skb->end      = size;
820 -       off           = nhead;
821 -#else
822 -       skb->end      = skb->head + size;
823 -#endif
824 -       /* {transport,network,mac}_header and tail are relative to skb->head */
825 -       skb->tail             += off;
826 -       skb->transport_header += off;
827 -       skb->network_header   += off;
828 -       skb->mac_header       += off;
829 -       skb->csum_start       += nhead;
830 -       skb->cloned   = 0;
831 -       skb->hdr_len  = 0;
832 -       skb->nohdr    = 0;
833 -       atomic_set(&skb_shinfo(skb)->dataref, 1);
834 -       return 0;
835 -
836 -nodata:
837 -       return -ENOMEM;
838 -}
839 -
840 -/* Make private copy of skb with writable head and some headroom */
841 -
842 -struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
843 -{
844 -       struct sk_buff *skb2;
845 -       int delta = headroom - skb_headroom(skb);
846 -
847 -       if (delta <= 0)
848 -               skb2 = pskb_copy(skb, GFP_ATOMIC);
849 -       else {
850 -               skb2 = skb_clone(skb, GFP_ATOMIC);
851 -               if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0,
852 -                                            GFP_ATOMIC)) {
853 -                       kfree_skb(skb2);
854 -                       skb2 = NULL;
855 -               }
856 -       }
857 -       return skb2;
858 -}
859 -
860 -
861 -/**
862 - *     skb_copy_expand -       copy and expand sk_buff
863 - *     @skb: buffer to copy
864 - *     @newheadroom: new free bytes at head
865 - *     @newtailroom: new free bytes at tail
866 - *     @gfp_mask: allocation priority
867 - *
868 - *     Make a copy of both an &sk_buff and its data and while doing so
869 - *     allocate additional space.
870 - *
871 - *     This is used when the caller wishes to modify the data and needs a
872 - *     private copy of the data to alter as well as more space for new fields.
873 - *     Returns %NULL on failure or the pointer to the buffer
874 - *     on success. The returned buffer has a reference count of 1.
875 - *
876 - *     You must pass %GFP_ATOMIC as the allocation priority if this function
877 - *     is called from an interrupt.
878 - */
879 -struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
880 -                               int newheadroom, int newtailroom,
881 -                               gfp_t gfp_mask)
882 -{
883 -       /*
884 -        *      Allocate the copy buffer
885 -        */
886 -       struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
887 -                                     gfp_mask);
888 -       int oldheadroom = skb_headroom(skb);
889 -       int head_copy_len, head_copy_off;
890 -       int off;
891 -
892 -       if (!n)
893 -               return NULL;
894 -
895 -       skb_reserve(n, newheadroom);
896 -
897 -       /* Set the tail pointer and length */
898 -       skb_put(n, skb->len);
899 -
900 -       head_copy_len = oldheadroom;
901 -       head_copy_off = 0;
902 -       if (newheadroom <= head_copy_len)
903 -               head_copy_len = newheadroom;
904 -       else
905 -               head_copy_off = newheadroom - head_copy_len;
906 -
907 -       /* Copy the linear header and data. */
908 -       if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
909 -                         skb->len + head_copy_len))
910 -               BUG();
911 -
912 -       copy_skb_header(n, skb);
913 -
914 -       off                  = newheadroom - oldheadroom;
915 -       n->csum_start       += off;
916 -#ifdef NET_SKBUFF_DATA_USES_OFFSET
917 -       n->transport_header += off;
918 -       n->network_header   += off;
919 -       n->mac_header       += off;
920 -#endif
921 -
922 -       return n;
923 -}
924 -
925 -/**
926 - *     skb_pad                 -       zero pad the tail of an skb
927 - *     @skb: buffer to pad
928 - *     @pad: space to pad
929 - *
930 - *     Ensure that a buffer is followed by a padding area that is zero
931 - *     filled. Used by network drivers which may DMA or transfer data
932 - *     beyond the buffer end onto the wire.
933 - *
934 - *     May return error in out of memory cases. The skb is freed on error.
935 - */
936 -
937 -int skb_pad(struct sk_buff *skb, int pad)
938 -{
939 -       int err;
940 -       int ntail;
941 -
942 -       /* If the skbuff is non linear tailroom is always zero.. */
943 -       if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
944 -               memset(skb->data+skb->len, 0, pad);
945 -               return 0;
946 -       }
947 -
948 -       ntail = skb->data_len + pad - (skb->end - skb->tail);
949 -       if (likely(skb_cloned(skb) || ntail > 0)) {
950 -               err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC);
951 -               if (unlikely(err))
952 -                       goto free_skb;
953 -       }
954 -
955 -       /* FIXME: The use of this function with non-linear skb's really needs
956 -        * to be audited.
957 -        */
958 -       err = skb_linearize(skb);
959 -       if (unlikely(err))
960 -               goto free_skb;
961 -
962 -       memset(skb->data + skb->len, 0, pad);
963 -       return 0;
964 -
965 -free_skb:
966 -       kfree_skb(skb);
967 -       return err;
968 -}
969 -
970 -/**
971 - *     skb_put - add data to a buffer
972 - *     @skb: buffer to use
973 - *     @len: amount of data to add
974 - *
975 - *     This function extends the used data area of the buffer. If this would
976 - *     exceed the total buffer size the kernel will panic. A pointer to the
977 - *     first byte of the extra data is returned.
978 - */
979 -unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
980 -{
981 -       unsigned char *tmp = skb_tail_pointer(skb);
982 -       SKB_LINEAR_ASSERT(skb);
983 -       skb->tail += len;
984 -       skb->len  += len;
985 -       if (unlikely(skb->tail > skb->end))
986 -               skb_over_panic(skb, len, __builtin_return_address(0));
987 -       return tmp;
988 -}
989 -EXPORT_SYMBOL(skb_put);
990 -
991 -/**
992 - *     skb_push - add data to the start of a buffer
993 - *     @skb: buffer to use
994 - *     @len: amount of data to add
995 - *
996 - *     This function extends the used data area of the buffer at the buffer
997 - *     start. If this would exceed the total buffer headroom the kernel will
998 - *     panic. A pointer to the first byte of the extra data is returned.
999 - */
1000 -unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
1001 -{
1002 -       skb->data -= len;
1003 -       skb->len  += len;
1004 -       if (unlikely(skb->data<skb->head))
1005 -               skb_under_panic(skb, len, __builtin_return_address(0));
1006 -       return skb->data;
1007 -}
1008 -EXPORT_SYMBOL(skb_push);
1009 -
1010 -/**
1011 - *     skb_pull - remove data from the start of a buffer
1012 - *     @skb: buffer to use
1013 - *     @len: amount of data to remove
1014 - *
1015 - *     This function removes data from the start of a buffer, returning
1016 - *     the memory to the headroom. A pointer to the next data in the buffer
1017 - *     is returned. Once the data has been pulled future pushes will overwrite
1018 - *     the old data.
1019 - */
1020 -unsigned char *skb_pull(struct sk_buff *skb, unsigned int len)
1021 -{
1022 -       return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len);
1023 -}
1024 -EXPORT_SYMBOL(skb_pull);
1025 -
1026 -/**
1027 - *     skb_trim - remove end from a buffer
1028 - *     @skb: buffer to alter
1029 - *     @len: new length
1030 - *
1031 - *     Cut the length of a buffer down by removing data from the tail. If
1032 - *     the buffer is already under the length specified it is not modified.
1033 - *     The skb must be linear.
1034 - */
1035 -void skb_trim(struct sk_buff *skb, unsigned int len)
1036 -{
1037 -       if (skb->len > len)
1038 -               __skb_trim(skb, len);
1039 -}
1040 -EXPORT_SYMBOL(skb_trim);
1041 -
1042 -/* Trims skb to length len. It can change skb pointers.
1043 - */
1044 -
1045 -int ___pskb_trim(struct sk_buff *skb, unsigned int len)
1046 -{
1047 -       struct sk_buff **fragp;
1048 -       struct sk_buff *frag;
1049 -       int offset = skb_headlen(skb);
1050 -       int nfrags = skb_shinfo(skb)->nr_frags;
1051 -       int i;
1052 -       int err;
1053 -
1054 -       if (skb_cloned(skb) &&
1055 -           unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))))
1056 -               return err;
1057 -
1058 -       i = 0;
1059 -       if (offset >= len)
1060 -               goto drop_pages;
1061 -
1062 -       for (; i < nfrags; i++) {
1063 -               int end = offset + skb_shinfo(skb)->frags[i].size;
1064 -
1065 -               if (end < len) {
1066 -                       offset = end;
1067 -                       continue;
1068 -               }
1069 -
1070 -               skb_shinfo(skb)->frags[i++].size = len - offset;
1071 -
1072 -drop_pages:
1073 -               skb_shinfo(skb)->nr_frags = i;
1074 -
1075 -               for (; i < nfrags; i++)
1076 -                       put_page(skb_shinfo(skb)->frags[i].page);
1077 -
1078 -               if (skb_shinfo(skb)->frag_list)
1079 -                       skb_drop_fraglist(skb);
1080 -               goto done;
1081 -       }
1082 -
1083 -       for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp);
1084 -            fragp = &frag->next) {
1085 -               int end = offset + frag->len;
1086 -
1087 -               if (skb_shared(frag)) {
1088 -                       struct sk_buff *nfrag;
1089 -
1090 -                       nfrag = skb_clone(frag, GFP_ATOMIC);
1091 -                       if (unlikely(!nfrag))
1092 -                               return -ENOMEM;
1093 -
1094 -                       nfrag->next = frag->next;
1095 -                       kfree_skb(frag);
1096 -                       frag = nfrag;
1097 -                       *fragp = frag;
1098 -               }
1099 -
1100 -               if (end < len) {
1101 -                       offset = end;
1102 -                       continue;
1103 -               }
1104 -
1105 -               if (end > len &&
1106 -                   unlikely((err = pskb_trim(frag, len - offset))))
1107 -                       return err;
1108 -
1109 -               if (frag->next)
1110 -                       skb_drop_list(&frag->next);
1111 -               break;
1112 -       }
1113 -
1114 -done:
1115 -       if (len > skb_headlen(skb)) {
1116 -               skb->data_len -= skb->len - len;
1117 -               skb->len       = len;
1118 -       } else {
1119 -               skb->len       = len;
1120 -               skb->data_len  = 0;
1121 -               skb_set_tail_pointer(skb, len);
1122 -       }
1123 -
1124 -       return 0;
1125 -}
1126 -
1127 -/**
1128 - *     __pskb_pull_tail - advance tail of skb header
1129 - *     @skb: buffer to reallocate
1130 - *     @delta: number of bytes to advance tail
1131 - *
1132 - *     The function makes a sense only on a fragmented &sk_buff,
1133 - *     it expands header moving its tail forward and copying necessary
1134 - *     data from fragmented part.
1135 - *
1136 - *     &sk_buff MUST have reference count of 1.
1137 - *
1138 - *     Returns %NULL (and &sk_buff does not change) if pull failed
1139 - *     or value of new tail of skb in the case of success.
1140 - *
1141 - *     All the pointers pointing into skb header may change and must be
1142 - *     reloaded after call to this function.
1143 - */
1144 -
1145 -/* Moves tail of skb head forward, copying data from fragmented part,
1146 - * when it is necessary.
1147 - * 1. It may fail due to malloc failure.
1148 - * 2. It may change skb pointers.
1149 - *
1150 - * It is pretty complicated. Luckily, it is called only in exceptional cases.
1151 - */
1152 -unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
1153 -{
1154 -       /* If skb has not enough free space at tail, get new one
1155 -        * plus 128 bytes for future expansions. If we have enough
1156 -        * room at tail, reallocate without expansion only if skb is cloned.
1157 -        */
1158 -       int i, k, eat = (skb->tail + delta) - skb->end;
1159 -
1160 -       if (eat > 0 || skb_cloned(skb)) {
1161 -               if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
1162 -                                    GFP_ATOMIC))
1163 -                       return NULL;
1164 -       }
1165 -
1166 -       if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta))
1167 -               BUG();
1168 -
1169 -       /* Optimization: no fragments, no reasons to preestimate
1170 -        * size of pulled pages. Superb.
1171 -        */
1172 -       if (!skb_shinfo(skb)->frag_list)
1173 -               goto pull_pages;
1174 -
1175 -       /* Estimate size of pulled pages. */
1176 -       eat = delta;
1177 -       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1178 -               if (skb_shinfo(skb)->frags[i].size >= eat)
1179 -                       goto pull_pages;
1180 -               eat -= skb_shinfo(skb)->frags[i].size;
1181 -       }
1182 -
1183 -       /* If we need update frag list, we are in troubles.
1184 -        * Certainly, it possible to add an offset to skb data,
1185 -        * but taking into account that pulling is expected to
1186 -        * be very rare operation, it is worth to fight against
1187 -        * further bloating skb head and crucify ourselves here instead.
1188 -        * Pure masohism, indeed. 8)8)
1189 -        */
1190 -       if (eat) {
1191 -               struct sk_buff *list = skb_shinfo(skb)->frag_list;
1192 -               struct sk_buff *clone = NULL;
1193 -               struct sk_buff *insp = NULL;
1194 -
1195 -               do {
1196 -                       BUG_ON(!list);
1197 -
1198 -                       if (list->len <= eat) {
1199 -                               /* Eaten as whole. */
1200 -                               eat -= list->len;
1201 -                               list = list->next;
1202 -                               insp = list;
1203 -                       } else {
1204 -                               /* Eaten partially. */
1205 -
1206 -                               if (skb_shared(list)) {
1207 -                                       /* Sucks! We need to fork list. :-( */
1208 -                                       clone = skb_clone(list, GFP_ATOMIC);
1209 -                                       if (!clone)
1210 -                                               return NULL;
1211 -                                       insp = list->next;
1212 -                                       list = clone;
1213 -                               } else {
1214 -                                       /* This may be pulled without
1215 -                                        * problems. */
1216 -                                       insp = list;
1217 -                               }
1218 -                               if (!pskb_pull(list, eat)) {
1219 -                                       if (clone)
1220 -                                               kfree_skb(clone);
1221 -                                       return NULL;
1222 -                               }
1223 -                               break;
1224 -                       }
1225 -               } while (eat);
1226 -
1227 -               /* Free pulled out fragments. */
1228 -               while ((list = skb_shinfo(skb)->frag_list) != insp) {
1229 -                       skb_shinfo(skb)->frag_list = list->next;
1230 -                       kfree_skb(list);
1231 -               }
1232 -               /* And insert new clone at head. */
1233 -               if (clone) {
1234 -                       clone->next = list;
1235 -                       skb_shinfo(skb)->frag_list = clone;
1236 -               }
1237 -       }
1238 -       /* Success! Now we may commit changes to skb data. */
1239 -
1240 -pull_pages:
1241 -       eat = delta;
1242 -       k = 0;
1243 -       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1244 -               if (skb_shinfo(skb)->frags[i].size <= eat) {
1245 -                       put_page(skb_shinfo(skb)->frags[i].page);
1246 -                       eat -= skb_shinfo(skb)->frags[i].size;
1247 -               } else {
1248 -                       skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
1249 -                       if (eat) {
1250 -                               skb_shinfo(skb)->frags[k].page_offset += eat;
1251 -                               skb_shinfo(skb)->frags[k].size -= eat;
1252 -                               eat = 0;
1253 -                       }
1254 -                       k++;
1255 -               }
1256 -       }
1257 -       skb_shinfo(skb)->nr_frags = k;
1258 -
1259 -       skb->tail     += delta;
1260 -       skb->data_len -= delta;
1261 -
1262 -       return skb_tail_pointer(skb);
1263 -}
1264 -
1265 -/* Copy some data bits from skb to kernel buffer. */
1266 -
1267 -int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
1268 -{
1269 -       int i, copy;
1270 -       int start = skb_headlen(skb);
1271 -
1272 -       if (offset > (int)skb->len - len)
1273 -               goto fault;
1274 -
1275 -       /* Copy header. */
1276 -       if ((copy = start - offset) > 0) {
1277 -               if (copy > len)
1278 -                       copy = len;
1279 -               skb_copy_from_linear_data_offset(skb, offset, to, copy);
1280 -               if ((len -= copy) == 0)
1281 -                       return 0;
1282 -               offset += copy;
1283 -               to     += copy;
1284 -       }
1285 -
1286 -       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1287 -               int end;
1288 -
1289 -               WARN_ON(start > offset + len);
1290 -
1291 -               end = start + skb_shinfo(skb)->frags[i].size;
1292 -               if ((copy = end - offset) > 0) {
1293 -                       u8 *vaddr;
1294 -
1295 -                       if (copy > len)
1296 -                               copy = len;
1297 -
1298 -                       vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
1299 -                       memcpy(to,
1300 -                              vaddr + skb_shinfo(skb)->frags[i].page_offset+
1301 -                              offset - start, copy);
1302 -                       kunmap_skb_frag(vaddr);
1303 -
1304 -                       if ((len -= copy) == 0)
1305 -                               return 0;
1306 -                       offset += copy;
1307 -                       to     += copy;
1308 -               }
1309 -               start = end;
1310 -       }
1311 -
1312 -       if (skb_shinfo(skb)->frag_list) {
1313 -               struct sk_buff *list = skb_shinfo(skb)->frag_list;
1314 -
1315 -               for (; list; list = list->next) {
1316 -                       int end;
1317 -
1318 -                       WARN_ON(start > offset + len);
1319 -
1320 -                       end = start + list->len;
1321 -                       if ((copy = end - offset) > 0) {
1322 -                               if (copy > len)
1323 -                                       copy = len;
1324 -                               if (skb_copy_bits(list, offset - start,
1325 -                                                 to, copy))
1326 -                                       goto fault;
1327 -                               if ((len -= copy) == 0)
1328 -                                       return 0;
1329 -                               offset += copy;
1330 -                               to     += copy;
1331 -                       }
1332 -                       start = end;
1333 -               }
1334 -       }
1335 -       if (!len)
1336 -               return 0;
1337 -
1338 -fault:
1339 -       return -EFAULT;
1340 -}
1341 -
1342 -/*
1343 - * Callback from splice_to_pipe(), if we need to release some pages
1344 - * at the end of the spd in case we error'ed out in filling the pipe.
1345 - */
1346 -static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
1347 -{
1348 -       put_page(spd->pages[i]);
1349 -}
1350 -
1351 -static inline struct page *linear_to_page(struct page *page, unsigned int len,
1352 -                                         unsigned int offset)
1353 -{
1354 -       struct page *p = alloc_pages(GFP_KERNEL, 0);
1355 -
1356 -       if (!p)
1357 -               return NULL;
1358 -       memcpy(page_address(p) + offset, page_address(page) + offset, len);
1359 -
1360 -       return p;
1361 -}
1362 -
1363 -/*
1364 - * Fill page/offset/length into spd, if it can hold more pages.
1365 - */
1366 -static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
1367 -                               unsigned int len, unsigned int offset,
1368 -                               struct sk_buff *skb, int linear)
1369 -{
1370 -       if (unlikely(spd->nr_pages == PIPE_BUFFERS))
1371 -               return 1;
1372 -
1373 -       if (linear) {
1374 -               page = linear_to_page(page, len, offset);
1375 -               if (!page)
1376 -                       return 1;
1377 -       } else
1378 -               get_page(page);
1379 -
1380 -       spd->pages[spd->nr_pages] = page;
1381 -       spd->partial[spd->nr_pages].len = len;
1382 -       spd->partial[spd->nr_pages].offset = offset;
1383 -       spd->nr_pages++;
1384 -
1385 -       return 0;
1386 -}
1387 -
1388 -static inline void __segment_seek(struct page **page, unsigned int *poff,
1389 -                                 unsigned int *plen, unsigned int off)
1390 -{
1391 -       *poff += off;
1392 -       *page += *poff / PAGE_SIZE;
1393 -       *poff = *poff % PAGE_SIZE;
1394 -       *plen -= off;
1395 -}
1396 -
1397 -static inline int __splice_segment(struct page *page, unsigned int poff,
1398 -                                  unsigned int plen, unsigned int *off,
1399 -                                  unsigned int *len, struct sk_buff *skb,
1400 -                                  struct splice_pipe_desc *spd, int linear)
1401 -{
1402 -       if (!*len)
1403 -               return 1;
1404 -
1405 -       /* skip this segment if already processed */
1406 -       if (*off >= plen) {
1407 -               *off -= plen;
1408 -               return 0;
1409 -       }
1410 -
1411 -       /* ignore any bits we already processed */
1412 -       if (*off) {
1413 -               __segment_seek(&page, &poff, &plen, *off);
1414 -               *off = 0;
1415 -       }
1416 -
1417 -       do {
1418 -               unsigned int flen = min(*len, plen);
1419 -
1420 -               /* the linear region may spread across several pages  */
1421 -               flen = min_t(unsigned int, flen, PAGE_SIZE - poff);
1422 -
1423 -               if (spd_fill_page(spd, page, flen, poff, skb, linear))
1424 -                       return 1;
1425 -
1426 -               __segment_seek(&page, &poff, &plen, flen);
1427 -               *len -= flen;
1428 -
1429 -       } while (*len && plen);
1430 -
1431 -       return 0;
1432 -}
1433 -
1434 -/*
1435 - * Map linear and fragment data from the skb to spd. It reports failure if the
1436 - * pipe is full or if we already spliced the requested length.
1437 - */
1438 -static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
1439 -                     unsigned int *len,
1440 -                     struct splice_pipe_desc *spd)
1441 -{
1442 -       int seg;
1443 -
1444 -       /*
1445 -        * map the linear part
1446 -        */
1447 -       if (__splice_segment(virt_to_page(skb->data),
1448 -                            (unsigned long) skb->data & (PAGE_SIZE - 1),
1449 -                            skb_headlen(skb),
1450 -                            offset, len, skb, spd, 1))
1451 -               return 1;
1452 -
1453 -       /*
1454 -        * then map the fragments
1455 -        */
1456 -       for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
1457 -               const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
1458 -
1459 -               if (__splice_segment(f->page, f->page_offset, f->size,
1460 -                                    offset, len, skb, spd, 0))
1461 -                       return 1;
1462 -       }
1463 -
1464 -       return 0;
1465 -}
1466 -
1467 -/*
1468 - * Map data from the skb to a pipe. Should handle both the linear part,
1469 - * the fragments, and the frag list. It does NOT handle frag lists within
1470 - * the frag list, if such a thing exists. We'd probably need to recurse to
1471 - * handle that cleanly.
1472 - */
1473 -int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
1474 -                   struct pipe_inode_info *pipe, unsigned int tlen,
1475 -                   unsigned int flags)
1476 -{
1477 -       struct partial_page partial[PIPE_BUFFERS];
1478 -       struct page *pages[PIPE_BUFFERS];
1479 -       struct splice_pipe_desc spd = {
1480 -               .pages = pages,
1481 -               .partial = partial,
1482 -               .flags = flags,
1483 -               .ops = &sock_pipe_buf_ops,
1484 -               .spd_release = sock_spd_release,
1485 -       };
1486 -
1487 -       /*
1488 -        * __skb_splice_bits() only fails if the output has no room left,
1489 -        * so no point in going over the frag_list for the error case.
1490 -        */
1491 -       if (__skb_splice_bits(skb, &offset, &tlen, &spd))
1492 -               goto done;
1493 -       else if (!tlen)
1494 -               goto done;
1495 -
1496 -       /*
1497 -        * now see if we have a frag_list to map
1498 -        */
1499 -       if (skb_shinfo(skb)->frag_list) {
1500 -               struct sk_buff *list = skb_shinfo(skb)->frag_list;
1501 -
1502 -               for (; list && tlen; list = list->next) {
1503 -                       if (__skb_splice_bits(list, &offset, &tlen, &spd))
1504 -                               break;
1505 -               }
1506 -       }
1507 -
1508 -done:
1509 -       if (spd.nr_pages) {
1510 -               struct sock *sk = skb->sk;
1511 -               int ret;
1512 -
1513 -               /*
1514 -                * Drop the socket lock, otherwise we have reverse
1515 -                * locking dependencies between sk_lock and i_mutex
1516 -                * here as compared to sendfile(). We enter here
1517 -                * with the socket lock held, and splice_to_pipe() will
1518 -                * grab the pipe inode lock. For sendfile() emulation,
1519 -                * we call into ->sendpage() with the i_mutex lock held
1520 -                * and networking will grab the socket lock.
1521 -                */
1522 -               release_sock(sk);
1523 -               ret = splice_to_pipe(pipe, &spd);
1524 -               lock_sock(sk);
1525 -               return ret;
1526 -       }
1527 -
1528 -       return 0;
1529 -}
1530 -
1531 -/**
1532 - *     skb_store_bits - store bits from kernel buffer to skb
1533 - *     @skb: destination buffer
1534 - *     @offset: offset in destination
1535 - *     @from: source buffer
1536 - *     @len: number of bytes to copy
1537 - *
1538 - *     Copy the specified number of bytes from the source buffer to the
1539 - *     destination skb.  This function handles all the messy bits of
1540 - *     traversing fragment lists and such.
1541 - */
1542 -
1543 -int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
1544 -{
1545 -       int i, copy;
1546 -       int start = skb_headlen(skb);
1547 -
1548 -       if (offset > (int)skb->len - len)
1549 -               goto fault;
1550 -
1551 -       if ((copy = start - offset) > 0) {
1552 -               if (copy > len)
1553 -                       copy = len;
1554 -               skb_copy_to_linear_data_offset(skb, offset, from, copy);
1555 -               if ((len -= copy) == 0)
1556 -                       return 0;
1557 -               offset += copy;
1558 -               from += copy;
1559 -       }
1560 -
1561 -       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1562 -               skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1563 -               int end;
1564 -
1565 -               WARN_ON(start > offset + len);
1566 -
1567 -               end = start + frag->size;
1568 -               if ((copy = end - offset) > 0) {
1569 -                       u8 *vaddr;
1570 -
1571 -                       if (copy > len)
1572 -                               copy = len;
1573 -
1574 -                       vaddr = kmap_skb_frag(frag);
1575 -                       memcpy(vaddr + frag->page_offset + offset - start,
1576 -                              from, copy);
1577 -                       kunmap_skb_frag(vaddr);
1578 -
1579 -                       if ((len -= copy) == 0)
1580 -                               return 0;
1581 -                       offset += copy;
1582 -                       from += copy;
1583 -               }
1584 -               start = end;
1585 -       }
1586 -
1587 -       if (skb_shinfo(skb)->frag_list) {
1588 -               struct sk_buff *list = skb_shinfo(skb)->frag_list;
1589 -
1590 -               for (; list; list = list->next) {
1591 -                       int end;
1592 -
1593 -                       WARN_ON(start > offset + len);
1594 -
1595 -                       end = start + list->len;
1596 -                       if ((copy = end - offset) > 0) {
1597 -                               if (copy > len)
1598 -                                       copy = len;
1599 -                               if (skb_store_bits(list, offset - start,
1600 -                                                  from, copy))
1601 -                                       goto fault;
1602 -                               if ((len -= copy) == 0)
1603 -                                       return 0;
1604 -                               offset += copy;
1605 -                               from += copy;
1606 -                       }
1607 -                       start = end;
1608 -               }
1609 -       }
1610 -       if (!len)
1611 -               return 0;
1612 -
1613 -fault:
1614 -       return -EFAULT;
1615 -}
1616 -
1617 -EXPORT_SYMBOL(skb_store_bits);
1618 -
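As a quick illustration of how skb_store_bits() pairs with skb_copy_bits(), here is a minimal, hedged sketch that patches a few bytes at an offset in a possibly non-linear skb and reads them back; the rewrite_skb_bytes() helper and its bound are invented for this note, not part of the patch.

#include <linux/skbuff.h>
#include <linux/string.h>
#include <linux/errno.h>

/* Illustrative only: overwrite 'len' bytes at 'offset' in 'skb',
 * regardless of how the data is split between the linear area,
 * page fragments and the frag_list, then read them back to verify. */
static int rewrite_skb_bytes(struct sk_buff *skb, int offset,
                             const void *buf, int len)
{
        char check[16];

        if (len < 0 || len > (int)sizeof(check))
                return -EINVAL;

        if (skb_store_bits(skb, offset, buf, len))
                return -EFAULT;         /* offset/len outside the skb */

        if (skb_copy_bits(skb, offset, check, len))
                return -EFAULT;

        return memcmp(check, buf, len) ? -EIO : 0;
}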
1619 -/* Checksum skb data. */
1620 -
1621 -__wsum skb_checksum(const struct sk_buff *skb, int offset,
1622 -                         int len, __wsum csum)
1623 -{
1624 -       int start = skb_headlen(skb);
1625 -       int i, copy = start - offset;
1626 -       int pos = 0;
1627 -
1628 -       /* Checksum header. */
1629 -       if (copy > 0) {
1630 -               if (copy > len)
1631 -                       copy = len;
1632 -               csum = csum_partial(skb->data + offset, copy, csum);
1633 -               if ((len -= copy) == 0)
1634 -                       return csum;
1635 -               offset += copy;
1636 -               pos     = copy;
1637 -       }
1638 -
1639 -       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1640 -               int end;
1641 -
1642 -               WARN_ON(start > offset + len);
1643 -
1644 -               end = start + skb_shinfo(skb)->frags[i].size;
1645 -               if ((copy = end - offset) > 0) {
1646 -                       __wsum csum2;
1647 -                       u8 *vaddr;
1648 -                       skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1649 -
1650 -                       if (copy > len)
1651 -                               copy = len;
1652 -                       vaddr = kmap_skb_frag(frag);
1653 -                       csum2 = csum_partial(vaddr + frag->page_offset +
1654 -                                            offset - start, copy, 0);
1655 -                       kunmap_skb_frag(vaddr);
1656 -                       csum = csum_block_add(csum, csum2, pos);
1657 -                       if (!(len -= copy))
1658 -                               return csum;
1659 -                       offset += copy;
1660 -                       pos    += copy;
1661 -               }
1662 -               start = end;
1663 -       }
1664 -
1665 -       if (skb_shinfo(skb)->frag_list) {
1666 -               struct sk_buff *list = skb_shinfo(skb)->frag_list;
1667 -
1668 -               for (; list; list = list->next) {
1669 -                       int end;
1670 -
1671 -                       WARN_ON(start > offset + len);
1672 -
1673 -                       end = start + list->len;
1674 -                       if ((copy = end - offset) > 0) {
1675 -                               __wsum csum2;
1676 -                               if (copy > len)
1677 -                                       copy = len;
1678 -                               csum2 = skb_checksum(list, offset - start,
1679 -                                                    copy, 0);
1680 -                               csum = csum_block_add(csum, csum2, pos);
1681 -                               if ((len -= copy) == 0)
1682 -                                       return csum;
1683 -                               offset += copy;
1684 -                               pos    += copy;
1685 -                       }
1686 -                       start = end;
1687 -               }
1688 -       }
1689 -       BUG_ON(len);
1690 -
1691 -       return csum;
1692 -}
1693 -
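For a sense of how skb_checksum() is typically consumed, the following hedged sketch folds a full-coverage checksum over a non-linear skb into the 16-bit form used in protocol headers; the helper name is made up for illustration.

#include <linux/skbuff.h>
#include <net/checksum.h>

/* Illustrative only: checksum every byte of the skb (linear part,
 * page frags and frag_list included) and fold the 32-bit partial
 * sum down to the value a header field would carry. */
static __sum16 skb_full_csum(const struct sk_buff *skb)
{
        __wsum csum = skb_checksum(skb, 0, skb->len, 0);

        return csum_fold(csum);
}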
1694 -/* Both of above in one bottle. */
1695 -
1696 -__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
1697 -                                   u8 *to, int len, __wsum csum)
1698 -{
1699 -       int start = skb_headlen(skb);
1700 -       int i, copy = start - offset;
1701 -       int pos = 0;
1702 -
1703 -       /* Copy header. */
1704 -       if (copy > 0) {
1705 -               if (copy > len)
1706 -                       copy = len;
1707 -               csum = csum_partial_copy_nocheck(skb->data + offset, to,
1708 -                                                copy, csum);
1709 -               if ((len -= copy) == 0)
1710 -                       return csum;
1711 -               offset += copy;
1712 -               to     += copy;
1713 -               pos     = copy;
1714 -       }
1715 -
1716 -       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1717 -               int end;
1718 -
1719 -               WARN_ON(start > offset + len);
1720 -
1721 -               end = start + skb_shinfo(skb)->frags[i].size;
1722 -               if ((copy = end - offset) > 0) {
1723 -                       __wsum csum2;
1724 -                       u8 *vaddr;
1725 -                       skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1726 -
1727 -                       if (copy > len)
1728 -                               copy = len;
1729 -                       vaddr = kmap_skb_frag(frag);
1730 -                       csum2 = csum_partial_copy_nocheck(vaddr +
1731 -                                                         frag->page_offset +
1732 -                                                         offset - start, to,
1733 -                                                         copy, 0);
1734 -                       kunmap_skb_frag(vaddr);
1735 -                       csum = csum_block_add(csum, csum2, pos);
1736 -                       if (!(len -= copy))
1737 -                               return csum;
1738 -                       offset += copy;
1739 -                       to     += copy;
1740 -                       pos    += copy;
1741 -               }
1742 -               start = end;
1743 -       }
1744 -
1745 -       if (skb_shinfo(skb)->frag_list) {
1746 -               struct sk_buff *list = skb_shinfo(skb)->frag_list;
1747 -
1748 -               for (; list; list = list->next) {
1749 -                       __wsum csum2;
1750 -                       int end;
1751 -
1752 -                       WARN_ON(start > offset + len);
1753 -
1754 -                       end = start + list->len;
1755 -                       if ((copy = end - offset) > 0) {
1756 -                               if (copy > len)
1757 -                                       copy = len;
1758 -                               csum2 = skb_copy_and_csum_bits(list,
1759 -                                                              offset - start,
1760 -                                                              to, copy, 0);
1761 -                               csum = csum_block_add(csum, csum2, pos);
1762 -                               if ((len -= copy) == 0)
1763 -                                       return csum;
1764 -                               offset += copy;
1765 -                               to     += copy;
1766 -                               pos    += copy;
1767 -                       }
1768 -                       start = end;
1769 -               }
1770 -       }
1771 -       BUG_ON(len);
1772 -       return csum;
1773 -}
1774 -
1775 -void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
1776 -{
1777 -       __wsum csum;
1778 -       long csstart;
1779 -
1780 -       if (skb->ip_summed == CHECKSUM_PARTIAL)
1781 -               csstart = skb->csum_start - skb_headroom(skb);
1782 -       else
1783 -               csstart = skb_headlen(skb);
1784 -
1785 -       BUG_ON(csstart > skb_headlen(skb));
1786 -
1787 -       skb_copy_from_linear_data(skb, to, csstart);
1788 -
1789 -       csum = 0;
1790 -       if (csstart != skb->len)
1791 -               csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
1792 -                                             skb->len - csstart, 0);
1793 -
1794 -       if (skb->ip_summed == CHECKSUM_PARTIAL) {
1795 -               long csstuff = csstart + skb->csum_offset;
1796 -
1797 -               *((__sum16 *)(to + csstuff)) = csum_fold(csum);
1798 -       }
1799 -}
1800 -
1801 -/**
1802 - *     skb_dequeue - remove from the head of the queue
1803 - *     @list: list to dequeue from
1804 - *
1805 - *     Remove the head of the list. The list lock is taken so the function
1806 - *     may be used safely with other locking list functions. The head item is
1807 - *     returned or %NULL if the list is empty.
1808 - */
1809 -
1810 -struct sk_buff *skb_dequeue(struct sk_buff_head *list)
1811 -{
1812 -       unsigned long flags;
1813 -       struct sk_buff *result;
1814 -
1815 -       spin_lock_irqsave(&list->lock, flags);
1816 -       result = __skb_dequeue(list);
1817 -       spin_unlock_irqrestore(&list->lock, flags);
1818 -       return result;
1819 -}
1820 -
1821 -/**
1822 - *     skb_dequeue_tail - remove from the tail of the queue
1823 - *     @list: list to dequeue from
1824 - *
1825 - *     Remove the tail of the list. The list lock is taken so the function
1826 - *     may be used safely with other locking list functions. The tail item is
1827 - *     returned or %NULL if the list is empty.
1828 - */
1829 -struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
1830 -{
1831 -       unsigned long flags;
1832 -       struct sk_buff *result;
1833 -
1834 -       spin_lock_irqsave(&list->lock, flags);
1835 -       result = __skb_dequeue_tail(list);
1836 -       spin_unlock_irqrestore(&list->lock, flags);
1837 -       return result;
1838 -}
1839 -
1840 -/**
1841 - *     skb_queue_purge - empty a list
1842 - *     @list: list to empty
1843 - *
1844 - *     Delete all buffers on an &sk_buff list. Each buffer is removed from
1845 - *     the list and one reference dropped. This function takes the list
1846 - *     lock and is atomic with respect to other list locking functions.
1847 - */
1848 -void skb_queue_purge(struct sk_buff_head *list)
1849 -{
1850 -       struct sk_buff *skb;
1851 -       while ((skb = skb_dequeue(list)) != NULL)
1852 -               kfree_skb(skb);
1853 -}
1854 -
1855 -/**
1856 - *     skb_queue_head - queue a buffer at the list head
1857 - *     @list: list to use
1858 - *     @newsk: buffer to queue
1859 - *
1860 - *     Queue a buffer at the start of the list. This function takes the
1861 - *     list lock and can be used safely with other locking &sk_buff functions
1862 - *     safely.
1863 - *
1864 - *     A buffer cannot be placed on two lists at the same time.
1865 - */
1866 -void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
1867 -{
1868 -       unsigned long flags;
1869 -
1870 -       spin_lock_irqsave(&list->lock, flags);
1871 -       __skb_queue_head(list, newsk);
1872 -       spin_unlock_irqrestore(&list->lock, flags);
1873 -}
1874 -
1875 -/**
1876 - *     skb_queue_tail - queue a buffer at the list tail
1877 - *     @list: list to use
1878 - *     @newsk: buffer to queue
1879 - *
1880 - *     Queue a buffer at the tail of the list. This function takes the
1881 - *     list lock and can be used safely with other locking &sk_buff functions
1882 - *     safely.
1883 - *
1884 - *     A buffer cannot be placed on two lists at the same time.
1885 - */
1886 -void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
1887 -{
1888 -       unsigned long flags;
1889 -
1890 -       spin_lock_irqsave(&list->lock, flags);
1891 -       __skb_queue_tail(list, newsk);
1892 -       spin_unlock_irqrestore(&list->lock, flags);
1893 -}
1894 -
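The locked queue helpers above (skb_queue_tail(), skb_dequeue(), skb_queue_purge()) are normally used together to build a private backlog; the sketch below is a hedged example of that pattern with invented names (my_backlog, my_rx and friends), not code from this tree.

#include <linux/skbuff.h>

static struct sk_buff_head my_backlog;  /* hypothetical private queue */

static void my_backlog_init(void)
{
        skb_queue_head_init(&my_backlog);
}

/* Receive path: park the skb.  The list lock is taken inside
 * skb_queue_tail(), so no extra locking is needed here. */
static void my_rx(struct sk_buff *skb)
{
        skb_queue_tail(&my_backlog, skb);
}

/* Process context: drain whatever has accumulated. */
static void my_drain(void)
{
        struct sk_buff *skb;

        while ((skb = skb_dequeue(&my_backlog)) != NULL)
                kfree_skb(skb);         /* or hand the skb on for processing */
}

/* Teardown: drop everything still queued in one call. */
static void my_flush(void)
{
        skb_queue_purge(&my_backlog);
}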
1895 -/**
1896 - *     skb_unlink      -       remove a buffer from a list
1897 - *     @skb: buffer to remove
1898 - *     @list: list to use
1899 - *
1900 - *     Remove a packet from a list. The list locks are taken and this
1901 - *     function is atomic with respect to other list locked calls
1902 - *
1903 - *     You must know what list the SKB is on.
1904 - */
1905 -void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
1906 -{
1907 -       unsigned long flags;
1908 -
1909 -       spin_lock_irqsave(&list->lock, flags);
1910 -       __skb_unlink(skb, list);
1911 -       spin_unlock_irqrestore(&list->lock, flags);
1912 -}
1913 -
1914 -/**
1915 - *     skb_append      -       append a buffer
1916 - *     @old: buffer to insert after
1917 - *     @newsk: buffer to insert
1918 - *     @list: list to use
1919 - *
1920 - *     Place a packet after a given packet in a list. The list locks are taken
1921 - *     and this function is atomic with respect to other list locked calls.
1922 - *     A buffer cannot be placed on two lists at the same time.
1923 - */
1924 -void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
1925 -{
1926 -       unsigned long flags;
1927 -
1928 -       spin_lock_irqsave(&list->lock, flags);
1929 -       __skb_queue_after(list, old, newsk);
1930 -       spin_unlock_irqrestore(&list->lock, flags);
1931 -}
1932 -
1933 -
1934 -/**
1935 - *     skb_insert      -       insert a buffer
1936 - *     @old: buffer to insert before
1937 - *     @newsk: buffer to insert
1938 - *     @list: list to use
1939 - *
1940 - *     Place a packet before a given packet in a list. The list locks are
1941 - *     taken and this function is atomic with respect to other list locked
1942 - *     calls.
1943 - *
1944 - *     A buffer cannot be placed on two lists at the same time.
1945 - */
1946 -void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
1947 -{
1948 -       unsigned long flags;
1949 -
1950 -       spin_lock_irqsave(&list->lock, flags);
1951 -       __skb_insert(newsk, old->prev, old, list);
1952 -       spin_unlock_irqrestore(&list->lock, flags);
1953 -}
1954 -
1955 -static inline void skb_split_inside_header(struct sk_buff *skb,
1956 -                                          struct sk_buff* skb1,
1957 -                                          const u32 len, const int pos)
1958 -{
1959 -       int i;
1960 -
1961 -       skb_copy_from_linear_data_offset(skb, len, skb_put(skb1, pos - len),
1962 -                                        pos - len);
1963 -       /* And move data appendix as is. */
1964 -       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1965 -               skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
1966 -
1967 -       skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
1968 -       skb_shinfo(skb)->nr_frags  = 0;
1969 -       skb1->data_len             = skb->data_len;
1970 -       skb1->len                  += skb1->data_len;
1971 -       skb->data_len              = 0;
1972 -       skb->len                   = len;
1973 -       skb_set_tail_pointer(skb, len);
1974 -}
1975 -
1976 -static inline void skb_split_no_header(struct sk_buff *skb,
1977 -                                      struct sk_buff* skb1,
1978 -                                      const u32 len, int pos)
1979 -{
1980 -       int i, k = 0;
1981 -       const int nfrags = skb_shinfo(skb)->nr_frags;
1982 -
1983 -       skb_shinfo(skb)->nr_frags = 0;
1984 -       skb1->len                 = skb1->data_len = skb->len - len;
1985 -       skb->len                  = len;
1986 -       skb->data_len             = len - pos;
1987 -
1988 -       for (i = 0; i < nfrags; i++) {
1989 -               int size = skb_shinfo(skb)->frags[i].size;
1990 -
1991 -               if (pos + size > len) {
1992 -                       skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];
1993 -
1994 -                       if (pos < len) {
1995 -                               /* Split frag.
1996 -                                * We have two variants in this case:
1997 -                                * 1. Move all the frag to the second
1998 -                                *    part, if it is possible. F.e.
1999 -                                *    this approach is mandatory for TUX,
2000 -                                *    where splitting is expensive.
2001 -                                * 2. Split is accurately. We make this.
2002 -                                */
2003 -                               get_page(skb_shinfo(skb)->frags[i].page);
2004 -                               skb_shinfo(skb1)->frags[0].page_offset += len - pos;
2005 -                               skb_shinfo(skb1)->frags[0].size -= len - pos;
2006 -                               skb_shinfo(skb)->frags[i].size  = len - pos;
2007 -                               skb_shinfo(skb)->nr_frags++;
2008 -                       }
2009 -                       k++;
2010 -               } else
2011 -                       skb_shinfo(skb)->nr_frags++;
2012 -               pos += size;
2013 -       }
2014 -       skb_shinfo(skb1)->nr_frags = k;
2015 -}
2016 -
2017 -/**
2018 - * skb_split - Split fragmented skb to two parts at length len.
2019 - * @skb: the buffer to split
2020 - * @skb1: the buffer to receive the second part
2021 - * @len: new length for skb
2022 - */
2023 -void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
2024 -{
2025 -       int pos = skb_headlen(skb);
2026 -
2027 -       if (len < pos)  /* Split line is inside header. */
2028 -               skb_split_inside_header(skb, skb1, len, pos);
2029 -       else            /* Second chunk has no header, nothing to copy. */
2030 -               skb_split_no_header(skb, skb1, len, pos);
2031 -}
2032 -
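skb_split() leaves the first @len bytes in @skb and moves the remainder into @skb1, which the caller must have allocated; a hedged sketch of that calling convention follows (my_split() and its error handling are invented, and real callers also rebuild headers on the second buffer).

#include <linux/skbuff.h>
#include <linux/gfp.h>
#include <linux/err.h>
#include <linux/errno.h>

/* Illustrative only: chop 'skb' so it keeps 'len' bytes and a freshly
 * allocated skb receives the rest. */
static struct sk_buff *my_split(struct sk_buff *skb, u32 len)
{
        struct sk_buff *rest;

        if (len >= skb->len)
                return ERR_PTR(-EINVAL);

        /* Enough linear room in case the split point falls inside
         * the linear header area of 'skb'. */
        rest = alloc_skb(skb_headlen(skb), GFP_ATOMIC);
        if (!rest)
                return ERR_PTR(-ENOMEM);

        skb_split(skb, rest, len);
        return rest;
}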
2033 -/**
2034 - * skb_prepare_seq_read - Prepare a sequential read of skb data
2035 - * @skb: the buffer to read
2036 - * @from: lower offset of data to be read
2037 - * @to: upper offset of data to be read
2038 - * @st: state variable
2039 - *
2040 - * Initializes the specified state variable. Must be called before
2041 - * invoking skb_seq_read() for the first time.
2042 - */
2043 -void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
2044 -                         unsigned int to, struct skb_seq_state *st)
2045 -{
2046 -       st->lower_offset = from;
2047 -       st->upper_offset = to;
2048 -       st->root_skb = st->cur_skb = skb;
2049 -       st->frag_idx = st->stepped_offset = 0;
2050 -       st->frag_data = NULL;
2051 -}
2052 -
2053 -/**
2054 - * skb_seq_read - Sequentially read skb data
2055 - * @consumed: number of bytes consumed by the caller so far
2056 - * @data: destination pointer for data to be returned
2057 - * @st: state variable
2058 - *
2059 - * Reads a block of skb data at &consumed relative to the
2060 - * lower offset specified to skb_prepare_seq_read(). Assigns
2061 - * the head of the data block to &data and returns the length
2062 - * of the block or 0 if the end of the skb data or the upper
2063 - * offset has been reached.
2064 - *
2065 - * The caller is not required to consume all of the data
2066 - * returned, i.e. &consumed is typically set to the number
2067 - * of bytes already consumed and the next call to
2068 - * skb_seq_read() will return the remaining part of the block.
2069 - *
2070 - * Note 1: The size of each block of data returned can be arbitary,
2071 - *       this limitation is the cost for zerocopy seqeuental
2072 - *       reads of potentially non linear data.
2073 - *
2074 - * Note 2: Fragment lists within fragments are not implemented
2075 - *       at the moment, state->root_skb could be replaced with
2076 - *       a stack for this purpose.
2077 - */
2078 -unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
2079 -                         struct skb_seq_state *st)
2080 -{
2081 -       unsigned int block_limit, abs_offset = consumed + st->lower_offset;
2082 -       skb_frag_t *frag;
2083 -
2084 -       if (unlikely(abs_offset >= st->upper_offset))
2085 -               return 0;
2086 -
2087 -next_skb:
2088 -       block_limit = skb_headlen(st->cur_skb) + st->stepped_offset;
2089 -
2090 -       if (abs_offset < block_limit && !st->frag_data) {
2091 -               *data = st->cur_skb->data + (abs_offset - st->stepped_offset);
2092 -               return block_limit - abs_offset;
2093 -       }
2094 -
2095 -       if (st->frag_idx == 0 && !st->frag_data)
2096 -               st->stepped_offset += skb_headlen(st->cur_skb);
2097 -
2098 -       while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {
2099 -               frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx];
2100 -               block_limit = frag->size + st->stepped_offset;
2101 -
2102 -               if (abs_offset < block_limit) {
2103 -                       if (!st->frag_data)
2104 -                               st->frag_data = kmap_skb_frag(frag);
2105 -
2106 -                       *data = (u8 *) st->frag_data + frag->page_offset +
2107 -                               (abs_offset - st->stepped_offset);
2108 -
2109 -                       return block_limit - abs_offset;
2110 -               }
2111 -
2112 -               if (st->frag_data) {
2113 -                       kunmap_skb_frag(st->frag_data);
2114 -                       st->frag_data = NULL;
2115 -               }
2116 -
2117 -               st->frag_idx++;
2118 -               st->stepped_offset += frag->size;
2119 -       }
2120 -
2121 -       if (st->frag_data) {
2122 -               kunmap_skb_frag(st->frag_data);
2123 -               st->frag_data = NULL;
2124 -       }
2125 -
2126 -       if (st->root_skb == st->cur_skb &&
2127 -           skb_shinfo(st->root_skb)->frag_list) {
2128 -               st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
2129 -               st->frag_idx = 0;
2130 -               goto next_skb;
2131 -       } else if (st->cur_skb->next) {
2132 -               st->cur_skb = st->cur_skb->next;
2133 -               st->frag_idx = 0;
2134 -               goto next_skb;
2135 -       }
2136 -
2137 -       return 0;
2138 -}
2139 -
2140 -/**
2141 - * skb_abort_seq_read - Abort a sequential read of skb data
2142 - * @st: state variable
2143 - *
2144 - * Must be called if skb_seq_read() was not called until it
2145 - * returned 0.
2146 - */
2147 -void skb_abort_seq_read(struct skb_seq_state *st)
2148 -{
2149 -       if (st->frag_data)
2150 -               kunmap_skb_frag(st->frag_data);
2151 -}
2152 -
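A hedged sketch of the intended calling sequence for the three seq-read helpers above; the consume() callback and the my_scan() name are hypothetical, and the final skb_abort_seq_read() is kept unconditional here purely for simplicity.

#include <linux/skbuff.h>

/* Illustrative only: walk 'len' bytes of skb data starting at 'from'
 * without linearizing the skb, handing each contiguous block to a
 * caller-supplied consume() callback. */
static void my_scan(struct sk_buff *skb, unsigned int from, unsigned int len,
                    int (*consume)(const u8 *data, unsigned int len))
{
        struct skb_seq_state st;
        unsigned int consumed = 0, avail;
        const u8 *data;

        skb_prepare_seq_read(skb, from, from + len, &st);

        while ((avail = skb_seq_read(consumed, &data, &st)) != 0) {
                if (consume(data, avail) < 0)
                        break;          /* stop early */
                consumed += avail;
        }

        /* Required when we stopped before skb_seq_read() returned 0;
         * harmless in this sketch if the walk completed normally. */
        skb_abort_seq_read(&st);
}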
2153 -#define TS_SKB_CB(state)       ((struct skb_seq_state *) &((state)->cb))
2154 -
2155 -static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text,
2156 -                                         struct ts_config *conf,
2157 -                                         struct ts_state *state)
2158 -{
2159 -       return skb_seq_read(offset, text, TS_SKB_CB(state));
2160 -}
2161 -
2162 -static void skb_ts_finish(struct ts_config *conf, struct ts_state *state)
2163 -{
2164 -       skb_abort_seq_read(TS_SKB_CB(state));
2165 -}
2166 -
2167 -/**
2168 - * skb_find_text - Find a text pattern in skb data
2169 - * @skb: the buffer to look in
2170 - * @from: search offset
2171 - * @to: search limit
2172 - * @config: textsearch configuration
2173 - * @state: uninitialized textsearch state variable
2174 - *
2175 - * Finds a pattern in the skb data according to the specified
2176 - * textsearch configuration. Use textsearch_next() to retrieve
2177 - * subsequent occurrences of the pattern. Returns the offset
2178 - * to the first occurrence or UINT_MAX if no match was found.
2179 - */
2180 -unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
2181 -                          unsigned int to, struct ts_config *config,
2182 -                          struct ts_state *state)
2183 -{
2184 -       unsigned int ret;
2185 -
2186 -       config->get_next_block = skb_ts_get_next_block;
2187 -       config->finish = skb_ts_finish;
2188 -
2189 -       skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state));
2190 -
2191 -       ret = textsearch_find(config, state);
2192 -       return (ret <= to - from ? ret : UINT_MAX);
2193 -}
2194 -
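skb_find_text() sits on top of the textsearch API; the hedged sketch below shows the usual prepare/search/destroy sequence, roughly the shape used by packet-matching code. The skb_contains() helper, the choice of the "bm" algorithm and the pattern handling are assumptions for illustration.

#include <linux/skbuff.h>
#include <linux/textsearch.h>
#include <linux/gfp.h>
#include <linux/err.h>

/* Illustrative only: report whether 'pattern' occurs anywhere in the
 * skb's data, loading the search algorithm module if necessary. */
static bool skb_contains(struct sk_buff *skb, const char *pattern,
                         unsigned int patlen)
{
        struct ts_config *conf;
        struct ts_state state;
        unsigned int pos;

        conf = textsearch_prepare("bm", pattern, patlen, GFP_KERNEL,
                                  TS_AUTOLOAD);
        if (IS_ERR(conf))
                return false;

        pos = skb_find_text(skb, 0, skb->len, conf, &state);
        textsearch_destroy(conf);

        return pos != UINT_MAX;
}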
2195 -/**
2196 - * skb_append_datato_frags: - append the user data to a skb
2197 - * @sk: sock  structure
2198 - * @skb: skb structure to be appened with user data.
2199 - * @getfrag: call back function to be used for getting the user data
2200 - * @from: pointer to user message iov
2201 - * @length: length of the iov message
2202 - *
2203 - * Description: This procedure append the user data in the fragment part
2204 - * of the skb if any page alloc fails user this procedure returns  -ENOMEM
2205 - */
2206 -int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
2207 -                       int (*getfrag)(void *from, char *to, int offset,
2208 -                                       int len, int odd, struct sk_buff *skb),
2209 -                       void *from, int length)
2210 -{
2211 -       int frg_cnt = 0;
2212 -       skb_frag_t *frag = NULL;
2213 -       struct page *page = NULL;
2214 -       int copy, left;
2215 -       int offset = 0;
2216 -       int ret;
2217 -
2218 -       do {
2219 -               /* Return error if we don't have space for new frag */
2220 -               frg_cnt = skb_shinfo(skb)->nr_frags;
2221 -               if (frg_cnt >= MAX_SKB_FRAGS)
2222 -                       return -EFAULT;
2223 -
2224 -               /* allocate a new page for next frag */
2225 -               page = alloc_pages(sk->sk_allocation, 0);
2226 -
2227 -               /* If alloc_page fails just return failure and caller will
2228 -                * free previous allocated pages by doing kfree_skb()
2229 -                */
2230 -               if (page == NULL)
2231 -                       return -ENOMEM;
2232 -
2233 -               /* initialize the next frag */
2234 -               sk->sk_sndmsg_page = page;
2235 -               sk->sk_sndmsg_off = 0;
2236 -               skb_fill_page_desc(skb, frg_cnt, page, 0, 0);
2237 -               skb->truesize += PAGE_SIZE;
2238 -               atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
2239 -
2240 -               /* get the new initialized frag */
2241 -               frg_cnt = skb_shinfo(skb)->nr_frags;
2242 -               frag = &skb_shinfo(skb)->frags[frg_cnt - 1];
2243 -
2244 -               /* copy the user data to page */
2245 -               left = PAGE_SIZE - frag->page_offset;
2246 -               copy = (length > left)? left : length;
2247 -
2248 -               ret = getfrag(from, (page_address(frag->page) +
2249 -                           frag->page_offset + frag->size),
2250 -                           offset, copy, 0, skb);
2251 -               if (ret < 0)
2252 -                       return -EFAULT;
2253 -
2254 -               /* copy was successful so update the size parameters */
2255 -               sk->sk_sndmsg_off += copy;
2256 -               frag->size += copy;
2257 -               skb->len += copy;
2258 -               skb->data_len += copy;
2259 -               offset += copy;
2260 -               length -= copy;
2261 -
2262 -       } while (length > 0);
2263 -
2264 -       return 0;
2265 -}
2266 -
2267 -/**
2268 - *     skb_pull_rcsum - pull skb and update receive checksum
2269 - *     @skb: buffer to update
2270 - *     @len: length of data pulled
2271 - *
2272 - *     This function performs an skb_pull on the packet and updates
2273 - *     the CHECKSUM_COMPLETE checksum.  It should be used on
2274 - *     receive path processing instead of skb_pull unless you know
2275 - *     that the checksum difference is zero (e.g., a valid IP header)
2276 - *     or you are setting ip_summed to CHECKSUM_NONE.
2277 - */
2278 -unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
2279 -{
2280 -       BUG_ON(len > skb->len);
2281 -       skb->len -= len;
2282 -       BUG_ON(skb->len < skb->data_len);
2283 -       skb_postpull_rcsum(skb, skb->data, len);
2284 -       return skb->data += len;
2285 -}
2286 -
2287 -EXPORT_SYMBOL_GPL(skb_pull_rcsum);
2288 -
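skb_pull_rcsum() is meant for receive-path header removal when skb->csum may hold a CHECKSUM_COMPLETE value; a minimal hedged sketch follows (the 4-byte encapsulation header and my_decap() are invented for this note).

#include <linux/skbuff.h>
#include <linux/errno.h>

#define MY_TAG_HLEN 4   /* hypothetical 4-byte encapsulation header */

/* Illustrative only: strip a small encapsulation header on receive
 * while keeping any CHECKSUM_COMPLETE value in skb->csum consistent. */
static int my_decap(struct sk_buff *skb)
{
        if (!pskb_may_pull(skb, MY_TAG_HLEN))
                return -EINVAL;         /* runt frame, caller frees */

        skb_pull_rcsum(skb, MY_TAG_HLEN);
        skb_reset_network_header(skb);
        return 0;
}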
2289 -/**
2290 - *     skb_segment - Perform protocol segmentation on skb.
2291 - *     @skb: buffer to segment
2292 - *     @features: features for the output path (see dev->features)
2293 - *
2294 - *     This function performs segmentation on the given skb.  It returns
2295 - *     a pointer to the first in a list of new skbs for the segments.
2296 - *     In case of error it returns ERR_PTR(err).
2297 - */
2298 -struct sk_buff *skb_segment(struct sk_buff *skb, int features)
2299 -{
2300 -       struct sk_buff *segs = NULL;
2301 -       struct sk_buff *tail = NULL;
2302 -       unsigned int mss = skb_shinfo(skb)->gso_size;
2303 -       unsigned int doffset = skb->data - skb_mac_header(skb);
2304 -       unsigned int offset = doffset;
2305 -       unsigned int headroom;
2306 -       unsigned int len;
2307 -       int sg = features & NETIF_F_SG;
2308 -       int nfrags = skb_shinfo(skb)->nr_frags;
2309 -       int err = -ENOMEM;
2310 -       int i = 0;
2311 -       int pos;
2312 -
2313 -       __skb_push(skb, doffset);
2314 -       headroom = skb_headroom(skb);
2315 -       pos = skb_headlen(skb);
2316 -
2317 -       do {
2318 -               struct sk_buff *nskb;
2319 -               skb_frag_t *frag;
2320 -               int hsize;
2321 -               int k;
2322 -               int size;
2323 -
2324 -               len = skb->len - offset;
2325 -               if (len > mss)
2326 -                       len = mss;
2327 -
2328 -               hsize = skb_headlen(skb) - offset;
2329 -               if (hsize < 0)
2330 -                       hsize = 0;
2331 -               if (hsize > len || !sg)
2332 -                       hsize = len;
2333 -
2334 -               nskb = alloc_skb(hsize + doffset + headroom, GFP_ATOMIC);
2335 -               if (unlikely(!nskb))
2336 -                       goto err;
2337 -
2338 -               if (segs)
2339 -                       tail->next = nskb;
2340 -               else
2341 -                       segs = nskb;
2342 -               tail = nskb;
2343 -
2344 -               __copy_skb_header(nskb, skb);
2345 -               nskb->mac_len = skb->mac_len;
2346 -
2347 -               skb_reserve(nskb, headroom);
2348 -               skb_reset_mac_header(nskb);
2349 -               skb_set_network_header(nskb, skb->mac_len);
2350 -               nskb->transport_header = (nskb->network_header +
2351 -                                         skb_network_header_len(skb));
2352 -               skb_copy_from_linear_data(skb, skb_put(nskb, doffset),
2353 -                                         doffset);
2354 -               if (!sg) {
2355 -                       nskb->ip_summed = CHECKSUM_NONE;
2356 -                       nskb->csum = skb_copy_and_csum_bits(skb, offset,
2357 -                                                           skb_put(nskb, len),
2358 -                                                           len, 0);
2359 -                       continue;
2360 -               }
2361 -
2362 -               frag = skb_shinfo(nskb)->frags;
2363 -               k = 0;
2364 -
2365 -               skb_copy_from_linear_data_offset(skb, offset,
2366 -                                                skb_put(nskb, hsize), hsize);
2367 -
2368 -               while (pos < offset + len) {
2369 -                       BUG_ON(i >= nfrags);
2370 -
2371 -                       *frag = skb_shinfo(skb)->frags[i];
2372 -                       get_page(frag->page);
2373 -                       size = frag->size;
2374 -
2375 -                       if (pos < offset) {
2376 -                               frag->page_offset += offset - pos;
2377 -                               frag->size -= offset - pos;
2378 -                       }
2379 -
2380 -                       k++;
2381 -
2382 -                       if (pos + size <= offset + len) {
2383 -                               i++;
2384 -                               pos += size;
2385 -                       } else {
2386 -                               frag->size -= pos + size - (offset + len);
2387 -                               break;
2388 -                       }
2389 -
2390 -                       frag++;
2391 -               }
2392 -
2393 -               skb_shinfo(nskb)->nr_frags = k;
2394 -               nskb->data_len = len - hsize;
2395 -               nskb->len += nskb->data_len;
2396 -               nskb->truesize += nskb->data_len;
2397 -       } while ((offset += len) < skb->len);
2398 -
2399 -       return segs;
2400 -
2401 -err:
2402 -       while ((skb = segs)) {
2403 -               segs = skb->next;
2404 -               kfree_skb(skb);
2405 -       }
2406 -       return ERR_PTR(err);
2407 -}
2408 -
2409 -EXPORT_SYMBOL_GPL(skb_segment);
2410 -
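Callers of skb_segment() get back a singly linked list of new skbs (or an ERR_PTR) and remain responsible for the original buffer; the hedged sketch below shows that contract with an invented xmit_one() callback, and it frees the original up front only because the segments carry their own references to the data.

#include <linux/skbuff.h>
#include <linux/err.h>

/* Illustrative only: segment a GSO skb and hand each resulting
 * segment to a caller-supplied transmit helper. */
static int my_gso_xmit(struct sk_buff *skb, int features,
                       int (*xmit_one)(struct sk_buff *seg))
{
        struct sk_buff *segs, *next;

        segs = skb_segment(skb, features);
        if (IS_ERR(segs))
                return PTR_ERR(segs);

        kfree_skb(skb);

        while (segs) {
                next = segs->next;
                segs->next = NULL;
                xmit_one(segs);         /* sketch: per-segment errors ignored */
                segs = next;
        }
        return 0;
}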
2411 -void __init skb_init(void)
2412 -{
2413 -       skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
2414 -                                             sizeof(struct sk_buff),
2415 -                                             0,
2416 -                                             SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2417 -                                             NULL);
2418 -       skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
2419 -                                               (2*sizeof(struct sk_buff)) +
2420 -                                               sizeof(atomic_t),
2421 -                                               0,
2422 -                                               SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2423 -                                               NULL);
2424 -}
2425 -
2426 -/**
2427 - *     skb_to_sgvec - Fill a scatter-gather list from a socket buffer
2428 - *     @skb: Socket buffer containing the buffers to be mapped
2429 - *     @sg: The scatter-gather list to map into
2430 - *     @offset: The offset into the buffer's contents to start mapping
2431 - *     @len: Length of buffer space to be mapped
2432 - *
2433 - *     Fill the specified scatter-gather list with mappings/pointers into a
2434 - *     region of the buffer space attached to a socket buffer.
2435 - */
2436 -static int
2437 -__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
2438 -{
2439 -       int start = skb_headlen(skb);
2440 -       int i, copy = start - offset;
2441 -       int elt = 0;
2442 -
2443 -       if (copy > 0) {
2444 -               if (copy > len)
2445 -                       copy = len;
2446 -               sg_set_buf(sg, skb->data + offset, copy);
2447 -               elt++;
2448 -               if ((len -= copy) == 0)
2449 -                       return elt;
2450 -               offset += copy;
2451 -       }
2452 -
2453 -       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2454 -               int end;
2455 -
2456 -               WARN_ON(start > offset + len);
2457 -
2458 -               end = start + skb_shinfo(skb)->frags[i].size;
2459 -               if ((copy = end - offset) > 0) {
2460 -                       skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2461 -
2462 -                       if (copy > len)
2463 -                               copy = len;
2464 -                       sg_set_page(&sg[elt], frag->page, copy,
2465 -                                       frag->page_offset+offset-start);
2466 -                       elt++;
2467 -                       if (!(len -= copy))
2468 -                               return elt;
2469 -                       offset += copy;
2470 -               }
2471 -               start = end;
2472 -       }
2473 -
2474 -       if (skb_shinfo(skb)->frag_list) {
2475 -               struct sk_buff *list = skb_shinfo(skb)->frag_list;
2476 -
2477 -               for (; list; list = list->next) {
2478 -                       int end;
2479 -
2480 -                       WARN_ON(start > offset + len);
2481 -
2482 -                       end = start + list->len;
2483 -                       if ((copy = end - offset) > 0) {
2484 -                               if (copy > len)
2485 -                                       copy = len;
2486 -                               elt += __skb_to_sgvec(list, sg+elt, offset - start,
2487 -                                                     copy);
2488 -                               if ((len -= copy) == 0)
2489 -                                       return elt;
2490 -                               offset += copy;
2491 -                       }
2492 -                       start = end;
2493 -               }
2494 -       }
2495 -       BUG_ON(len);
2496 -       return elt;
2497 -}
2498 -
2499 -int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
2500 -{
2501 -       int nsg = __skb_to_sgvec(skb, sg, offset, len);
2502 -
2503 -       sg_mark_end(&sg[nsg - 1]);
2504 -
2505 -       return nsg;
2506 -}
2507 -
2508 -/**
2509 - *     skb_cow_data - Check that a socket buffer's data buffers are writable
2510 - *     @skb: The socket buffer to check.
2511 - *     @tailbits: Amount of trailing space to be added
2512 - *     @trailer: Returned pointer to the skb where the @tailbits space begins
2513 - *
2514 - *     Make sure that the data buffers attached to a socket buffer are
2515 - *     writable. If they are not, private copies are made of the data buffers
2516 - *     and the socket buffer is set to use these instead.
2517 - *
2518 - *     If @tailbits is given, make sure that there is space to write @tailbits
2519 - *     bytes of data beyond current end of socket buffer.  @trailer will be
2520 - *     set to point to the skb in which this space begins.
2521 - *
2522 - *     The number of scatterlist elements required to completely map the
2523 - *     COW'd and extended socket buffer will be returned.
2524 - */
2525 -int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
2526 -{
2527 -       int copyflag;
2528 -       int elt;
2529 -       struct sk_buff *skb1, **skb_p;
2530 -
2531 -       /* If skb is cloned or its head is paged, reallocate
2532 -        * head pulling out all the pages (pages are considered not writable
2533 -        * at the moment even if they are anonymous).
2534 -        */
2535 -       if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
2536 -           __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
2537 -               return -ENOMEM;
2538 -
2539 -       /* Easy case. Most of packets will go this way. */
2540 -       if (!skb_shinfo(skb)->frag_list) {
2541 -               /* A little of trouble, not enough of space for trailer.
2542 -                * This should not happen, when stack is tuned to generate
2543 -                * good frames. OK, on miss we reallocate and reserve even more
2544 -                * space, 128 bytes is fair. */
2545 -
2546 -               if (skb_tailroom(skb) < tailbits &&
2547 -                   pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC))
2548 -                       return -ENOMEM;
2549 -
2550 -               /* Voila! */
2551 -               *trailer = skb;
2552 -               return 1;
2553 -       }
2554 -
2555 -       /* Misery. We are in troubles, going to mincer fragments... */
2556 -
2557 -       elt = 1;
2558 -       skb_p = &skb_shinfo(skb)->frag_list;
2559 -       copyflag = 0;
2560 -
2561 -       while ((skb1 = *skb_p) != NULL) {
2562 -               int ntail = 0;
2563 -
2564 -               /* The fragment is partially pulled by someone,
2565 -                * this can happen on input. Copy it and everything
2566 -                * after it. */
2567 -
2568 -               if (skb_shared(skb1))
2569 -                       copyflag = 1;
2570 -
2571 -               /* If the skb is the last, worry about trailer. */
2572 -
2573 -               if (skb1->next == NULL && tailbits) {
2574 -                       if (skb_shinfo(skb1)->nr_frags ||
2575 -                           skb_shinfo(skb1)->frag_list ||
2576 -                           skb_tailroom(skb1) < tailbits)
2577 -                               ntail = tailbits + 128;
2578 -               }
2579 -
2580 -               if (copyflag ||
2581 -                   skb_cloned(skb1) ||
2582 -                   ntail ||
2583 -                   skb_shinfo(skb1)->nr_frags ||
2584 -                   skb_shinfo(skb1)->frag_list) {
2585 -                       struct sk_buff *skb2;
2586 -
2587 -                       /* Fuck, we are miserable poor guys... */
2588 -                       if (ntail == 0)
2589 -                               skb2 = skb_copy(skb1, GFP_ATOMIC);
2590 -                       else
2591 -                               skb2 = skb_copy_expand(skb1,
2592 -                                                      skb_headroom(skb1),
2593 -                                                      ntail,
2594 -                                                      GFP_ATOMIC);
2595 -                       if (unlikely(skb2 == NULL))
2596 -                               return -ENOMEM;
2597 -
2598 -                       if (skb1->sk)
2599 -                               skb_set_owner_w(skb2, skb1->sk);
2600 -
2601 -                       /* Looking around. Are we still alive?
2602 -                        * OK, link new skb, drop old one */
2603 -
2604 -                       skb2->next = skb1->next;
2605 -                       *skb_p = skb2;
2606 -                       kfree_skb(skb1);
2607 -                       skb1 = skb2;
2608 -               }
2609 -               elt++;
2610 -               *trailer = skb1;
2611 -               skb_p = &skb1->next;
2612 -       }
2613 -
2614 -       return elt;
2615 -}
2616 -
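The two helpers above are designed to be used together, roughly the way IPsec-style transforms do: make the buffers writable first, then describe them with a scatterlist for crypto. The sketch below is hedged; my_map_skb() and the caller-provided sg array bound are assumptions, not code from this tree.

#include <linux/skbuff.h>
#include <linux/scatterlist.h>
#include <linux/errno.h>

/* Illustrative only: prepare an skb for in-place crypto by COWing its
 * data buffers and mapping them into a scatterlist supplied by the
 * caller.  Returns the number of sg entries used, or a negative errno. */
static int my_map_skb(struct sk_buff *skb, struct scatterlist *sg, int max_sg)
{
        struct sk_buff *trailer;
        int nfrags;

        nfrags = skb_cow_data(skb, 0, &trailer);
        if (nfrags < 0)
                return nfrags;
        if (nfrags > max_sg)
                return -EMSGSIZE;

        sg_init_table(sg, nfrags);
        return skb_to_sgvec(skb, sg, 0, skb->len);
}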
2617 -/**
2618 - * skb_partial_csum_set - set up and verify partial csum values for packet
2619 - * @skb: the skb to set
2620 - * @start: the number of bytes after skb->data to start checksumming.
2621 - * @off: the offset from start to place the checksum.
2622 - *
2623 - * For untrusted partially-checksummed packets, we need to make sure the values
2624 - * for skb->csum_start and skb->csum_offset are valid so we don't oops.
2625 - *
2626 - * This function checks and sets those values and skb->ip_summed: if this
2627 - * returns false you should drop the packet.
2628 - */
2629 -bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
2630 -{
2631 -       if (unlikely(start > skb->len - 2) ||
2632 -           unlikely((int)start + off > skb->len - 2)) {
2633 -               if (net_ratelimit())
2634 -                       printk(KERN_WARNING
2635 -                              "bad partial csum: csum=%u/%u len=%u\n",
2636 -                              start, off, skb->len);
2637 -               return false;
2638 -       }
2639 -       skb->ip_summed = CHECKSUM_PARTIAL;
2640 -       skb->csum_start = skb_headroom(skb) + start;
2641 -       skb->csum_offset = off;
2642 -       return true;
2643 -}
2644 -
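skb_partial_csum_set() exists for paths that import checksum metadata from an untrusted source; the hedged sketch below shows the drop-on-failure contract with an invented metadata structure (my_csum_hint and my_apply_csum_hint() are illustrative names only).

#include <linux/skbuff.h>
#include <linux/errno.h>

/* Hypothetical per-packet metadata handed to us by a guest or driver. */
struct my_csum_hint {
        u16 csum_start;         /* bytes after skb->data to start summing */
        u16 csum_offset;        /* where the folded checksum is stored    */
};

/* Illustrative only: trust the hint just enough to let the stack or
 * hardware finish the checksum, but drop the packet if the offsets
 * could not possibly be valid. */
static int my_apply_csum_hint(struct sk_buff *skb,
                              const struct my_csum_hint *hint)
{
        if (!skb_partial_csum_set(skb, hint->csum_start, hint->csum_offset)) {
                kfree_skb(skb);
                return -EINVAL;
        }
        return 0;       /* skb->ip_summed is now CHECKSUM_PARTIAL */
}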
2645 -void __skb_warn_lro_forwarding(const struct sk_buff *skb)
2646 -{
2647 -       if (net_ratelimit())
2648 -               pr_warning("%s: received packets cannot be forwarded"
2649 -                          " while LRO is enabled\n", skb->dev->name);
2650 -}
2651 -
2652 -EXPORT_SYMBOL(___pskb_trim);
2653 -EXPORT_SYMBOL(__kfree_skb);
2654 -EXPORT_SYMBOL(kfree_skb);
2655 -EXPORT_SYMBOL(__pskb_pull_tail);
2656 -EXPORT_SYMBOL(__alloc_skb);
2657 -EXPORT_SYMBOL(__netdev_alloc_skb);
2658 -EXPORT_SYMBOL(pskb_copy);
2659 -EXPORT_SYMBOL(pskb_expand_head);
2660 -EXPORT_SYMBOL(skb_checksum);
2661 -EXPORT_SYMBOL(skb_clone);
2662 -EXPORT_SYMBOL(skb_copy);
2663 -EXPORT_SYMBOL(skb_copy_and_csum_bits);
2664 -EXPORT_SYMBOL(skb_copy_and_csum_dev);
2665 -EXPORT_SYMBOL(skb_copy_bits);
2666 -EXPORT_SYMBOL(skb_copy_expand);
2667 -EXPORT_SYMBOL(skb_over_panic);
2668 -EXPORT_SYMBOL(skb_pad);
2669 -EXPORT_SYMBOL(skb_realloc_headroom);
2670 -EXPORT_SYMBOL(skb_under_panic);
2671 -EXPORT_SYMBOL(skb_dequeue);
2672 -EXPORT_SYMBOL(skb_dequeue_tail);
2673 -EXPORT_SYMBOL(skb_insert);
2674 -EXPORT_SYMBOL(skb_queue_purge);
2675 -EXPORT_SYMBOL(skb_queue_head);
2676 -EXPORT_SYMBOL(skb_queue_tail);
2677 -EXPORT_SYMBOL(skb_unlink);
2678 -EXPORT_SYMBOL(skb_append);
2679 -EXPORT_SYMBOL(skb_split);
2680 -EXPORT_SYMBOL(skb_prepare_seq_read);
2681 -EXPORT_SYMBOL(skb_seq_read);
2682 -EXPORT_SYMBOL(skb_abort_seq_read);
2683 -EXPORT_SYMBOL(skb_find_text);
2684 -EXPORT_SYMBOL(skb_append_datato_frags);
2685 -EXPORT_SYMBOL(__skb_warn_lro_forwarding);
2686 -
2687 -EXPORT_SYMBOL_GPL(skb_to_sgvec);
2688 -EXPORT_SYMBOL_GPL(skb_cow_data);
2689 -EXPORT_SYMBOL_GPL(skb_partial_csum_set);
2690 diff -Nurb linux-2.6.27-524/net/core/sock.c.orig linux-2.6.27-525/net/core/sock.c.orig
2691 --- linux-2.6.27-524/net/core/sock.c.orig       2009-12-04 16:03:48.000000000 -0500
2692 +++ linux-2.6.27-525/net/core/sock.c.orig       1969-12-31 19:00:00.000000000 -0500
2693 @@ -1,2301 +0,0 @@
2694 -/*
2695 - * INET                An implementation of the TCP/IP protocol suite for the LINUX
2696 - *             operating system.  INET is implemented using the  BSD Socket
2697 - *             interface as the means of communication with the user level.
2698 - *
2699 - *             Generic socket support routines. Memory allocators, socket lock/release
2700 - *             handler for protocols to use and generic option handler.
2701 - *
2702 - *
2703 - * Authors:    Ross Biro
2704 - *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
2705 - *             Florian La Roche, <flla@stud.uni-sb.de>
2706 - *             Alan Cox, <A.Cox@swansea.ac.uk>
2707 - *
2708 - * Fixes:
2709 - *             Alan Cox        :       Numerous verify_area() problems
2710 - *             Alan Cox        :       Connecting on a connecting socket
2711 - *                                     now returns an error for tcp.
2712 - *             Alan Cox        :       sock->protocol is set correctly.
2713 - *                                     and is not sometimes left as 0.
2714 - *             Alan Cox        :       connect handles icmp errors on a
2715 - *                                     connect properly. Unfortunately there
2716 - *                                     is a restart syscall nasty there. I
2717 - *                                     can't match BSD without hacking the C
2718 - *                                     library. Ideas urgently sought!
2719 - *             Alan Cox        :       Disallow bind() to addresses that are
2720 - *                                     not ours - especially broadcast ones!!
2721 - *             Alan Cox        :       Socket 1024 _IS_ ok for users. (fencepost)
2722 - *             Alan Cox        :       sock_wfree/sock_rfree don't destroy sockets,
2723 - *                                     instead they leave that for the DESTROY timer.
2724 - *             Alan Cox        :       Clean up error flag in accept
2725 - *             Alan Cox        :       TCP ack handling is buggy, the DESTROY timer
2726 - *                                     was buggy. Put a remove_sock() in the handler
2727 - *                                     for memory when we hit 0. Also altered the timer
2728 - *                                     code. The ACK stuff can wait and needs major
2729 - *                                     TCP layer surgery.
2730 - *             Alan Cox        :       Fixed TCP ack bug, removed remove sock
2731 - *                                     and fixed timer/inet_bh race.
2732 - *             Alan Cox        :       Added zapped flag for TCP
2733 - *             Alan Cox        :       Move kfree_skb into skbuff.c and tidied up surplus code
2734 - *             Alan Cox        :       for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
2735 - *             Alan Cox        :       kfree_s calls now are kfree_skbmem so we can track skb resources
2736 - *             Alan Cox        :       Supports socket option broadcast now as does udp. Packet and raw need fixing.
2737 - *             Alan Cox        :       Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
2738 - *             Rick Sladkey    :       Relaxed UDP rules for matching packets.
2739 - *             C.E.Hawkins     :       IFF_PROMISC/SIOCGHWADDR support
2740 - *     Pauline Middelink       :       identd support
2741 - *             Alan Cox        :       Fixed connect() taking signals I think.
2742 - *             Alan Cox        :       SO_LINGER supported
2743 - *             Alan Cox        :       Error reporting fixes
2744 - *             Anonymous       :       inet_create tidied up (sk->reuse setting)
2745 - *             Alan Cox        :       inet sockets don't set sk->type!
2746 - *             Alan Cox        :       Split socket option code
2747 - *             Alan Cox        :       Callbacks
2748 - *             Alan Cox        :       Nagle flag for Charles & Johannes stuff
2749 - *             Alex            :       Removed restriction on inet fioctl
2750 - *             Alan Cox        :       Splitting INET from NET core
2751 - *             Alan Cox        :       Fixed bogus SO_TYPE handling in getsockopt()
2752 - *             Adam Caldwell   :       Missing return in SO_DONTROUTE/SO_DEBUG code
2753 - *             Alan Cox        :       Split IP from generic code
2754 - *             Alan Cox        :       New kfree_skbmem()
2755 - *             Alan Cox        :       Make SO_DEBUG superuser only.
2756 - *             Alan Cox        :       Allow anyone to clear SO_DEBUG
2757 - *                                     (compatibility fix)
2758 - *             Alan Cox        :       Added optimistic memory grabbing for AF_UNIX throughput.
2759 - *             Alan Cox        :       Allocator for a socket is settable.
2760 - *             Alan Cox        :       SO_ERROR includes soft errors.
2761 - *             Alan Cox        :       Allow NULL arguments on some SO_ opts
2762 - *             Alan Cox        :       Generic socket allocation to make hooks
2763 - *                                     easier (suggested by Craig Metz).
2764 - *             Michael Pall    :       SO_ERROR returns positive errno again
2765 - *              Steve Whitehouse:       Added default destructor to free
2766 - *                                      protocol private data.
2767 - *              Steve Whitehouse:       Added various other default routines
2768 - *                                      common to several socket families.
2769 - *              Chris Evans     :       Call suser() check last on F_SETOWN
2770 - *             Jay Schulist    :       Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
2771 - *             Andi Kleen      :       Add sock_kmalloc()/sock_kfree_s()
2772 - *             Andi Kleen      :       Fix write_space callback
2773 - *             Chris Evans     :       Security fixes - signedness again
2774 - *             Arnaldo C. Melo :       cleanups, use skb_queue_purge
2775 - *
2776 - * To Fix:
2777 - *
2778 - *
2779 - *             This program is free software; you can redistribute it and/or
2780 - *             modify it under the terms of the GNU General Public License
2781 - *             as published by the Free Software Foundation; either version
2782 - *             2 of the License, or (at your option) any later version.
2783 - */
2784 -
2785 -#include <linux/capability.h>
2786 -#include <linux/errno.h>
2787 -#include <linux/types.h>
2788 -#include <linux/socket.h>
2789 -#include <linux/in.h>
2790 -#include <linux/kernel.h>
2791 -#include <linux/module.h>
2792 -#include <linux/proc_fs.h>
2793 -#include <linux/seq_file.h>
2794 -#include <linux/sched.h>
2795 -#include <linux/timer.h>
2796 -#include <linux/string.h>
2797 -#include <linux/sockios.h>
2798 -#include <linux/net.h>
2799 -#include <linux/mm.h>
2800 -#include <linux/slab.h>
2801 -#include <linux/interrupt.h>
2802 -#include <linux/poll.h>
2803 -#include <linux/tcp.h>
2804 -#include <linux/init.h>
2805 -#include <linux/highmem.h>
2806 -
2807 -#include <asm/uaccess.h>
2808 -#include <asm/system.h>
2809 -
2810 -#include <linux/netdevice.h>
2811 -#include <net/protocol.h>
2812 -#include <linux/skbuff.h>
2813 -#include <net/net_namespace.h>
2814 -#include <net/request_sock.h>
2815 -#include <net/sock.h>
2816 -#include <net/xfrm.h>
2817 -#include <linux/ipsec.h>
2818 -
2819 -#include <linux/filter.h>
2820 -#include <linux/vs_socket.h>
2821 -#include <linux/vs_limit.h>
2822 -#include <linux/vs_context.h>
2823 -#include <linux/vs_network.h>
2824 -
2825 -#ifdef CONFIG_INET
2826 -#include <net/tcp.h>
2827 -#endif
2828 -
2829 -/*
2830 - * Each address family might have different locking rules, so we have
2831 - * one slock key per address family:
2832 - */
2833 -static struct lock_class_key af_family_keys[AF_MAX];
2834 -static struct lock_class_key af_family_slock_keys[AF_MAX];
2835 -
2836 -#ifdef CONFIG_DEBUG_LOCK_ALLOC
2837 -/*
2838 - * Make lock validator output more readable. (we pre-construct these
2839 - * strings build-time, so that runtime initialization of socket
2840 - * locks is fast):
2841 - */
2842 -static const char *af_family_key_strings[AF_MAX+1] = {
2843 -  "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
2844 -  "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
2845 -  "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
2846 -  "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
2847 -  "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
2848 -  "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
2849 -  "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
2850 -  "sk_lock-21"       , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
2851 -  "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
2852 -  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
2853 -  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
2854 -  "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
2855 -};
2856 -static const char *af_family_slock_key_strings[AF_MAX+1] = {
2857 -  "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
2858 -  "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
2859 -  "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
2860 -  "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
2861 -  "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
2862 -  "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
2863 -  "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
2864 -  "slock-21"       , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
2865 -  "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
2866 -  "slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
2867 -  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
2868 -  "slock-AF_RXRPC" , "slock-AF_MAX"
2869 -};
2870 -static const char *af_family_clock_key_strings[AF_MAX+1] = {
2871 -  "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
2872 -  "clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
2873 -  "clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
2874 -  "clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
2875 -  "clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
2876 -  "clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
2877 -  "clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
2878 -  "clock-21"       , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
2879 -  "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
2880 -  "clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
2881 -  "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
2882 -  "clock-AF_RXRPC" , "clock-AF_MAX"
2883 -};
2884 -#endif
2885 -
2886 -/*
2887 - * sk_callback_lock locking rules are per-address-family,
2888 - * so split the lock classes by using a per-AF key:
2889 - */
2890 -static struct lock_class_key af_callback_keys[AF_MAX];
2891 -
2892 -/* Take into consideration the size of the struct sk_buff overhead in the
2893 - * determination of these values, since that is non-constant across
2894 - * platforms.  This makes socket queueing behavior and performance
2895 - * not depend upon such differences.
2896 - */
2897 -#define _SK_MEM_PACKETS                256
2898 -#define _SK_MEM_OVERHEAD       (sizeof(struct sk_buff) + 256)
2899 -#define SK_WMEM_MAX            (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
2900 -#define SK_RMEM_MAX            (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
2901 -
2902 -/* Run time adjustable parameters. */
2903 -__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
2904 -__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
2905 -__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
2906 -__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
2907 -
2908 -/* Maximal space eaten by iovec or ancillary data plus some space */
2909 -int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
2910 -
2911 -static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
2912 -{
2913 -       struct timeval tv;
2914 -
2915 -       if (optlen < sizeof(tv))
2916 -               return -EINVAL;
2917 -       if (copy_from_user(&tv, optval, sizeof(tv)))
2918 -               return -EFAULT;
2919 -       if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
2920 -               return -EDOM;
2921 -
2922 -       if (tv.tv_sec < 0) {
2923 -               static int warned __read_mostly;
2924 -
2925 -               *timeo_p = 0;
2926 -               if (warned < 10 && net_ratelimit()) {
2927 -                       warned++;
2928 -                       printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
2929 -                              "tries to set negative timeout\n",
2930 -                               current->comm, task_pid_nr(current));
2931 -               }
2932 -               return 0;
2933 -       }
2934 -       *timeo_p = MAX_SCHEDULE_TIMEOUT;
2935 -       if (tv.tv_sec == 0 && tv.tv_usec == 0)
2936 -               return 0;
2937 -       if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
2938 -               *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
2939 -       return 0;
2940 -}
2941 -
2942 -static void sock_warn_obsolete_bsdism(const char *name)
2943 -{
2944 -       static int warned;
2945 -       static char warncomm[TASK_COMM_LEN];
2946 -       if (strcmp(warncomm, current->comm) && warned < 5) {
2947 -               strcpy(warncomm,  current->comm);
2948 -               printk(KERN_WARNING "process `%s' is using obsolete "
2949 -                      "%s SO_BSDCOMPAT\n", warncomm, name);
2950 -               warned++;
2951 -       }
2952 -}
2953 -
2954 -static void sock_disable_timestamp(struct sock *sk)
2955 -{
2956 -       if (sock_flag(sk, SOCK_TIMESTAMP)) {
2957 -               sock_reset_flag(sk, SOCK_TIMESTAMP);
2958 -               net_disable_timestamp();
2959 -       }
2960 -}
2961 -
2962 -
2963 -int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
2964 -{
2965 -       int err = 0;
2966 -       int skb_len;
2967 -
2968 -       /* Cast sk->rcvbuf to unsigned... It's pointless, but reduces
2969 -          number of warnings when compiling with -W --ANK
2970 -        */
2971 -       if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
2972 -           (unsigned)sk->sk_rcvbuf) {
2973 -               err = -ENOMEM;
2974 -               goto out;
2975 -       }
2976 -
2977 -       err = sk_filter(sk, skb);
2978 -       if (err)
2979 -               goto out;
2980 -
2981 -       if (!sk_rmem_schedule(sk, skb->truesize)) {
2982 -               err = -ENOBUFS;
2983 -               goto out;
2984 -       }
2985 -
2986 -       skb->dev = NULL;
2987 -       skb_set_owner_r(skb, sk);
2988 -
2989 -       /* Cache the SKB length before we tack it onto the receive
2990 -        * queue.  Once it is added it no longer belongs to us and
2991 -        * may be freed by other threads of control pulling packets
2992 -        * from the queue.
2993 -        */
2994 -       skb_len = skb->len;
2995 -
2996 -       skb_queue_tail(&sk->sk_receive_queue, skb);
2997 -
2998 -       if (!sock_flag(sk, SOCK_DEAD))
2999 -               sk->sk_data_ready(sk, skb_len);
3000 -out:
3001 -       return err;
3002 -}
3003 -EXPORT_SYMBOL(sock_queue_rcv_skb);
3004 -
3005 -int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
3006 -{
3007 -       int rc = NET_RX_SUCCESS;
3008 -
3009 -       if (sk_filter(sk, skb))
3010 -               goto discard_and_relse;
3011 -
3012 -       skb->dev = NULL;
3013 -
3014 -       if (nested)
3015 -               bh_lock_sock_nested(sk);
3016 -       else
3017 -               bh_lock_sock(sk);
3018 -       if (!sock_owned_by_user(sk)) {
3019 -               /*
3020 -                * trylock + unlock semantics:
3021 -                */
3022 -               mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
3023 -
3024 -               rc = sk->sk_backlog_rcv(sk, skb);
3025 -
3026 -               mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
3027 -       } else
3028 -               sk_add_backlog(sk, skb);
3029 -       bh_unlock_sock(sk);
3030 -out:
3031 -       sock_put(sk);
3032 -       return rc;
3033 -discard_and_relse:
3034 -       kfree_skb(skb);
3035 -       goto out;
3036 -}
3037 -EXPORT_SYMBOL(sk_receive_skb);
3038 -
3039 -struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
3040 -{
3041 -       struct dst_entry *dst = sk->sk_dst_cache;
3042 -
3043 -       if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
3044 -               sk->sk_dst_cache = NULL;
3045 -               dst_release(dst);
3046 -               return NULL;
3047 -       }
3048 -
3049 -       return dst;
3050 -}
3051 -EXPORT_SYMBOL(__sk_dst_check);
3052 -
3053 -struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
3054 -{
3055 -       struct dst_entry *dst = sk_dst_get(sk);
3056 -
3057 -       if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
3058 -               sk_dst_reset(sk);
3059 -               dst_release(dst);
3060 -               return NULL;
3061 -       }
3062 -
3063 -       return dst;
3064 -}
3065 -EXPORT_SYMBOL(sk_dst_check);
3066 -
3067 -static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
3068 -{
3069 -       int ret = -ENOPROTOOPT;
3070 -#ifdef CONFIG_NETDEVICES
3071 -       struct net *net = sock_net(sk);
3072 -       char devname[IFNAMSIZ];
3073 -       int index;
3074 -
3075 -       /* Sorry... */
3076 -       ret = -EPERM;
3077 -       if (!capable(CAP_NET_RAW))
3078 -               goto out;
3079 -
3080 -       ret = -EINVAL;
3081 -       if (optlen < 0)
3082 -               goto out;
3083 -
3084 -       /* Bind this socket to a particular device like "eth0",
3085 -        * as specified in the passed interface name. If the
3086 -        * name is "" or the option length is zero the socket
3087 -        * is not bound.
3088 -        */
3089 -       if (optlen > IFNAMSIZ - 1)
3090 -               optlen = IFNAMSIZ - 1;
3091 -       memset(devname, 0, sizeof(devname));
3092 -
3093 -       ret = -EFAULT;
3094 -       if (copy_from_user(devname, optval, optlen))
3095 -               goto out;
3096 -
3097 -       if (devname[0] == '\0') {
3098 -               index = 0;
3099 -       } else {
3100 -               struct net_device *dev = dev_get_by_name(net, devname);
3101 -
3102 -               ret = -ENODEV;
3103 -               if (!dev)
3104 -                       goto out;
3105 -
3106 -               index = dev->ifindex;
3107 -               dev_put(dev);
3108 -       }
3109 -
3110 -       lock_sock(sk);
3111 -       sk->sk_bound_dev_if = index;
3112 -       sk_dst_reset(sk);
3113 -       release_sock(sk);
3114 -
3115 -       ret = 0;
3116 -
3117 -out:
3118 -#endif
3119 -
3120 -       return ret;
3121 -}
3122 -
3123 -static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
3124 -{
3125 -       if (valbool)
3126 -               sock_set_flag(sk, bit);
3127 -       else
3128 -               sock_reset_flag(sk, bit);
3129 -}
3130 -
3131 -/*
3132 - *     This is meant for all protocols to use and covers goings on
3133 - *     at the socket level. Everything here is generic.
3134 - */
3135 -
3136 -int sock_setsockopt(struct socket *sock, int level, int optname,
3137 -                   char __user *optval, int optlen)
3138 -{
3139 -       struct sock *sk=sock->sk;
3140 -       int val;
3141 -       int valbool;
3142 -       struct linger ling;
3143 -       int ret = 0;
3144 -
3145 -       /*
3146 -        *      Options without arguments
3147 -        */
3148 -
3149 -       if (optname == SO_BINDTODEVICE)
3150 -               return sock_bindtodevice(sk, optval, optlen);
3151 -
3152 -       if (optlen < sizeof(int))
3153 -               return -EINVAL;
3154 -
3155 -       if (get_user(val, (int __user *)optval))
3156 -               return -EFAULT;
3157 -
3158 -       valbool = val?1:0;
3159 -
3160 -       lock_sock(sk);
3161 -
3162 -       switch(optname) {
3163 -       case SO_DEBUG:
3164 -               if (val && !capable(CAP_NET_ADMIN)) {
3165 -                       ret = -EACCES;
3166 -               } else
3167 -                       sock_valbool_flag(sk, SOCK_DBG, valbool);
3168 -               break;
3169 -       case SO_REUSEADDR:
3170 -               sk->sk_reuse = valbool;
3171 -               break;
3172 -       case SO_TYPE:
3173 -       case SO_ERROR:
3174 -               ret = -ENOPROTOOPT;
3175 -               break;
3176 -       case SO_DONTROUTE:
3177 -               sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
3178 -               break;
3179 -       case SO_BROADCAST:
3180 -               sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
3181 -               break;
3182 -       case SO_SNDBUF:
3183 -               /* Don't error on this; BSD doesn't, and if you think
3184 -                  about it this is right. Otherwise apps have to
3185 -                  play 'guess the biggest size' games. RCVBUF/SNDBUF
3186 -                  are treated in BSD as hints */
3187 -
3188 -               if (val > sysctl_wmem_max)
3189 -                       val = sysctl_wmem_max;
3190 -set_sndbuf:
3191 -               sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
3192 -               if ((val * 2) < SOCK_MIN_SNDBUF)
3193 -                       sk->sk_sndbuf = SOCK_MIN_SNDBUF;
3194 -               else
3195 -                       sk->sk_sndbuf = val * 2;
3196 -
3197 -               /*
3198 -                *      Wake up sending tasks if we
3199 -                *      upped the value.
3200 -                */
3201 -               sk->sk_write_space(sk);
3202 -               break;
3203 -
3204 -       case SO_SNDBUFFORCE:
3205 -               if (!capable(CAP_NET_ADMIN)) {
3206 -                       ret = -EPERM;
3207 -                       break;
3208 -               }
3209 -               goto set_sndbuf;
3210 -
3211 -       case SO_RCVBUF:
3212 -               /* Don't error on this; BSD doesn't, and if you think
3213 -                  about it this is right. Otherwise apps have to
3214 -                  play 'guess the biggest size' games. RCVBUF/SNDBUF
3215 -                  are treated in BSD as hints */
3216 -
3217 -               if (val > sysctl_rmem_max)
3218 -                       val = sysctl_rmem_max;
3219 -set_rcvbuf:
3220 -               sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
3221 -               /*
3222 -                * We double it on the way in to account for
3223 -                * "struct sk_buff" etc. overhead.   Applications
3224 -                * assume that the SO_RCVBUF setting they make will
3225 -                * allow that much actual data to be received on that
3226 -                * socket.
3227 -                *
3228 -                * Applications are unaware that "struct sk_buff" and
3229 -                * other overheads allocate from the receive buffer
3230 -                * during socket buffer allocation.
3231 -                *
3232 -                * And after considering the possible alternatives,
3233 -                * returning the value we actually used in getsockopt
3234 -                * is the most desirable behavior.
3235 -                */
3236 -               if ((val * 2) < SOCK_MIN_RCVBUF)
3237 -                       sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
3238 -               else
3239 -                       sk->sk_rcvbuf = val * 2;
3240 -               break;
3241 -
3242 -       case SO_RCVBUFFORCE:
3243 -               if (!capable(CAP_NET_ADMIN)) {
3244 -                       ret = -EPERM;
3245 -                       break;
3246 -               }
3247 -               goto set_rcvbuf;
3248 -
3249 -       case SO_KEEPALIVE:
3250 -#ifdef CONFIG_INET
3251 -               if (sk->sk_protocol == IPPROTO_TCP)
3252 -                       tcp_set_keepalive(sk, valbool);
3253 -#endif
3254 -               sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
3255 -               break;
3256 -
3257 -       case SO_OOBINLINE:
3258 -               sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
3259 -               break;
3260 -
3261 -       case SO_NO_CHECK:
3262 -               sk->sk_no_check = valbool;
3263 -               break;
3264 -
3265 -       case SO_PRIORITY:
3266 -               if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
3267 -                       sk->sk_priority = val;
3268 -               else
3269 -                       ret = -EPERM;
3270 -               break;
3271 -
3272 -       case SO_LINGER:
3273 -               if (optlen < sizeof(ling)) {
3274 -                       ret = -EINVAL;  /* 1003.1g */
3275 -                       break;
3276 -               }
3277 -               if (copy_from_user(&ling,optval,sizeof(ling))) {
3278 -                       ret = -EFAULT;
3279 -                       break;
3280 -               }
3281 -               if (!ling.l_onoff)
3282 -                       sock_reset_flag(sk, SOCK_LINGER);
3283 -               else {
3284 -#if (BITS_PER_LONG == 32)
3285 -                       if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
3286 -                               sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
3287 -                       else
3288 -#endif
3289 -                               sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
3290 -                       sock_set_flag(sk, SOCK_LINGER);
3291 -               }
3292 -               break;
3293 -
3294 -       case SO_BSDCOMPAT:
3295 -               sock_warn_obsolete_bsdism("setsockopt");
3296 -               break;
3297 -
3298 -       case SO_PASSCRED:
3299 -               if (valbool)
3300 -                       set_bit(SOCK_PASSCRED, &sock->flags);
3301 -               else
3302 -                       clear_bit(SOCK_PASSCRED, &sock->flags);
3303 -               break;
3304 -
3305 -       case SO_TIMESTAMP:
3306 -       case SO_TIMESTAMPNS:
3307 -               if (valbool)  {
3308 -                       if (optname == SO_TIMESTAMP)
3309 -                               sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
3310 -                       else
3311 -                               sock_set_flag(sk, SOCK_RCVTSTAMPNS);
3312 -                       sock_set_flag(sk, SOCK_RCVTSTAMP);
3313 -                       sock_enable_timestamp(sk);
3314 -               } else {
3315 -                       sock_reset_flag(sk, SOCK_RCVTSTAMP);
3316 -                       sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
3317 -               }
3318 -               break;
3319 -
3320 -       case SO_RCVLOWAT:
3321 -               if (val < 0)
3322 -                       val = INT_MAX;
3323 -               sk->sk_rcvlowat = val ? : 1;
3324 -               break;
3325 -
3326 -       case SO_RCVTIMEO:
3327 -               ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
3328 -               break;
3329 -
3330 -       case SO_SNDTIMEO:
3331 -               ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
3332 -               break;
3333 -
3334 -       case SO_ATTACH_FILTER:
3335 -               ret = -EINVAL;
3336 -               if (optlen == sizeof(struct sock_fprog)) {
3337 -                       struct sock_fprog fprog;
3338 -
3339 -                       ret = -EFAULT;
3340 -                       if (copy_from_user(&fprog, optval, sizeof(fprog)))
3341 -                               break;
3342 -
3343 -                       ret = sk_attach_filter(&fprog, sk);
3344 -               }
3345 -               break;
3346 -
3347 -       case SO_DETACH_FILTER:
3348 -               ret = sk_detach_filter(sk);
3349 -               break;
3350 -
3351 -       case SO_PASSSEC:
3352 -               if (valbool)
3353 -                       set_bit(SOCK_PASSSEC, &sock->flags);
3354 -               else
3355 -                       clear_bit(SOCK_PASSSEC, &sock->flags);
3356 -               break;
3357 -       case SO_MARK:
3358 -               if (!capable(CAP_NET_ADMIN))
3359 -                       ret = -EPERM;
3360 -               else {
3361 -                       sk->sk_mark = val;
3362 -               }
3363 -               break;
3364 -
3365 -               /* We implement the SO_SNDLOWAT etc to
3366 -                  not be settable (1003.1g 5.3) */
3367 -       default:
3368 -               ret = -ENOPROTOOPT;
3369 -               break;
3370 -       }
3371 -       release_sock(sk);
3372 -       return ret;
3373 -}
3374 -
3375 -
3376 -int sock_getsockopt(struct socket *sock, int level, int optname,
3377 -                   char __user *optval, int __user *optlen)
3378 -{
3379 -       struct sock *sk = sock->sk;
3380 -
3381 -       union {
3382 -               int val;
3383 -               struct linger ling;
3384 -               struct timeval tm;
3385 -       } v;
3386 -
3387 -       unsigned int lv = sizeof(int);
3388 -       int len;
3389 -
3390 -       if (get_user(len, optlen))
3391 -               return -EFAULT;
3392 -       if (len < 0)
3393 -               return -EINVAL;
3394 -
3395 -       memset(&v, 0, sizeof(v));
3396 -
3397 -       switch(optname) {
3398 -       case SO_DEBUG:
3399 -               v.val = sock_flag(sk, SOCK_DBG);
3400 -               break;
3401 -
3402 -       case SO_DONTROUTE:
3403 -               v.val = sock_flag(sk, SOCK_LOCALROUTE);
3404 -               break;
3405 -
3406 -       case SO_BROADCAST:
3407 -               v.val = !!sock_flag(sk, SOCK_BROADCAST);
3408 -               break;
3409 -
3410 -       case SO_SNDBUF:
3411 -               v.val = sk->sk_sndbuf;
3412 -               break;
3413 -
3414 -       case SO_RCVBUF:
3415 -               v.val = sk->sk_rcvbuf;
3416 -               break;
3417 -
3418 -       case SO_REUSEADDR:
3419 -               v.val = sk->sk_reuse;
3420 -               break;
3421 -
3422 -       case SO_KEEPALIVE:
3423 -               v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
3424 -               break;
3425 -
3426 -       case SO_TYPE:
3427 -               v.val = sk->sk_type;
3428 -               break;
3429 -
3430 -       case SO_ERROR:
3431 -               v.val = -sock_error(sk);
3432 -               if (v.val==0)
3433 -                       v.val = xchg(&sk->sk_err_soft, 0);
3434 -               break;
3435 -
3436 -       case SO_OOBINLINE:
3437 -               v.val = !!sock_flag(sk, SOCK_URGINLINE);
3438 -               break;
3439 -
3440 -       case SO_NO_CHECK:
3441 -               v.val = sk->sk_no_check;
3442 -               break;
3443 -
3444 -       case SO_PRIORITY:
3445 -               v.val = sk->sk_priority;
3446 -               break;
3447 -
3448 -       case SO_LINGER:
3449 -               lv              = sizeof(v.ling);
3450 -               v.ling.l_onoff  = !!sock_flag(sk, SOCK_LINGER);
3451 -               v.ling.l_linger = sk->sk_lingertime / HZ;
3452 -               break;
3453 -
3454 -       case SO_BSDCOMPAT:
3455 -               sock_warn_obsolete_bsdism("getsockopt");
3456 -               break;
3457 -
3458 -       case SO_TIMESTAMP:
3459 -               v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
3460 -                               !sock_flag(sk, SOCK_RCVTSTAMPNS);
3461 -               break;
3462 -
3463 -       case SO_TIMESTAMPNS:
3464 -               v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
3465 -               break;
3466 -
3467 -       case SO_RCVTIMEO:
3468 -               lv=sizeof(struct timeval);
3469 -               if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
3470 -                       v.tm.tv_sec = 0;
3471 -                       v.tm.tv_usec = 0;
3472 -               } else {
3473 -                       v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
3474 -                       v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
3475 -               }
3476 -               break;
3477 -
3478 -       case SO_SNDTIMEO:
3479 -               lv=sizeof(struct timeval);
3480 -               if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
3481 -                       v.tm.tv_sec = 0;
3482 -                       v.tm.tv_usec = 0;
3483 -               } else {
3484 -                       v.tm.tv_sec = sk->sk_sndtimeo / HZ;
3485 -                       v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
3486 -               }
3487 -               break;
3488 -
3489 -       case SO_RCVLOWAT:
3490 -               v.val = sk->sk_rcvlowat;
3491 -               break;
3492 -
3493 -       case SO_SNDLOWAT:
3494 -               v.val=1;
3495 -               break;
3496 -
3497 -       case SO_PASSCRED:
3498 -               v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
3499 -               break;
3500 -
3501 -       case SO_PEERCRED:
3502 -               if (len > sizeof(sk->sk_peercred))
3503 -                       len = sizeof(sk->sk_peercred);
3504 -               if (copy_to_user(optval, &sk->sk_peercred, len))
3505 -                       return -EFAULT;
3506 -               goto lenout;
3507 -
3508 -       case SO_PEERNAME:
3509 -       {
3510 -               char address[128];
3511 -
3512 -               if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
3513 -                       return -ENOTCONN;
3514 -               if (lv < len)
3515 -                       return -EINVAL;
3516 -               if (copy_to_user(optval, address, len))
3517 -                       return -EFAULT;
3518 -               goto lenout;
3519 -       }
3520 -
3521 -       /* Dubious BSD thing... Probably nobody even uses it, but
3522 -        * the UNIX standard wants it for whatever reason... -DaveM
3523 -        */
3524 -       case SO_ACCEPTCONN:
3525 -               v.val = sk->sk_state == TCP_LISTEN;
3526 -               break;
3527 -
3528 -       case SO_PASSSEC:
3529 -               v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
3530 -               break;
3531 -
3532 -       case SO_PEERSEC:
3533 -               return security_socket_getpeersec_stream(sock, optval, optlen, len);
3534 -
3535 -       case SO_MARK:
3536 -               v.val = sk->sk_mark;
3537 -               break;
3538 -
3539 -       default:
3540 -               return -ENOPROTOOPT;
3541 -       }
3542 -
3543 -       if (len > lv)
3544 -               len = lv;
3545 -       if (copy_to_user(optval, &v, len))
3546 -               return -EFAULT;
3547 -lenout:
3548 -       if (put_user(len, optlen))
3549 -               return -EFAULT;
3550 -       return 0;
3551 -}
3552 -
3553 -/*
3554 - * Initialize an sk_lock.
3555 - *
3556 - * (We also register the sk_lock with the lock validator.)
3557 - */
3558 -static inline void sock_lock_init(struct sock *sk)
3559 -{
3560 -       sock_lock_init_class_and_name(sk,
3561 -                       af_family_slock_key_strings[sk->sk_family],
3562 -                       af_family_slock_keys + sk->sk_family,
3563 -                       af_family_key_strings[sk->sk_family],
3564 -                       af_family_keys + sk->sk_family);
3565 -}
3566 -
3567 -static void sock_copy(struct sock *nsk, const struct sock *osk)
3568 -{
3569 -#ifdef CONFIG_SECURITY_NETWORK
3570 -       void *sptr = nsk->sk_security;
3571 -#endif
3572 -
3573 -       memcpy(nsk, osk, osk->sk_prot->obj_size);
3574 -#ifdef CONFIG_SECURITY_NETWORK
3575 -       nsk->sk_security = sptr;
3576 -       security_sk_clone(osk, nsk);
3577 -#endif
3578 -}
3579 -
3580 -static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
3581 -               int family)
3582 -{
3583 -       struct sock *sk;
3584 -       struct kmem_cache *slab;
3585 -
3586 -       slab = prot->slab;
3587 -       if (slab != NULL)
3588 -               sk = kmem_cache_alloc(slab, priority);
3589 -       else
3590 -               sk = kmalloc(prot->obj_size, priority);
3591 -
3592 -       if (sk != NULL) {
3593 -               if (security_sk_alloc(sk, family, priority))
3594 -                       goto out_free;
3595 -
3596 -               if (!try_module_get(prot->owner))
3597 -                       goto out_free_sec;
3598 -       }
3599 -               sock_vx_init(sk);
3600 -               sock_nx_init(sk);
3601 -
3602 -       return sk;
3603 -
3604 -out_free_sec:
3605 -       security_sk_free(sk);
3606 -out_free:
3607 -       if (slab != NULL)
3608 -               kmem_cache_free(slab, sk);
3609 -       else
3610 -               kfree(sk);
3611 -       return NULL;
3612 -}
3613 -
3614 -static void sk_prot_free(struct proto *prot, struct sock *sk)
3615 -{
3616 -       struct kmem_cache *slab;
3617 -       struct module *owner;
3618 -
3619 -       owner = prot->owner;
3620 -       slab = prot->slab;
3621 -
3622 -       security_sk_free(sk);
3623 -       if (slab != NULL)
3624 -               kmem_cache_free(slab, sk);
3625 -       else
3626 -               kfree(sk);
3627 -       module_put(owner);
3628 -}
3629 -
3630 -/**
3631 - *     sk_alloc - All socket objects are allocated here
3632 - *     @net: the applicable net namespace
3633 - *     @family: protocol family
3634 - *     @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
3635 - *     @prot: struct proto associated with this new sock instance
3636 - */
3637 -struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
3638 -                     struct proto *prot)
3639 -{
3640 -       struct sock *sk;
3641 -
3642 -       sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
3643 -       if (sk) {
3644 -               sk->sk_family = family;
3645 -               /*
3646 -                * See comment in struct sock definition to understand
3647 -                * why we need sk_prot_creator -acme
3648 -                */
3649 -               sk->sk_prot = sk->sk_prot_creator = prot;
3650 -               sock_lock_init(sk);
3651 -               sock_net_set(sk, get_net(net));
3652 -       }
3653 -
3654 -       return sk;
3655 -}
3656 -
3657 -void sk_free(struct sock *sk)
3658 -{
3659 -       struct sk_filter *filter;
3660 -
3661 -       if (sk->sk_destruct)
3662 -               sk->sk_destruct(sk);
3663 -
3664 -       filter = rcu_dereference(sk->sk_filter);
3665 -       if (filter) {
3666 -               sk_filter_uncharge(sk, filter);
3667 -               rcu_assign_pointer(sk->sk_filter, NULL);
3668 -       }
3669 -
3670 -       sock_disable_timestamp(sk);
3671 -
3672 -       if (atomic_read(&sk->sk_omem_alloc))
3673 -               printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
3674 -                      __func__, atomic_read(&sk->sk_omem_alloc));
3675 -
3676 -       put_net(sock_net(sk));
3677 -       vx_sock_dec(sk);
3678 -       clr_vx_info(&sk->sk_vx_info);
3679 -       sk->sk_xid = -1;
3680 -       clr_nx_info(&sk->sk_nx_info);
3681 -       sk->sk_nid = -1;
3682 -       sk_prot_free(sk->sk_prot_creator, sk);
3683 -}
3684 -
3685 -/*
3686 - * Last sock_put should drop the reference to sk->sk_net. It has already
3687 - * been dropped in sk_change_net. Taking a reference to the stopping namespace
3688 - * is not an option.
3689 - * Take a reference to the socket to remove it from the hash _alive_ and after that
3690 - * destroy it in the context of init_net.
3691 - */
3692 -void sk_release_kernel(struct sock *sk)
3693 -{
3694 -       if (sk == NULL || sk->sk_socket == NULL)
3695 -               return;
3696 -
3697 -       sock_hold(sk);
3698 -       sock_release(sk->sk_socket);
3699 -       release_net(sock_net(sk));
3700 -       sock_net_set(sk, get_net(&init_net));
3701 -       sock_put(sk);
3702 -}
3703 -EXPORT_SYMBOL(sk_release_kernel);
3704 -
3705 -struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
3706 -{
3707 -       struct sock *newsk;
3708 -
3709 -       newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
3710 -       if (newsk != NULL) {
3711 -               struct sk_filter *filter;
3712 -
3713 -               sock_copy(newsk, sk);
3714 -
3715 -               /* SANITY */
3716 -               get_net(sock_net(newsk));
3717 -               sock_vx_init(newsk);
3718 -               sock_nx_init(newsk);
3719 -               sk_node_init(&newsk->sk_node);
3720 -               sock_lock_init(newsk);
3721 -               bh_lock_sock(newsk);
3722 -               newsk->sk_backlog.head  = newsk->sk_backlog.tail = NULL;
3723 -
3724 -               atomic_set(&newsk->sk_rmem_alloc, 0);
3725 -               atomic_set(&newsk->sk_wmem_alloc, 0);
3726 -               atomic_set(&newsk->sk_omem_alloc, 0);
3727 -               skb_queue_head_init(&newsk->sk_receive_queue);
3728 -               skb_queue_head_init(&newsk->sk_write_queue);
3729 -#ifdef CONFIG_NET_DMA
3730 -               skb_queue_head_init(&newsk->sk_async_wait_queue);
3731 -#endif
3732 -
3733 -               rwlock_init(&newsk->sk_dst_lock);
3734 -               rwlock_init(&newsk->sk_callback_lock);
3735 -               lockdep_set_class_and_name(&newsk->sk_callback_lock,
3736 -                               af_callback_keys + newsk->sk_family,
3737 -                               af_family_clock_key_strings[newsk->sk_family]);
3738 -
3739 -               newsk->sk_dst_cache     = NULL;
3740 -               newsk->sk_wmem_queued   = 0;
3741 -               newsk->sk_forward_alloc = 0;
3742 -               newsk->sk_send_head     = NULL;
3743 -               newsk->sk_userlocks     = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
3744 -
3745 -               sock_reset_flag(newsk, SOCK_DONE);
3746 -               skb_queue_head_init(&newsk->sk_error_queue);
3747 -
3748 -               filter = newsk->sk_filter;
3749 -               if (filter != NULL)
3750 -                       sk_filter_charge(newsk, filter);
3751 -
3752 -               if (unlikely(xfrm_sk_clone_policy(newsk))) {
3753 -                       /* It is still raw copy of parent, so invalidate
3754 -                        * destructor and make plain sk_free() */
3755 -                       newsk->sk_destruct = NULL;
3756 -                       sk_free(newsk);
3757 -                       newsk = NULL;
3758 -                       goto out;
3759 -               }
3760 -
3761 -               newsk->sk_err      = 0;
3762 -               newsk->sk_priority = 0;
3763 -               atomic_set(&newsk->sk_refcnt, 2);
3764 -
3765 -               set_vx_info(&newsk->sk_vx_info, sk->sk_vx_info);
3766 -               newsk->sk_xid = sk->sk_xid;
3767 -               vx_sock_inc(newsk);
3768 -               set_nx_info(&newsk->sk_nx_info, sk->sk_nx_info);
3769 -               newsk->sk_nid = sk->sk_nid;
3770 -
3771 -               /*
3772 -                * Increment the counter in the same struct proto as the master
3773 -                * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
3774 -                * is the same as sk->sk_prot->socks, as this field was copied
3775 -                * with memcpy).
3776 -                *
3777 -                * This _changes_ the previous behaviour, where
3778 -                * tcp_create_openreq_child always was incrementing the
3779 -                * equivalent to tcp_prot->socks (inet_sock_nr), so this has
3780 -                * to be taken into account in all callers. -acme
3781 -                */
3782 -               sk_refcnt_debug_inc(newsk);
3783 -               sk_set_socket(newsk, NULL);
3784 -               newsk->sk_sleep  = NULL;
3785 -
3786 -               if (newsk->sk_prot->sockets_allocated)
3787 -                       atomic_inc(newsk->sk_prot->sockets_allocated);
3788 -       }
3789 -out:
3790 -       return newsk;
3791 -}
3792 -
3793 -EXPORT_SYMBOL_GPL(sk_clone);
3794 -
3795 -void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
3796 -{
3797 -       __sk_dst_set(sk, dst);
3798 -       sk->sk_route_caps = dst->dev->features;
3799 -       if (sk->sk_route_caps & NETIF_F_GSO)
3800 -               sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
3801 -       if (sk_can_gso(sk)) {
3802 -               if (dst->header_len) {
3803 -                       sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
3804 -               } else {
3805 -                       sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
3806 -                       sk->sk_gso_max_size = dst->dev->gso_max_size;
3807 -               }
3808 -       }
3809 -}
3810 -EXPORT_SYMBOL_GPL(sk_setup_caps);
3811 -
3812 -void __init sk_init(void)
3813 -{
3814 -       if (num_physpages <= 4096) {
3815 -               sysctl_wmem_max = 32767;
3816 -               sysctl_rmem_max = 32767;
3817 -               sysctl_wmem_default = 32767;
3818 -               sysctl_rmem_default = 32767;
3819 -       } else if (num_physpages >= 131072) {
3820 -               sysctl_wmem_max = 131071;
3821 -               sysctl_rmem_max = 131071;
3822 -       }
3823 -}
3824 -
3825 -/*
3826 - *     Simple resource managers for sockets.
3827 - */
3828 -
3829 -
3830 -/*
3831 - * Write buffer destructor automatically called from kfree_skb.
3832 - */
3833 -void sock_wfree(struct sk_buff *skb)
3834 -{
3835 -       struct sock *sk = skb->sk;
3836 -
3837 -       /* In case it might be waiting for more memory. */
3838 -       atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
3839 -       if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
3840 -               sk->sk_write_space(sk);
3841 -       sock_put(sk);
3842 -}
3843 -
3844 -/*
3845 - * Read buffer destructor automatically called from kfree_skb.
3846 - */
3847 -void sock_rfree(struct sk_buff *skb)
3848 -{
3849 -       struct sock *sk = skb->sk;
3850 -
3851 -       atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
3852 -       sk_mem_uncharge(skb->sk, skb->truesize);
3853 -}
3854 -
3855 -
3856 -int sock_i_uid(struct sock *sk)
3857 -{
3858 -       int uid;
3859 -
3860 -       read_lock(&sk->sk_callback_lock);
3861 -       uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
3862 -       read_unlock(&sk->sk_callback_lock);
3863 -       return uid;
3864 -}
3865 -
3866 -unsigned long sock_i_ino(struct sock *sk)
3867 -{
3868 -       unsigned long ino;
3869 -
3870 -       read_lock(&sk->sk_callback_lock);
3871 -       ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
3872 -       read_unlock(&sk->sk_callback_lock);
3873 -       return ino;
3874 -}
3875 -
3876 -/*
3877 - * Allocate a skb from the socket's send buffer.
3878 - */
3879 -struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
3880 -                            gfp_t priority)
3881 -{
3882 -       if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
3883 -               struct sk_buff * skb = alloc_skb(size, priority);
3884 -               if (skb) {
3885 -                       skb_set_owner_w(skb, sk);
3886 -                       return skb;
3887 -               }
3888 -       }
3889 -       return NULL;
3890 -}
3891 -
3892 -/*
3893 - * Allocate a skb from the socket's receive buffer.
3894 - */
3895 -struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
3896 -                            gfp_t priority)
3897 -{
3898 -       if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
3899 -               struct sk_buff *skb = alloc_skb(size, priority);
3900 -               if (skb) {
3901 -                       skb_set_owner_r(skb, sk);
3902 -                       return skb;
3903 -               }
3904 -       }
3905 -       return NULL;
3906 -}
3907 -
3908 -/*
3909 - * Allocate a memory block from the socket's option memory buffer.
3910 - */
3911 -void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
3912 -{
3913 -       if ((unsigned)size <= sysctl_optmem_max &&
3914 -           atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
3915 -               void *mem;
3916 -               /* First do the add, to avoid the race if kmalloc
3917 -                * might sleep.
3918 -                */
3919 -               atomic_add(size, &sk->sk_omem_alloc);
3920 -               mem = kmalloc(size, priority);
3921 -               if (mem)
3922 -                       return mem;
3923 -               atomic_sub(size, &sk->sk_omem_alloc);
3924 -       }
3925 -       return NULL;
3926 -}
3927 -
3928 -/*
3929 - * Free an option memory block.
3930 - */
3931 -void sock_kfree_s(struct sock *sk, void *mem, int size)
3932 -{
3933 -       kfree(mem);
3934 -       atomic_sub(size, &sk->sk_omem_alloc);
3935 -}
3936 -
3937 -/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
3938 -   I think these locks should be removed for datagram sockets.
3939 - */
3940 -static long sock_wait_for_wmem(struct sock * sk, long timeo)
3941 -{
3942 -       DEFINE_WAIT(wait);
3943 -
3944 -       clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
3945 -       for (;;) {
3946 -               if (!timeo)
3947 -                       break;
3948 -               if (signal_pending(current))
3949 -                       break;
3950 -               set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
3951 -               prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
3952 -               if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
3953 -                       break;
3954 -               if (sk->sk_shutdown & SEND_SHUTDOWN)
3955 -                       break;
3956 -               if (sk->sk_err)
3957 -                       break;
3958 -               timeo = schedule_timeout(timeo);
3959 -       }
3960 -       finish_wait(sk->sk_sleep, &wait);
3961 -       return timeo;
3962 -}
3963 -
3964 -
3965 -/*
3966 - *     Generic send/receive buffer handlers
3967 - */
3968 -
3969 -static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
3970 -                                           unsigned long header_len,
3971 -                                           unsigned long data_len,
3972 -                                           int noblock, int *errcode)
3973 -{
3974 -       struct sk_buff *skb;
3975 -       gfp_t gfp_mask;
3976 -       long timeo;
3977 -       int err;
3978 -
3979 -       gfp_mask = sk->sk_allocation;
3980 -       if (gfp_mask & __GFP_WAIT)
3981 -               gfp_mask |= __GFP_REPEAT;
3982 -
3983 -       timeo = sock_sndtimeo(sk, noblock);
3984 -       while (1) {
3985 -               err = sock_error(sk);
3986 -               if (err != 0)
3987 -                       goto failure;
3988 -
3989 -               err = -EPIPE;
3990 -               if (sk->sk_shutdown & SEND_SHUTDOWN)
3991 -                       goto failure;
3992 -
3993 -               if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
3994 -                       skb = alloc_skb(header_len, gfp_mask);
3995 -                       if (skb) {
3996 -                               int npages;
3997 -                               int i;
3998 -
3999 -                               /* No pages, we're done... */
4000 -                               if (!data_len)
4001 -                                       break;
4002 -
4003 -                               npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
4004 -                               skb->truesize += data_len;
4005 -                               skb_shinfo(skb)->nr_frags = npages;
4006 -                               for (i = 0; i < npages; i++) {
4007 -                                       struct page *page;
4008 -                                       skb_frag_t *frag;
4009 -
4010 -                                       page = alloc_pages(sk->sk_allocation, 0);
4011 -                                       if (!page) {
4012 -                                               err = -ENOBUFS;
4013 -                                               skb_shinfo(skb)->nr_frags = i;
4014 -                                               kfree_skb(skb);
4015 -                                               goto failure;
4016 -                                       }
4017 -
4018 -                                       frag = &skb_shinfo(skb)->frags[i];
4019 -                                       frag->page = page;
4020 -                                       frag->page_offset = 0;
4021 -                                       frag->size = (data_len >= PAGE_SIZE ?
4022 -                                                     PAGE_SIZE :
4023 -                                                     data_len);
4024 -                                       data_len -= PAGE_SIZE;
4025 -                               }
4026 -
4027 -                               /* Full success... */
4028 -                               break;
4029 -                       }
4030 -                       err = -ENOBUFS;
4031 -                       goto failure;
4032 -               }
4033 -               set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
4034 -               set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
4035 -               err = -EAGAIN;
4036 -               if (!timeo)
4037 -                       goto failure;
4038 -               if (signal_pending(current))
4039 -                       goto interrupted;
4040 -               timeo = sock_wait_for_wmem(sk, timeo);
4041 -       }
4042 -
4043 -       skb_set_owner_w(skb, sk);
4044 -       return skb;
4045 -
4046 -interrupted:
4047 -       err = sock_intr_errno(timeo);
4048 -failure:
4049 -       *errcode = err;
4050 -       return NULL;
4051 -}
4052 -
4053 -struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
4054 -                                   int noblock, int *errcode)
4055 -{
4056 -       return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
4057 -}
4058 -
4059 -static void __lock_sock(struct sock *sk)
4060 -{
4061 -       DEFINE_WAIT(wait);
4062 -
4063 -       for (;;) {
4064 -               prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
4065 -                                       TASK_UNINTERRUPTIBLE);
4066 -               spin_unlock_bh(&sk->sk_lock.slock);
4067 -               schedule();
4068 -               spin_lock_bh(&sk->sk_lock.slock);
4069 -               if (!sock_owned_by_user(sk))
4070 -                       break;
4071 -       }
4072 -       finish_wait(&sk->sk_lock.wq, &wait);
4073 -}
4074 -
4075 -static void __release_sock(struct sock *sk)
4076 -{
4077 -       struct sk_buff *skb = sk->sk_backlog.head;
4078 -
4079 -       do {
4080 -               sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
4081 -               bh_unlock_sock(sk);
4082 -
4083 -               do {
4084 -                       struct sk_buff *next = skb->next;
4085 -
4086 -                       skb->next = NULL;
4087 -                       sk->sk_backlog_rcv(sk, skb);
4088 -
4089 -                       /*
4090 -                        * We are in process context here with softirqs
4091 -                        * disabled, use cond_resched_softirq() to preempt.
4092 -                        * This is safe to do because we've taken the backlog
4093 -                        * queue private:
4094 -                        */
4095 -                       cond_resched_softirq();
4096 -
4097 -                       skb = next;
4098 -               } while (skb != NULL);
4099 -
4100 -               bh_lock_sock(sk);
4101 -       } while ((skb = sk->sk_backlog.head) != NULL);
4102 -}
4103 -
4104 -/**
4105 - * sk_wait_data - wait for data to arrive at sk_receive_queue
4106 - * @sk:    sock to wait on
4107 - * @timeo: for how long
4108 - *
4109 - * Now socket state including sk->sk_err is changed only under lock,
4110 - * hence we may omit checks after joining wait queue.
4111 - * We check receive queue before schedule() only as optimization;
4112 - * it is very likely that release_sock() added new data.
4113 - */
4114 -int sk_wait_data(struct sock *sk, long *timeo)
4115 -{
4116 -       int rc;
4117 -       DEFINE_WAIT(wait);
4118 -
4119 -       prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
4120 -       set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
4121 -       rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
4122 -       clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
4123 -       finish_wait(sk->sk_sleep, &wait);
4124 -       return rc;
4125 -}
4126 -
4127 -EXPORT_SYMBOL(sk_wait_data);
4128 -
4129 -/**
4130 - *     __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
4131 - *     @sk: socket
4132 - *     @size: memory size to allocate
4133 - *     @kind: allocation type
4134 - *
4135 - *     If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
4136 - *     rmem allocation. This function assumes that protocols which have
4137 - *     memory_pressure use sk_wmem_queued as write buffer accounting.
4138 - */
4139 -int __sk_mem_schedule(struct sock *sk, int size, int kind)
4140 -{
4141 -       struct proto *prot = sk->sk_prot;
4142 -       int amt = sk_mem_pages(size);
4143 -       int allocated;
4144 -
4145 -       sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
4146 -       allocated = atomic_add_return(amt, prot->memory_allocated);
4147 -
4148 -       /* Under limit. */
4149 -       if (allocated <= prot->sysctl_mem[0]) {
4150 -               if (prot->memory_pressure && *prot->memory_pressure)
4151 -                       *prot->memory_pressure = 0;
4152 -               return 1;
4153 -       }
4154 -
4155 -       /* Under pressure. */
4156 -       if (allocated > prot->sysctl_mem[1])
4157 -               if (prot->enter_memory_pressure)
4158 -                       prot->enter_memory_pressure(sk);
4159 -
4160 -       /* Over hard limit. */
4161 -       if (allocated > prot->sysctl_mem[2])
4162 -               goto suppress_allocation;
4163 -
4164 -       /* guarantee minimum buffer size under pressure */
4165 -       if (kind == SK_MEM_RECV) {
4166 -               if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
4167 -                       return 1;
4168 -       } else { /* SK_MEM_SEND */
4169 -               if (sk->sk_type == SOCK_STREAM) {
4170 -                       if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
4171 -                               return 1;
4172 -               } else if (atomic_read(&sk->sk_wmem_alloc) <
4173 -                          prot->sysctl_wmem[0])
4174 -                               return 1;
4175 -       }
4176 -
4177 -       if (prot->memory_pressure) {
4178 -               if (!*prot->memory_pressure ||
4179 -                   prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) *
4180 -                   sk_mem_pages(sk->sk_wmem_queued +
4181 -                                atomic_read(&sk->sk_rmem_alloc) +
4182 -                                sk->sk_forward_alloc))
4183 -                       return 1;
4184 -       }
4185 -
4186 -suppress_allocation:
4187 -
4188 -       if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
4189 -               sk_stream_moderate_sndbuf(sk);
4190 -
4191 -               /* Fail only if socket is _under_ its sndbuf.
4192 -                * In this case we cannot block, so that we have to fail.
4193 -                */
4194 -               if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
4195 -                       return 1;
4196 -       }
4197 -
4198 -       /* Alas. Undo changes. */
4199 -       sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
4200 -       atomic_sub(amt, prot->memory_allocated);
4201 -       return 0;
4202 -}
4203 -
4204 -EXPORT_SYMBOL(__sk_mem_schedule);
4205 -
4206 -/**
4207 - *     __sk_mem_reclaim - reclaim memory_allocated
4208 - *     @sk: socket
4209 - */
4210 -void __sk_mem_reclaim(struct sock *sk)
4211 -{
4212 -       struct proto *prot = sk->sk_prot;
4213 -
4214 -       atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
4215 -                  prot->memory_allocated);
4216 -       sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
4217 -
4218 -       if (prot->memory_pressure && *prot->memory_pressure &&
4219 -           (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]))
4220 -               *prot->memory_pressure = 0;
4221 -}
4222 -
4223 -EXPORT_SYMBOL(__sk_mem_reclaim);
4224 -
4225 -
4226 -/*
4227 - * Set of default routines for initialising struct proto_ops when
4228 - * the protocol does not support a particular function. In certain
4229 - * cases where it makes no sense for a protocol to have a "do nothing"
4230 - * function, some default processing is provided.
4231 - */
4232 -
4233 -int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
4234 -{
4235 -       return -EOPNOTSUPP;
4236 -}
4237 -
4238 -int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
4239 -                   int len, int flags)
4240 -{
4241 -       return -EOPNOTSUPP;
4242 -}
4243 -
4244 -int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
4245 -{
4246 -       return -EOPNOTSUPP;
4247 -}
4248 -
4249 -int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
4250 -{
4251 -       return -EOPNOTSUPP;
4252 -}
4253 -
4254 -int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
4255 -                   int *len, int peer)
4256 -{
4257 -       return -EOPNOTSUPP;
4258 -}
4259 -
4260 -unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
4261 -{
4262 -       return 0;
4263 -}
4264 -
4265 -int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
4266 -{
4267 -       return -EOPNOTSUPP;
4268 -}
4269 -
4270 -int sock_no_listen(struct socket *sock, int backlog)
4271 -{
4272 -       return -EOPNOTSUPP;
4273 -}
4274 -
4275 -int sock_no_shutdown(struct socket *sock, int how)
4276 -{
4277 -       return -EOPNOTSUPP;
4278 -}
4279 -
4280 -int sock_no_setsockopt(struct socket *sock, int level, int optname,
4281 -                   char __user *optval, int optlen)
4282 -{
4283 -       return -EOPNOTSUPP;
4284 -}
4285 -
4286 -int sock_no_getsockopt(struct socket *sock, int level, int optname,
4287 -                   char __user *optval, int __user *optlen)
4288 -{
4289 -       return -EOPNOTSUPP;
4290 -}
4291 -
4292 -int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
4293 -                   size_t len)
4294 -{
4295 -       return -EOPNOTSUPP;
4296 -}
4297 -
4298 -int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
4299 -                   size_t len, int flags)
4300 -{
4301 -       return -EOPNOTSUPP;
4302 -}
4303 -
4304 -int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
4305 -{
4306 -       /* Mirror missing mmap method error code */
4307 -       return -ENODEV;
4308 -}
4309 -
4310 -ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
4311 -{
4312 -       ssize_t res;
4313 -       struct msghdr msg = {.msg_flags = flags};
4314 -       struct kvec iov;
4315 -       char *kaddr = kmap(page);
4316 -       iov.iov_base = kaddr + offset;
4317 -       iov.iov_len = size;
4318 -       res = kernel_sendmsg(sock, &msg, &iov, 1, size);
4319 -       kunmap(page);
4320 -       return res;
4321 -}
4322 -
4323 -/*
4324 - *     Default Socket Callbacks
4325 - */
4326 -
4327 -static void sock_def_wakeup(struct sock *sk)
4328 -{
4329 -       read_lock(&sk->sk_callback_lock);
4330 -       if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
4331 -               wake_up_interruptible_all(sk->sk_sleep);
4332 -       read_unlock(&sk->sk_callback_lock);
4333 -}
4334 -
4335 -static void sock_def_error_report(struct sock *sk)
4336 -{
4337 -       read_lock(&sk->sk_callback_lock);
4338 -       if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
4339 -               wake_up_interruptible(sk->sk_sleep);
4340 -       sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
4341 -       read_unlock(&sk->sk_callback_lock);
4342 -}
4343 -
4344 -static void sock_def_readable(struct sock *sk, int len)
4345 -{
4346 -       read_lock(&sk->sk_callback_lock);
4347 -       if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
4348 -               wake_up_interruptible_sync(sk->sk_sleep);
4349 -       sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
4350 -       read_unlock(&sk->sk_callback_lock);
4351 -}
4352 -
4353 -static void sock_def_write_space(struct sock *sk)
4354 -{
4355 -       read_lock(&sk->sk_callback_lock);
4356 -
4357 -       /* Do not wake up a writer until he can make "significant"
4358 -        * progress.  --DaveM
4359 -        */
4360 -       if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
4361 -               if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
4362 -                       wake_up_interruptible_sync(sk->sk_sleep);
4363 -
4364 -               /* Should agree with poll, otherwise some programs break */
4365 -               if (sock_writeable(sk))
4366 -                       sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
4367 -       }
4368 -
4369 -       read_unlock(&sk->sk_callback_lock);
4370 -}
4371 -
4372 -static void sock_def_destruct(struct sock *sk)
4373 -{
4374 -       kfree(sk->sk_protinfo);
4375 -}
4376 -
4377 -void sk_send_sigurg(struct sock *sk)
4378 -{
4379 -       if (sk->sk_socket && sk->sk_socket->file)
4380 -               if (send_sigurg(&sk->sk_socket->file->f_owner))
4381 -                       sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
4382 -}
4383 -
4384 -void sk_reset_timer(struct sock *sk, struct timer_list* timer,
4385 -                   unsigned long expires)
4386 -{
4387 -       if (!mod_timer(timer, expires))
4388 -               sock_hold(sk);
4389 -}
4390 -
4391 -EXPORT_SYMBOL(sk_reset_timer);
4392 -
4393 -void sk_stop_timer(struct sock *sk, struct timer_list* timer)
4394 -{
4395 -       if (timer_pending(timer) && del_timer(timer))
4396 -               __sock_put(sk);
4397 -}
4398 -
4399 -EXPORT_SYMBOL(sk_stop_timer);
4400 -
4401 -void sock_init_data(struct socket *sock, struct sock *sk)
4402 -{
4403 -       skb_queue_head_init(&sk->sk_receive_queue);
4404 -       skb_queue_head_init(&sk->sk_write_queue);
4405 -       skb_queue_head_init(&sk->sk_error_queue);
4406 -#ifdef CONFIG_NET_DMA
4407 -       skb_queue_head_init(&sk->sk_async_wait_queue);
4408 -#endif
4409 -
4410 -       sk->sk_send_head        =       NULL;
4411 -
4412 -       init_timer(&sk->sk_timer);
4413 -
4414 -       sk->sk_allocation       =       GFP_KERNEL;
4415 -       sk->sk_rcvbuf           =       sysctl_rmem_default;
4416 -       sk->sk_sndbuf           =       sysctl_wmem_default;
4417 -       sk->sk_state            =       TCP_CLOSE;
4418 -       sk_set_socket(sk, sock);
4419 -
4420 -       sock_set_flag(sk, SOCK_ZAPPED);
4421 -
4422 -       if (sock) {
4423 -               sk->sk_type     =       sock->type;
4424 -               sk->sk_sleep    =       &sock->wait;
4425 -               sock->sk        =       sk;
4426 -       } else
4427 -               sk->sk_sleep    =       NULL;
4428 -
4429 -       rwlock_init(&sk->sk_dst_lock);
4430 -       rwlock_init(&sk->sk_callback_lock);
4431 -       lockdep_set_class_and_name(&sk->sk_callback_lock,
4432 -                       af_callback_keys + sk->sk_family,
4433 -                       af_family_clock_key_strings[sk->sk_family]);
4434 -
4435 -       sk->sk_state_change     =       sock_def_wakeup;
4436 -       sk->sk_data_ready       =       sock_def_readable;
4437 -       sk->sk_write_space      =       sock_def_write_space;
4438 -       sk->sk_error_report     =       sock_def_error_report;
4439 -       sk->sk_destruct         =       sock_def_destruct;
4440 -
4441 -       sk->sk_sndmsg_page      =       NULL;
4442 -       sk->sk_sndmsg_off       =       0;
4443 -
4444 -       sk->sk_peercred.pid     =       0;
4445 -       sk->sk_peercred.uid     =       -1;
4446 -       sk->sk_peercred.gid     =       -1;
4447 -       sk->sk_write_pending    =       0;
4448 -       sk->sk_rcvlowat         =       1;
4449 -       sk->sk_rcvtimeo         =       MAX_SCHEDULE_TIMEOUT;
4450 -       sk->sk_sndtimeo         =       MAX_SCHEDULE_TIMEOUT;
4451 -
4452 -       sk->sk_stamp = ktime_set(-1L, 0);
4453 -
4454 -       set_vx_info(&sk->sk_vx_info, current->vx_info);
4455 -       sk->sk_xid = vx_current_xid();
4456 -       vx_sock_inc(sk);
4457 -       set_nx_info(&sk->sk_nx_info, current->nx_info);
4458 -       sk->sk_nid = nx_current_nid();
4459 -       atomic_set(&sk->sk_refcnt, 1);
4460 -       atomic_set(&sk->sk_drops, 0);
4461 -}
4462 -
4463 -void lock_sock_nested(struct sock *sk, int subclass)
4464 -{
4465 -       might_sleep();
4466 -       spin_lock_bh(&sk->sk_lock.slock);
4467 -       if (sk->sk_lock.owned)
4468 -               __lock_sock(sk);
4469 -       sk->sk_lock.owned = 1;
4470 -       spin_unlock(&sk->sk_lock.slock);
4471 -       /*
4472 -        * The sk_lock has mutex_lock() semantics here:
4473 -        */
4474 -       mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
4475 -       local_bh_enable();
4476 -}
4477 -
4478 -EXPORT_SYMBOL(lock_sock_nested);
4479 -
4480 -void release_sock(struct sock *sk)
4481 -{
4482 -       /*
4483 -        * The sk_lock has mutex_unlock() semantics:
4484 -        */
4485 -       mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
4486 -
4487 -       spin_lock_bh(&sk->sk_lock.slock);
4488 -       if (sk->sk_backlog.tail)
4489 -               __release_sock(sk);
4490 -       sk->sk_lock.owned = 0;
4491 -       if (waitqueue_active(&sk->sk_lock.wq))
4492 -               wake_up(&sk->sk_lock.wq);
4493 -       spin_unlock_bh(&sk->sk_lock.slock);
4494 -}
4495 -EXPORT_SYMBOL(release_sock);
4496 -
4497 -int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
4498 -{
4499 -       struct timeval tv;
4500 -       if (!sock_flag(sk, SOCK_TIMESTAMP))
4501 -               sock_enable_timestamp(sk);
4502 -       tv = ktime_to_timeval(sk->sk_stamp);
4503 -       if (tv.tv_sec == -1)
4504 -               return -ENOENT;
4505 -       if (tv.tv_sec == 0) {
4506 -               sk->sk_stamp = ktime_get_real();
4507 -               tv = ktime_to_timeval(sk->sk_stamp);
4508 -       }
4509 -       return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
4510 -}
4511 -EXPORT_SYMBOL(sock_get_timestamp);
4512 -
4513 -int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
4514 -{
4515 -       struct timespec ts;
4516 -       if (!sock_flag(sk, SOCK_TIMESTAMP))
4517 -               sock_enable_timestamp(sk);
4518 -       ts = ktime_to_timespec(sk->sk_stamp);
4519 -       if (ts.tv_sec == -1)
4520 -               return -ENOENT;
4521 -       if (ts.tv_sec == 0) {
4522 -               sk->sk_stamp = ktime_get_real();
4523 -               ts = ktime_to_timespec(sk->sk_stamp);
4524 -       }
4525 -       return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
4526 -}
4527 -EXPORT_SYMBOL(sock_get_timestampns);
4528 -
4529 -void sock_enable_timestamp(struct sock *sk)
4530 -{
4531 -       if (!sock_flag(sk, SOCK_TIMESTAMP)) {
4532 -               sock_set_flag(sk, SOCK_TIMESTAMP);
4533 -               net_enable_timestamp();
4534 -       }
4535 -}
4536 -
4537 -/*
4538 - *     Get a socket option on a socket.
4539 - *
4540 - *     FIX: POSIX 1003.1g is very ambiguous here. It states that
4541 - *     asynchronous errors should be reported by getsockopt. We assume
4542 - *     this means if you specify SO_ERROR (otherwise what's the point of it).
4543 - */
4544 -int sock_common_getsockopt(struct socket *sock, int level, int optname,
4545 -                          char __user *optval, int __user *optlen)
4546 -{
4547 -       struct sock *sk = sock->sk;
4548 -
4549 -       return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
4550 -}
4551 -
4552 -EXPORT_SYMBOL(sock_common_getsockopt);
4553 -
4554 -#ifdef CONFIG_COMPAT
4555 -int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
4556 -                                 char __user *optval, int __user *optlen)
4557 -{
4558 -       struct sock *sk = sock->sk;
4559 -
4560 -       if (sk->sk_prot->compat_getsockopt != NULL)
4561 -               return sk->sk_prot->compat_getsockopt(sk, level, optname,
4562 -                                                     optval, optlen);
4563 -       return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
4564 -}
4565 -EXPORT_SYMBOL(compat_sock_common_getsockopt);
4566 -#endif
4567 -
4568 -int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
4569 -                       struct msghdr *msg, size_t size, int flags)
4570 -{
4571 -       struct sock *sk = sock->sk;
4572 -       int addr_len = 0;
4573 -       int err;
4574 -
4575 -       err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
4576 -                                  flags & ~MSG_DONTWAIT, &addr_len);
4577 -       if (err >= 0)
4578 -               msg->msg_namelen = addr_len;
4579 -       return err;
4580 -}
4581 -
4582 -EXPORT_SYMBOL(sock_common_recvmsg);
4583 -
4584 -/*
4585 - *     Set socket options on an inet socket.
4586 - */
4587 -int sock_common_setsockopt(struct socket *sock, int level, int optname,
4588 -                          char __user *optval, int optlen)
4589 -{
4590 -       struct sock *sk = sock->sk;
4591 -
4592 -       return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
4593 -}
4594 -
4595 -EXPORT_SYMBOL(sock_common_setsockopt);
4596 -
4597 -#ifdef CONFIG_COMPAT
4598 -int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
4599 -                                 char __user *optval, int optlen)
4600 -{
4601 -       struct sock *sk = sock->sk;
4602 -
4603 -       if (sk->sk_prot->compat_setsockopt != NULL)
4604 -               return sk->sk_prot->compat_setsockopt(sk, level, optname,
4605 -                                                     optval, optlen);
4606 -       return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
4607 -}
4608 -EXPORT_SYMBOL(compat_sock_common_setsockopt);
4609 -#endif
4610 -
4611 -void sk_common_release(struct sock *sk)
4612 -{
4613 -       if (sk->sk_prot->destroy)
4614 -               sk->sk_prot->destroy(sk);
4615 -
4616 -       /*
4617 -        * Observation: when sock_common_release is called, processes have
4618 -        * no access to socket. But net still has.
4619 -        * Step one, detach it from networking:
4620 -        *
4621 -        * A. Remove from hash tables.
4622 -        */
4623 -
4624 -       sk->sk_prot->unhash(sk);
4625 -
4626 -       /*
4627 -        * At this point the socket cannot receive new packets, but some may
4628 -        * still be in flight because another CPU is running the receiver and
4629 -        * did the hash-table lookup before we unhashed the socket. They will
4630 -        * reach the receive queue and be purged by the socket destructor.
4631 -        *
4632 -        * Also, we still have packets pending on the receive queue and
4633 -        * probably our own packets waiting in device queues. sock_destroy
4634 -        * will drain the receive queue, but transmitted packets will delay
4635 -        * socket destruction until the last reference is released.
4636 -        */
4637 -
4638 -       sock_orphan(sk);
4639 -
4640 -       xfrm_sk_free_policy(sk);
4641 -
4642 -       sk_refcnt_debug_release(sk);
4643 -       sock_put(sk);
4644 -}
4645 -
4646 -EXPORT_SYMBOL(sk_common_release);
4647 -
4648 -static DEFINE_RWLOCK(proto_list_lock);
4649 -static LIST_HEAD(proto_list);
4650 -
4651 -#ifdef CONFIG_PROC_FS
4652 -#define PROTO_INUSE_NR 64      /* should be enough for the first time */
4653 -struct prot_inuse {
4654 -       int val[PROTO_INUSE_NR];
4655 -};
4656 -
4657 -static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
4658 -
4659 -#ifdef CONFIG_NET_NS
4660 -void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
4661 -{
4662 -       int cpu = smp_processor_id();
4663 -       per_cpu_ptr(net->core.inuse, cpu)->val[prot->inuse_idx] += val;
4664 -}
4665 -EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
4666 -
4667 -int sock_prot_inuse_get(struct net *net, struct proto *prot)
4668 -{
4669 -       int cpu, idx = prot->inuse_idx;
4670 -       int res = 0;
4671 -
4672 -       for_each_possible_cpu(cpu)
4673 -               res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];
4674 -
4675 -       return res >= 0 ? res : 0;
4676 -}
4677 -EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
4678 -
4679 -static int sock_inuse_init_net(struct net *net)
4680 -{
4681 -       net->core.inuse = alloc_percpu(struct prot_inuse);
4682 -       return net->core.inuse ? 0 : -ENOMEM;
4683 -}
4684 -
4685 -static void sock_inuse_exit_net(struct net *net)
4686 -{
4687 -       free_percpu(net->core.inuse);
4688 -}
4689 -
4690 -static struct pernet_operations net_inuse_ops = {
4691 -       .init = sock_inuse_init_net,
4692 -       .exit = sock_inuse_exit_net,
4693 -};
4694 -
4695 -static __init int net_inuse_init(void)
4696 -{
4697 -       if (register_pernet_subsys(&net_inuse_ops))
4698 -               panic("Cannot initialize net inuse counters");
4699 -
4700 -       return 0;
4701 -}
4702 -
4703 -core_initcall(net_inuse_init);
4704 -#else
4705 -static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
4706 -
4707 -void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
4708 -{
4709 -       __get_cpu_var(prot_inuse).val[prot->inuse_idx] += val;
4710 -}
4711 -EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
4712 -
4713 -int sock_prot_inuse_get(struct net *net, struct proto *prot)
4714 -{
4715 -       int cpu, idx = prot->inuse_idx;
4716 -       int res = 0;
4717 -
4718 -       for_each_possible_cpu(cpu)
4719 -               res += per_cpu(prot_inuse, cpu).val[idx];
4720 -
4721 -       return res >= 0 ? res : 0;
4722 -}
4723 -EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
4724 -#endif
4725 -
4726 -static void assign_proto_idx(struct proto *prot)
4727 -{
4728 -       prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
4729 -
4730 -       if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
4731 -               printk(KERN_ERR "PROTO_INUSE_NR exhausted\n");
4732 -               return;
4733 -       }
4734 -
4735 -       set_bit(prot->inuse_idx, proto_inuse_idx);
4736 -}
4737 -
4738 -static void release_proto_idx(struct proto *prot)
4739 -{
4740 -       if (prot->inuse_idx != PROTO_INUSE_NR - 1)
4741 -               clear_bit(prot->inuse_idx, proto_inuse_idx);
4742 -}
4743 -#else
4744 -static inline void assign_proto_idx(struct proto *prot)
4745 -{
4746 -}
4747 -
4748 -static inline void release_proto_idx(struct proto *prot)
4749 -{
4750 -}
4751 -#endif
4752 -
4753 -int proto_register(struct proto *prot, int alloc_slab)
4754 -{
4755 -       char *request_sock_slab_name = NULL;
4756 -       char *timewait_sock_slab_name;
4757 -
4758 -       if (alloc_slab) {
4759 -               prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
4760 -                                              SLAB_HWCACHE_ALIGN, NULL);
4761 -
4762 -               if (prot->slab == NULL) {
4763 -                       printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
4764 -                              prot->name);
4765 -                       goto out;
4766 -               }
4767 -
4768 -               if (prot->rsk_prot != NULL) {
4769 -                       static const char mask[] = "request_sock_%s";
4770 -
4771 -                       request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
4772 -                       if (request_sock_slab_name == NULL)
4773 -                               goto out_free_sock_slab;
4774 -
4775 -                       sprintf(request_sock_slab_name, mask, prot->name);
4776 -                       prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
4777 -                                                                prot->rsk_prot->obj_size, 0,
4778 -                                                                SLAB_HWCACHE_ALIGN, NULL);
4779 -
4780 -                       if (prot->rsk_prot->slab == NULL) {
4781 -                               printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
4782 -                                      prot->name);
4783 -                               goto out_free_request_sock_slab_name;
4784 -                       }
4785 -               }
4786 -
4787 -               if (prot->twsk_prot != NULL) {
4788 -                       static const char mask[] = "tw_sock_%s";
4789 -
4790 -                       timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
4791 -
4792 -                       if (timewait_sock_slab_name == NULL)
4793 -                               goto out_free_request_sock_slab;
4794 -
4795 -                       sprintf(timewait_sock_slab_name, mask, prot->name);
4796 -                       prot->twsk_prot->twsk_slab =
4797 -                               kmem_cache_create(timewait_sock_slab_name,
4798 -                                                 prot->twsk_prot->twsk_obj_size,
4799 -                                                 0, SLAB_HWCACHE_ALIGN,
4800 -                                                 NULL);
4801 -                       if (prot->twsk_prot->twsk_slab == NULL)
4802 -                               goto out_free_timewait_sock_slab_name;
4803 -               }
4804 -       }
4805 -
4806 -       write_lock(&proto_list_lock);
4807 -       list_add(&prot->node, &proto_list);
4808 -       assign_proto_idx(prot);
4809 -       write_unlock(&proto_list_lock);
4810 -       return 0;
4811 -
4812 -out_free_timewait_sock_slab_name:
4813 -       kfree(timewait_sock_slab_name);
4814 -out_free_request_sock_slab:
4815 -       if (prot->rsk_prot && prot->rsk_prot->slab) {
4816 -               kmem_cache_destroy(prot->rsk_prot->slab);
4817 -               prot->rsk_prot->slab = NULL;
4818 -       }
4819 -out_free_request_sock_slab_name:
4820 -       kfree(request_sock_slab_name);
4821 -out_free_sock_slab:
4822 -       kmem_cache_destroy(prot->slab);
4823 -       prot->slab = NULL;
4824 -out:
4825 -       return -ENOBUFS;
4826 -}
4827 -
4828 -EXPORT_SYMBOL(proto_register);
4829 -
4830 -void proto_unregister(struct proto *prot)
4831 -{
4832 -       write_lock(&proto_list_lock);
4833 -       release_proto_idx(prot);
4834 -       list_del(&prot->node);
4835 -       write_unlock(&proto_list_lock);
4836 -
4837 -       if (prot->slab != NULL) {
4838 -               kmem_cache_destroy(prot->slab);
4839 -               prot->slab = NULL;
4840 -       }
4841 -
4842 -       if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
4843 -               const char *name = kmem_cache_name(prot->rsk_prot->slab);
4844 -
4845 -               kmem_cache_destroy(prot->rsk_prot->slab);
4846 -               kfree(name);
4847 -               prot->rsk_prot->slab = NULL;
4848 -       }
4849 -
4850 -       if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
4851 -               const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);
4852 -
4853 -               kmem_cache_destroy(prot->twsk_prot->twsk_slab);
4854 -               kfree(name);
4855 -               prot->twsk_prot->twsk_slab = NULL;
4856 -       }
4857 -}
4858 -
4859 -EXPORT_SYMBOL(proto_unregister);
4860 -
4861 -#ifdef CONFIG_PROC_FS
4862 -static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
4863 -       __acquires(proto_list_lock)
4864 -{
4865 -       read_lock(&proto_list_lock);
4866 -       return seq_list_start_head(&proto_list, *pos);
4867 -}
4868 -
4869 -static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4870 -{
4871 -       return seq_list_next(v, &proto_list, pos);
4872 -}
4873 -
4874 -static void proto_seq_stop(struct seq_file *seq, void *v)
4875 -       __releases(proto_list_lock)
4876 -{
4877 -       read_unlock(&proto_list_lock);
4878 -}
4879 -
4880 -static char proto_method_implemented(const void *method)
4881 -{
4882 -       return method == NULL ? 'n' : 'y';
4883 -}
4884 -
4885 -static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
4886 -{
4887 -       seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
4888 -                       "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
4889 -                  proto->name,
4890 -                  proto->obj_size,
4891 -                  proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
4892 -                  proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
4893 -                  proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
4894 -                  proto->max_header,
4895 -                  proto->slab == NULL ? "no" : "yes",
4896 -                  module_name(proto->owner),
4897 -                  proto_method_implemented(proto->close),
4898 -                  proto_method_implemented(proto->connect),
4899 -                  proto_method_implemented(proto->disconnect),
4900 -                  proto_method_implemented(proto->accept),
4901 -                  proto_method_implemented(proto->ioctl),
4902 -                  proto_method_implemented(proto->init),
4903 -                  proto_method_implemented(proto->destroy),
4904 -                  proto_method_implemented(proto->shutdown),
4905 -                  proto_method_implemented(proto->setsockopt),
4906 -                  proto_method_implemented(proto->getsockopt),
4907 -                  proto_method_implemented(proto->sendmsg),
4908 -                  proto_method_implemented(proto->recvmsg),
4909 -                  proto_method_implemented(proto->sendpage),
4910 -                  proto_method_implemented(proto->bind),
4911 -                  proto_method_implemented(proto->backlog_rcv),
4912 -                  proto_method_implemented(proto->hash),
4913 -                  proto_method_implemented(proto->unhash),
4914 -                  proto_method_implemented(proto->get_port),
4915 -                  proto_method_implemented(proto->enter_memory_pressure));
4916 -}
4917 -
4918 -static int proto_seq_show(struct seq_file *seq, void *v)
4919 -{
4920 -       if (v == &proto_list)
4921 -               seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
4922 -                          "protocol",
4923 -                          "size",
4924 -                          "sockets",
4925 -                          "memory",
4926 -                          "press",
4927 -                          "maxhdr",
4928 -                          "slab",
4929 -                          "module",
4930 -                          "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
4931 -       else
4932 -               proto_seq_printf(seq, list_entry(v, struct proto, node));
4933 -       return 0;
4934 -}
4935 -
4936 -static const struct seq_operations proto_seq_ops = {
4937 -       .start  = proto_seq_start,
4938 -       .next   = proto_seq_next,
4939 -       .stop   = proto_seq_stop,
4940 -       .show   = proto_seq_show,
4941 -};
4942 -
4943 -static int proto_seq_open(struct inode *inode, struct file *file)
4944 -{
4945 -       return seq_open(file, &proto_seq_ops);
4946 -}
4947 -
4948 -static const struct file_operations proto_seq_fops = {
4949 -       .owner          = THIS_MODULE,
4950 -       .open           = proto_seq_open,
4951 -       .read           = seq_read,
4952 -       .llseek         = seq_lseek,
4953 -       .release        = seq_release,
4954 -};
4955 -
4956 -static int __init proto_init(void)
4957 -{
4958 -       /* register /proc/net/protocols */
4959 -       return proc_net_fops_create(&init_net, "protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
4960 -}
4961 -
4962 -subsys_initcall(proto_init);
4963 -
4964 -#endif /* PROC_FS */
4965 -
4966 -EXPORT_SYMBOL(sk_alloc);
4967 -EXPORT_SYMBOL(sk_free);
4968 -EXPORT_SYMBOL(sk_send_sigurg);
4969 -EXPORT_SYMBOL(sock_alloc_send_skb);
4970 -EXPORT_SYMBOL(sock_init_data);
4971 -EXPORT_SYMBOL(sock_kfree_s);
4972 -EXPORT_SYMBOL(sock_kmalloc);
4973 -EXPORT_SYMBOL(sock_no_accept);
4974 -EXPORT_SYMBOL(sock_no_bind);
4975 -EXPORT_SYMBOL(sock_no_connect);
4976 -EXPORT_SYMBOL(sock_no_getname);
4977 -EXPORT_SYMBOL(sock_no_getsockopt);
4978 -EXPORT_SYMBOL(sock_no_ioctl);
4979 -EXPORT_SYMBOL(sock_no_listen);
4980 -EXPORT_SYMBOL(sock_no_mmap);
4981 -EXPORT_SYMBOL(sock_no_poll);
4982 -EXPORT_SYMBOL(sock_no_recvmsg);
4983 -EXPORT_SYMBOL(sock_no_sendmsg);
4984 -EXPORT_SYMBOL(sock_no_sendpage);
4985 -EXPORT_SYMBOL(sock_no_setsockopt);
4986 -EXPORT_SYMBOL(sock_no_shutdown);
4987 -EXPORT_SYMBOL(sock_no_socketpair);
4988 -EXPORT_SYMBOL(sock_rfree);
4989 -EXPORT_SYMBOL(sock_setsockopt);
4990 -EXPORT_SYMBOL(sock_wfree);
4991 -EXPORT_SYMBOL(sock_wmalloc);
4992 -EXPORT_SYMBOL(sock_i_uid);
4993 -EXPORT_SYMBOL(sock_i_ino);
4994 -EXPORT_SYMBOL(sysctl_optmem_max);
4995 diff -Nurb linux-2.6.27-524/net/ipv4/udp.c.orig linux-2.6.27-525/net/ipv4/udp.c.orig
4996 --- linux-2.6.27-524/net/ipv4/udp.c.orig        2009-12-04 16:03:48.000000000 -0500
4997 +++ linux-2.6.27-525/net/ipv4/udp.c.orig        1969-12-31 19:00:00.000000000 -0500
4998 @@ -1,1766 +0,0 @@
4999 -/*
5000 - * INET                An implementation of the TCP/IP protocol suite for the LINUX
5001 - *             operating system.  INET is implemented using the  BSD Socket
5002 - *             interface as the means of communication with the user level.
5003 - *
5004 - *             The User Datagram Protocol (UDP).
5005 - *
5006 - * Authors:    Ross Biro
5007 - *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
5008 - *             Arnt Gulbrandsen, <agulbra@nvg.unit.no>
5009 - *             Alan Cox, <Alan.Cox@linux.org>
5010 - *             Hirokazu Takahashi, <taka@valinux.co.jp>
5011 - *
5012 - * Fixes:
5013 - *             Alan Cox        :       verify_area() calls
5014 - *             Alan Cox        :       stopped close while in use off icmp
5015 - *                                     messages. Not a fix but a botch that
5016 - *                                     for udp at least is 'valid'.
5017 - *             Alan Cox        :       Fixed icmp handling properly
5018 - *             Alan Cox        :       Correct error for oversized datagrams
5019 - *             Alan Cox        :       Tidied select() semantics.
5020 - *             Alan Cox        :       udp_err() fixed properly, also now
5021 - *                                     select and read wake correctly on errors
5022 - *             Alan Cox        :       udp_send verify_area moved to avoid mem leak
5023 - *             Alan Cox        :       UDP can count its memory
5024 - *             Alan Cox        :       send to an unknown connection causes
5025 - *                                     an ECONNREFUSED off the icmp, but
5026 - *                                     does NOT close.
5027 - *             Alan Cox        :       Switched to new sk_buff handlers. No more backlog!
5028 - *             Alan Cox        :       Using generic datagram code. Even smaller and the PEEK
5029 - *                                     bug no longer crashes it.
5030 - *             Fred Van Kempen :       Net2e support for sk->broadcast.
5031 - *             Alan Cox        :       Uses skb_free_datagram
5032 - *             Alan Cox        :       Added get/set sockopt support.
5033 - *             Alan Cox        :       Broadcasting without option set returns EACCES.
5034 - *             Alan Cox        :       No wakeup calls. Instead we now use the callbacks.
5035 - *             Alan Cox        :       Use ip_tos and ip_ttl
5036 - *             Alan Cox        :       SNMP Mibs
5037 - *             Alan Cox        :       MSG_DONTROUTE, and 0.0.0.0 support.
5038 - *             Matt Dillon     :       UDP length checks.
5039 - *             Alan Cox        :       Smarter af_inet used properly.
5040 - *             Alan Cox        :       Use new kernel side addressing.
5041 - *             Alan Cox        :       Incorrect return on truncated datagram receive.
5042 - *     Arnt Gulbrandsen        :       New udp_send and stuff
5043 - *             Alan Cox        :       Cache last socket
5044 - *             Alan Cox        :       Route cache
5045 - *             Jon Peatfield   :       Minor efficiency fix to sendto().
5046 - *             Mike Shaver     :       RFC1122 checks.
5047 - *             Alan Cox        :       Nonblocking error fix.
5048 - *     Willy Konynenberg       :       Transparent proxying support.
5049 - *             Mike McLagan    :       Routing by source
5050 - *             David S. Miller :       New socket lookup architecture.
5051 - *                                     Last socket cache retained as it
5052 - *                                     does have a high hit rate.
5053 - *             Olaf Kirch      :       Don't linearise iovec on sendmsg.
5054 - *             Andi Kleen      :       Some cleanups, cache destination entry
5055 - *                                     for connect.
5056 - *     Vitaly E. Lavrov        :       Transparent proxy revived after year coma.
5057 - *             Melvin Smith    :       Check msg_name not msg_namelen in sendto(),
5058 - *                                     return ENOTCONN for unconnected sockets (POSIX)
5059 - *             Janos Farkas    :       don't deliver multi/broadcasts to a different
5060 - *                                     bound-to-device socket
5061 - *     Hirokazu Takahashi      :       HW checksumming for outgoing UDP
5062 - *                                     datagrams.
5063 - *     Hirokazu Takahashi      :       sendfile() on UDP works now.
5064 - *             Arnaldo C. Melo :       convert /proc/net/udp to seq_file
5065 - *     YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
5066 - *     Alexey Kuznetsov:               allow both IPv4 and IPv6 sockets to bind
5067 - *                                     a single port at the same time.
5068 - *     Derek Atkins <derek@ihtfp.com>: Add Encapsulation Support
5069 - *     James Chapman           :       Add L2TP encapsulation type.
5070 - *
5071 - *
5072 - *             This program is free software; you can redistribute it and/or
5073 - *             modify it under the terms of the GNU General Public License
5074 - *             as published by the Free Software Foundation; either version
5075 - *             2 of the License, or (at your option) any later version.
5076 - */
5077 -
5078 -#include <asm/system.h>
5079 -#include <asm/uaccess.h>
5080 -#include <asm/ioctls.h>
5081 -#include <linux/bootmem.h>
5082 -#include <linux/types.h>
5083 -#include <linux/fcntl.h>
5084 -#include <linux/module.h>
5085 -#include <linux/socket.h>
5086 -#include <linux/sockios.h>
5087 -#include <linux/igmp.h>
5088 -#include <linux/in.h>
5089 -#include <linux/errno.h>
5090 -#include <linux/timer.h>
5091 -#include <linux/mm.h>
5092 -#include <linux/inet.h>
5093 -#include <linux/netdevice.h>
5094 -#include <net/tcp_states.h>
5095 -#include <linux/skbuff.h>
5096 -#include <linux/proc_fs.h>
5097 -#include <linux/seq_file.h>
5098 -#include <net/net_namespace.h>
5099 -#include <net/icmp.h>
5100 -#include <net/route.h>
5101 -#include <net/checksum.h>
5102 -#include <net/xfrm.h>
5103 -#include "udp_impl.h"
5104 -
5105 -/*
5106 - *     Snmp MIB for the UDP layer
5107 - */
5108 -
5109 -DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
5110 -EXPORT_SYMBOL(udp_stats_in6);
5111 -
5112 -struct hlist_head udp_hash[UDP_HTABLE_SIZE];
5113 -DEFINE_RWLOCK(udp_hash_lock);
5114 -
5115 -int sysctl_udp_mem[3] __read_mostly;
5116 -int sysctl_udp_rmem_min __read_mostly;
5117 -int sysctl_udp_wmem_min __read_mostly;
5118 -
5119 -EXPORT_SYMBOL(sysctl_udp_mem);
5120 -EXPORT_SYMBOL(sysctl_udp_rmem_min);
5121 -EXPORT_SYMBOL(sysctl_udp_wmem_min);
5122 -
5123 -atomic_t udp_memory_allocated;
5124 -EXPORT_SYMBOL(udp_memory_allocated);
5125 -
5126 -static inline int __udp_lib_lport_inuse(struct net *net, __u16 num,
5127 -                                       const struct hlist_head udptable[])
5128 -{
5129 -       struct sock *sk;
5130 -       struct hlist_node *node;
5131 -
5132 -       sk_for_each(sk, node, &udptable[udp_hashfn(net, num)])
5133 -               if (net_eq(sock_net(sk), net) && sk->sk_hash == num)
5134 -                       return 1;
5135 -       return 0;
5136 -}
5137 -
5138 -/**
5139 - *  udp_lib_get_port  -  UDP/-Lite port lookup for IPv4 and IPv6
5140 - *
5141 - *  @sk:          socket struct in question
5142 - *  @snum:        port number to look up
5143 - *  @saddr_comp:  AF-dependent comparison of bound local IP addresses
5144 - */
5145 -int udp_lib_get_port(struct sock *sk, unsigned short snum,
5146 -                      int (*saddr_comp)(const struct sock *sk1,
5147 -                                        const struct sock *sk2 )    )
5148 -{
5149 -       struct hlist_head *udptable = sk->sk_prot->h.udp_hash;
5150 -       struct hlist_node *node;
5151 -       struct hlist_head *head;
5152 -       struct sock *sk2;
5153 -       int    error = 1;
5154 -       struct net *net = sock_net(sk);
5155 -
5156 -       write_lock_bh(&udp_hash_lock);
5157 -
5158 -       if (!snum) {
5159 -               int i, low, high, remaining;
5160 -               unsigned rover, best, best_size_so_far;
5161 -
5162 -               inet_get_local_port_range(&low, &high);
5163 -               remaining = (high - low) + 1;
5164 -
5165 -               best_size_so_far = UINT_MAX;
5166 -               best = rover = net_random() % remaining + low;
5167 -
5168 -               /* 1st pass: look for empty (or shortest) hash chain */
5169 -               for (i = 0; i < UDP_HTABLE_SIZE; i++) {
5170 -                       int size = 0;
5171 -
5172 -                       head = &udptable[udp_hashfn(net, rover)];
5173 -                       if (hlist_empty(head))
5174 -                               goto gotit;
5175 -
5176 -                       sk_for_each(sk2, node, head) {
5177 -                               if (++size >= best_size_so_far)
5178 -                                       goto next;
5179 -                       }
5180 -                       best_size_so_far = size;
5181 -                       best = rover;
5182 -               next:
5183 -                       /* fold back if end of range */
5184 -                       if (++rover > high)
5185 -                               rover = low + ((rover - low)
5186 -                                              & (UDP_HTABLE_SIZE - 1));
5187 -
5188 -
5189 -               }
5190 -
5191 -               /* 2nd pass: find hole in shortest hash chain */
5192 -               rover = best;
5193 -               for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) {
5194 -                       if (! __udp_lib_lport_inuse(net, rover, udptable))
5195 -                               goto gotit;
5196 -                       rover += UDP_HTABLE_SIZE;
5197 -                       if (rover > high)
5198 -                               rover = low + ((rover - low)
5199 -                                              & (UDP_HTABLE_SIZE - 1));
5200 -               }
5201 -
5202 -
5203 -               /* All ports in use! */
5204 -               goto fail;
5205 -
5206 -gotit:
5207 -               snum = rover;
5208 -       } else {
5209 -               head = &udptable[udp_hashfn(net, snum)];
5210 -
5211 -               sk_for_each(sk2, node, head)
5212 -                       if (sk2->sk_hash == snum                             &&
5213 -                           sk2 != sk                                        &&
5214 -                           net_eq(sock_net(sk2), net)                       &&
5215 -                           (!sk2->sk_reuse        || !sk->sk_reuse)         &&
5216 -                           (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
5217 -                            || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
5218 -                           (*saddr_comp)(sk, sk2)                             )
5219 -                               goto fail;
5220 -       }
5221 -
5222 -       inet_sk(sk)->num = snum;
5223 -       sk->sk_hash = snum;
5224 -       if (sk_unhashed(sk)) {
5225 -               head = &udptable[udp_hashfn(net, snum)];
5226 -               sk_add_node(sk, head);
5227 -               sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
5228 -       }
5229 -       error = 0;
5230 -fail:
5231 -       write_unlock_bh(&udp_hash_lock);
5232 -       return error;
5233 -}
5234 -
5235 -extern int ipv4_rcv_saddr_equal(const struct sock *, const struct sock *);
5236 -
5237 -int udp_v4_get_port(struct sock *sk, unsigned short snum)
5238 -{
5239 -       return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal);
5240 -}
5241 -
5242 -
5243 -/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
5244 - * harder than this. -DaveM
5245 - */
5246 -static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
5247 -               __be16 sport, __be32 daddr, __be16 dport,
5248 -               int dif, struct hlist_head udptable[])
5249 -{
5250 -       struct sock *sk, *result = NULL;
5251 -       struct hlist_node *node;
5252 -       unsigned short hnum = ntohs(dport);
5253 -       int badness = -1;
5254 -
5255 -       read_lock(&udp_hash_lock);
5256 -       sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) {
5257 -               struct inet_sock *inet = inet_sk(sk);
5258 -
5259 -               if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
5260 -                               !ipv6_only_sock(sk)) {
5261 -                       int score = (sk->sk_family == PF_INET ? 1 : 0);
5262 -
5263 -                       if (inet->rcv_saddr) {
5264 -                               if (inet->rcv_saddr != daddr)
5265 -                                       continue;
5266 -                               score+=2;
5267 -                       } else {
5268 -                               /* block non nx_info ips */
5269 -                               if (!v4_addr_in_nx_info(sk->sk_nx_info,
5270 -                                       daddr, NXA_MASK_BIND))
5271 -                                       continue;
5272 -                       }
5273 -                       if (inet->daddr) {
5274 -                               if (inet->daddr != saddr)
5275 -                                       continue;
5276 -                               score+=2;
5277 -                       }
5278 -                       if (inet->dport) {
5279 -                               if (inet->dport != sport)
5280 -                                       continue;
5281 -                               score+=2;
5282 -                       }
5283 -                       if (sk->sk_bound_dev_if) {
5284 -                               if (sk->sk_bound_dev_if != dif)
5285 -                                       continue;
5286 -                               score+=2;
5287 -                       }
5288 -                       if (score == 9) {
5289 -                               result = sk;
5290 -                               break;
5291 -                       } else if (score > badness) {
5292 -                               result = sk;
5293 -                               badness = score;
5294 -                       }
5295 -               }
5296 -       }
5297 -
5298 -       if (result)
5299 -               sock_hold(result);
5300 -       read_unlock(&udp_hash_lock);
5301 -       return result;
5302 -}
5303 -
5304 -static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk,
5305 -                                            __be16 loc_port, __be32 loc_addr,
5306 -                                            __be16 rmt_port, __be32 rmt_addr,
5307 -                                            int dif)
5308 -{
5309 -       struct hlist_node *node;
5310 -       struct sock *s = sk;
5311 -       unsigned short hnum = ntohs(loc_port);
5312 -
5313 -       sk_for_each_from(s, node) {
5314 -               struct inet_sock *inet = inet_sk(s);
5315 -
5316 -               if (!net_eq(sock_net(s), net)                           ||
5317 -                   s->sk_hash != hnum                                  ||
5318 -                   (inet->daddr && inet->daddr != rmt_addr)            ||
5319 -                   (inet->dport != rmt_port && inet->dport)            ||
5320 -                   !v4_sock_addr_match(sk->sk_nx_info, inet, loc_addr) ||
5321 -                   ipv6_only_sock(s)                                   ||
5322 -                   (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
5323 -                       continue;
5324 -               if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif))
5325 -                       continue;
5326 -               goto found;
5327 -       }
5328 -       s = NULL;
5329 -found:
5330 -       return s;
5331 -}
5332 -
5333 -/*
5334 - * This routine is called by the ICMP module when it gets some
5335 - * sort of error condition.  If err < 0 then the socket should
5336 - * be closed and the error returned to the user.  If err > 0
5337 - * it's just the icmp type << 8 | icmp code.
5338 - * Header points to the ip header of the error packet. We move
5339 - * on past this. Then (as it used to claim before adjustment)
5340 - * header points to the first 8 bytes of the udp header.  We need
5341 - * to find the appropriate port.
5342 - */
5343 -
5344 -void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
5345 -{
5346 -       struct inet_sock *inet;
5347 -       struct iphdr *iph = (struct iphdr*)skb->data;
5348 -       struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
5349 -       const int type = icmp_hdr(skb)->type;
5350 -       const int code = icmp_hdr(skb)->code;
5351 -       struct sock *sk;
5352 -       int harderr;
5353 -       int err;
5354 -       struct net *net = dev_net(skb->dev);
5355 -
5356 -       sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
5357 -                       iph->saddr, uh->source, skb->dev->ifindex, udptable);
5358 -       if (sk == NULL) {
5359 -               ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
5360 -               return; /* No socket for error */
5361 -       }
5362 -
5363 -       err = 0;
5364 -       harderr = 0;
5365 -       inet = inet_sk(sk);
5366 -
5367 -       switch (type) {
5368 -       default:
5369 -       case ICMP_TIME_EXCEEDED:
5370 -               err = EHOSTUNREACH;
5371 -               break;
5372 -       case ICMP_SOURCE_QUENCH:
5373 -               goto out;
5374 -       case ICMP_PARAMETERPROB:
5375 -               err = EPROTO;
5376 -               harderr = 1;
5377 -               break;
5378 -       case ICMP_DEST_UNREACH:
5379 -               if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
5380 -                       if (inet->pmtudisc != IP_PMTUDISC_DONT) {
5381 -                               err = EMSGSIZE;
5382 -                               harderr = 1;
5383 -                               break;
5384 -                       }
5385 -                       goto out;
5386 -               }
5387 -               err = EHOSTUNREACH;
5388 -               if (code <= NR_ICMP_UNREACH) {
5389 -                       harderr = icmp_err_convert[code].fatal;
5390 -                       err = icmp_err_convert[code].errno;
5391 -               }
5392 -               break;
5393 -       }
5394 -
5395 -       /*
5396 -        *      RFC1122: OK.  Passes ICMP errors back to application, as per
5397 -        *      4.1.3.3.
5398 -        */
5399 -       if (!inet->recverr) {
5400 -               if (!harderr || sk->sk_state != TCP_ESTABLISHED)
5401 -                       goto out;
5402 -       } else {
5403 -               ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1));
5404 -       }
5405 -       sk->sk_err = err;
5406 -       sk->sk_error_report(sk);
5407 -out:
5408 -       sock_put(sk);
5409 -}
5410 -
5411 -void udp_err(struct sk_buff *skb, u32 info)
5412 -{
5413 -       __udp4_lib_err(skb, info, udp_hash);
5414 -}
5415 -
5416 -/*
5417 - * Throw away all pending data and cancel the corking. Socket is locked.
5418 - */
5419 -void udp_flush_pending_frames(struct sock *sk)
5420 -{
5421 -       struct udp_sock *up = udp_sk(sk);
5422 -
5423 -       if (up->pending) {
5424 -               up->len = 0;
5425 -               up->pending = 0;
5426 -               ip_flush_pending_frames(sk);
5427 -       }
5428 -}
5429 -EXPORT_SYMBOL(udp_flush_pending_frames);
5430 -
5431 -/**
5432 - *     udp4_hwcsum_outgoing  -  handle outgoing HW checksumming
5433 - *     @sk:    socket we are sending on
5434 - *     @skb:   sk_buff containing the filled-in UDP header
5435 - *             (checksum field must be zeroed out)
5436 - */
5437 -static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
5438 -                                __be32 src, __be32 dst, int len      )
5439 -{
5440 -       unsigned int offset;
5441 -       struct udphdr *uh = udp_hdr(skb);
5442 -       __wsum csum = 0;
5443 -
5444 -       if (skb_queue_len(&sk->sk_write_queue) == 1) {
5445 -               /*
5446 -                * Only one fragment on the socket.
5447 -                */
5448 -               skb->csum_start = skb_transport_header(skb) - skb->head;
5449 -               skb->csum_offset = offsetof(struct udphdr, check);
5450 -               uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0);
5451 -       } else {
5452 -               /*
5453 -                * HW-checksum won't work as there are two or more
5454 -                * fragments on the socket so that all csums of sk_buffs
5455 -                * should be together
5456 -                */
5457 -               offset = skb_transport_offset(skb);
5458 -               skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
5459 -
5460 -               skb->ip_summed = CHECKSUM_NONE;
5461 -
5462 -               skb_queue_walk(&sk->sk_write_queue, skb) {
5463 -                       csum = csum_add(csum, skb->csum);
5464 -               }
5465 -
5466 -               uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
5467 -               if (uh->check == 0)
5468 -                       uh->check = CSUM_MANGLED_0;
5469 -       }
5470 -}
5471 -
5472 -/*
5473 - * Push out all pending data as one UDP datagram. Socket is locked.
5474 - */
5475 -static int udp_push_pending_frames(struct sock *sk)
5476 -{
5477 -       struct udp_sock  *up = udp_sk(sk);
5478 -       struct inet_sock *inet = inet_sk(sk);
5479 -       struct flowi *fl = &inet->cork.fl;
5480 -       struct sk_buff *skb;
5481 -       struct udphdr *uh;
5482 -       int err = 0;
5483 -       int is_udplite = IS_UDPLITE(sk);
5484 -       __wsum csum = 0;
5485 -
5486 -       /* Grab the skbuff where UDP header space exists. */
5487 -       if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
5488 -               goto out;
5489 -
5490 -       /*
5491 -        * Create a UDP header
5492 -        */
5493 -       uh = udp_hdr(skb);
5494 -       uh->source = fl->fl_ip_sport;
5495 -       uh->dest = fl->fl_ip_dport;
5496 -       uh->len = htons(up->len);
5497 -       uh->check = 0;
5498 -
5499 -       if (is_udplite)                                  /*     UDP-Lite      */
5500 -               csum  = udplite_csum_outgoing(sk, skb);
5501 -
5502 -       else if (sk->sk_no_check == UDP_CSUM_NOXMIT) {   /* UDP csum disabled */
5503 -
5504 -               skb->ip_summed = CHECKSUM_NONE;
5505 -               goto send;
5506 -
5507 -       } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
5508 -
5509 -               udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len);
5510 -               goto send;
5511 -
5512 -       } else                                           /*   `normal' UDP    */
5513 -               csum = udp_csum_outgoing(sk, skb);
5514 -
5515 -       /* add protocol-dependent pseudo-header */
5516 -       uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len,
5517 -                                     sk->sk_protocol, csum             );
5518 -       if (uh->check == 0)
5519 -               uh->check = CSUM_MANGLED_0;
5520 -
5521 -send:
5522 -       err = ip_push_pending_frames(sk);
5523 -out:
5524 -       up->len = 0;
5525 -       up->pending = 0;
5526 -       if (!err)
5527 -               UDP_INC_STATS_USER(sock_net(sk),
5528 -                               UDP_MIB_OUTDATAGRAMS, is_udplite);
5529 -       return err;
5530 -}
5531 -
5532 -int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
5533 -               size_t len)
5534 -{
5535 -       struct inet_sock *inet = inet_sk(sk);
5536 -       struct udp_sock *up = udp_sk(sk);
5537 -       int ulen = len;
5538 -       struct ipcm_cookie ipc;
5539 -       struct rtable *rt = NULL;
5540 -       int free = 0;
5541 -       int connected = 0;
5542 -       __be32 daddr, faddr, saddr;
5543 -       __be16 dport;
5544 -       u8  tos;
5545 -       int err, is_udplite = IS_UDPLITE(sk);
5546 -       int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
5547 -       int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
5548 -
5549 -       if (len > 0xFFFF)
5550 -               return -EMSGSIZE;
5551 -
5552 -       /*
5553 -        *      Check the flags.
5554 -        */
5555 -
5556 -       if (msg->msg_flags&MSG_OOB)     /* Mirror BSD error message compatibility */
5557 -               return -EOPNOTSUPP;
5558 -
5559 -       ipc.opt = NULL;
5560 -
5561 -       if (up->pending) {
5562 -               /*
5563 -                * There are pending frames.
5564 -                * The socket lock must be held while it's corked.
5565 -                */
5566 -               lock_sock(sk);
5567 -               if (likely(up->pending)) {
5568 -                       if (unlikely(up->pending != AF_INET)) {
5569 -                               release_sock(sk);
5570 -                               return -EINVAL;
5571 -                       }
5572 -                       goto do_append_data;
5573 -               }
5574 -               release_sock(sk);
5575 -       }
5576 -       ulen += sizeof(struct udphdr);
5577 -
5578 -       /*
5579 -        *      Get and verify the address.
5580 -        */
5581 -       if (msg->msg_name) {
5582 -               struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
5583 -               if (msg->msg_namelen < sizeof(*usin))
5584 -                       return -EINVAL;
5585 -               if (usin->sin_family != AF_INET) {
5586 -                       if (usin->sin_family != AF_UNSPEC)
5587 -                               return -EAFNOSUPPORT;
5588 -               }
5589 -
5590 -               daddr = usin->sin_addr.s_addr;
5591 -               dport = usin->sin_port;
5592 -               if (dport == 0)
5593 -                       return -EINVAL;
5594 -       } else {
5595 -               if (sk->sk_state != TCP_ESTABLISHED)
5596 -                       return -EDESTADDRREQ;
5597 -               daddr = inet->daddr;
5598 -               dport = inet->dport;
5599 -               /* Open fast path for connected socket.
5600 -                  Route will not be used, if at least one option is set.
5601 -                */
5602 -               connected = 1;
5603 -       }
5604 -       ipc.addr = inet->saddr;
5605 -
5606 -       ipc.oif = sk->sk_bound_dev_if;
5607 -       if (msg->msg_controllen) {
5608 -               err = ip_cmsg_send(sock_net(sk), msg, &ipc);
5609 -               if (err)
5610 -                       return err;
5611 -               if (ipc.opt)
5612 -                       free = 1;
5613 -               connected = 0;
5614 -       }
5615 -       if (!ipc.opt)
5616 -               ipc.opt = inet->opt;
5617 -
5618 -       saddr = ipc.addr;
5619 -       ipc.addr = faddr = daddr;
5620 -
5621 -       if (ipc.opt && ipc.opt->srr) {
5622 -               if (!daddr)
5623 -                       return -EINVAL;
5624 -               faddr = ipc.opt->faddr;
5625 -               connected = 0;
5626 -       }
5627 -       tos = RT_TOS(inet->tos);
5628 -       if (sock_flag(sk, SOCK_LOCALROUTE) ||
5629 -           (msg->msg_flags & MSG_DONTROUTE) ||
5630 -           (ipc.opt && ipc.opt->is_strictroute)) {
5631 -               tos |= RTO_ONLINK;
5632 -               connected = 0;
5633 -       }
5634 -
5635 -       if (ipv4_is_multicast(daddr)) {
5636 -               if (!ipc.oif)
5637 -                       ipc.oif = inet->mc_index;
5638 -               if (!saddr)
5639 -                       saddr = inet->mc_addr;
5640 -               connected = 0;
5641 -       }
5642 -
5643 -       if (connected)
5644 -               rt = (struct rtable*)sk_dst_check(sk, 0);
5645 -
5646 -       if (rt == NULL) {
5647 -               struct flowi fl = { .oif = ipc.oif,
5648 -                                   .nl_u = { .ip4_u =
5649 -                                             { .daddr = faddr,
5650 -                                               .saddr = saddr,
5651 -                                               .tos = tos } },
5652 -                                   .proto = sk->sk_protocol,
5653 -                                   .uli_u = { .ports =
5654 -                                              { .sport = inet->sport,
5655 -                                                .dport = dport } } };
5656 -               struct net *net = sock_net(sk);
5657 -               struct nx_info *nxi = sk->sk_nx_info;
5658 -
5659 -               security_sk_classify_flow(sk, &fl);
5660 -               err = ip_v4_find_src(net, nxi, &rt, &fl);
5661 -               if (err)
5662 -                       goto out;
5663 -
5664 -               err = ip_route_output_flow(net, &rt, &fl, sk, 1);
5665 -               if (err) {
5666 -                       if (err == -ENETUNREACH)
5667 -                               IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
5668 -                       goto out;
5669 -               }
5670 -
5671 -               err = -EACCES;
5672 -               if ((rt->rt_flags & RTCF_BROADCAST) &&
5673 -                   !sock_flag(sk, SOCK_BROADCAST))
5674 -                       goto out;
5675 -               if (connected)
5676 -                       sk_dst_set(sk, dst_clone(&rt->u.dst));
5677 -       }
5678 -
5679 -       if (msg->msg_flags&MSG_CONFIRM)
5680 -               goto do_confirm;
5681 -back_from_confirm:
5682 -
5683 -       saddr = rt->rt_src;
5684 -       if (!ipc.addr)
5685 -               daddr = ipc.addr = rt->rt_dst;
5686 -
5687 -       lock_sock(sk);
5688 -       if (unlikely(up->pending)) {
5689 -               /* The socket is already corked while preparing it. */
5690 -               /* ... which is an evident application bug. --ANK */
5691 -               release_sock(sk);
5692 -
5693 -               LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");
5694 -               err = -EINVAL;
5695 -               goto out;
5696 -       }
5697 -       /*
5698 -        *      Now cork the socket to pend data.
5699 -        */
5700 -       inet->cork.fl.fl4_dst = daddr;
5701 -       inet->cork.fl.fl_ip_dport = dport;
5702 -       inet->cork.fl.fl4_src = saddr;
5703 -       inet->cork.fl.fl_ip_sport = inet->sport;
5704 -       up->pending = AF_INET;
5705 -
5706 -do_append_data:
5707 -       up->len += ulen;
5708 -       getfrag  =  is_udplite ?  udplite_getfrag : ip_generic_getfrag;
5709 -       err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
5710 -                       sizeof(struct udphdr), &ipc, rt,
5711 -                       corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
5712 -       if (err)
5713 -               udp_flush_pending_frames(sk);
5714 -       else if (!corkreq)
5715 -               err = udp_push_pending_frames(sk);
5716 -       else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
5717 -               up->pending = 0;
5718 -       release_sock(sk);
5719 -
5720 -out:
5721 -       ip_rt_put(rt);
5722 -       if (free)
5723 -               kfree(ipc.opt);
5724 -       if (!err)
5725 -               return len;
5726 -       /*
5727 -        * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space.  Reporting
5728 -        * ENOBUFS might not be good (it's not tunable per se), but otherwise
5729 -        * we don't have a good statistic (IpOutDiscards but it can be too many
5730 -        * things).  We could add another new stat but at least for now that
5731 -        * seems like overkill.
5732 -        */
5733 -       if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
5734 -               UDP_INC_STATS_USER(sock_net(sk),
5735 -                               UDP_MIB_SNDBUFERRORS, is_udplite);
5736 -       }
5737 -       return err;
5738 -
5739 -do_confirm:
5740 -       dst_confirm(&rt->u.dst);
5741 -       if (!(msg->msg_flags&MSG_PROBE) || len)
5742 -               goto back_from_confirm;
5743 -       err = 0;
5744 -       goto out;
5745 -}
5746 -
5747 -int udp_sendpage(struct sock *sk, struct page *page, int offset,
5748 -                size_t size, int flags)
5749 -{
5750 -       struct udp_sock *up = udp_sk(sk);
5751 -       int ret;
5752 -
5753 -       if (!up->pending) {
5754 -               struct msghdr msg = {   .msg_flags = flags|MSG_MORE };
5755 -
5756 -               /* Call udp_sendmsg to specify destination address which
5757 -                * sendpage interface can't pass.
5758 -                * This will succeed only when the socket is connected.
5759 -                */
5760 -               ret = udp_sendmsg(NULL, sk, &msg, 0);
5761 -               if (ret < 0)
5762 -                       return ret;
5763 -       }
5764 -
5765 -       lock_sock(sk);
5766 -
5767 -       if (unlikely(!up->pending)) {
5768 -               release_sock(sk);
5769 -
5770 -               LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n");
5771 -               return -EINVAL;
5772 -       }
5773 -
5774 -       ret = ip_append_page(sk, page, offset, size, flags);
5775 -       if (ret == -EOPNOTSUPP) {
5776 -               release_sock(sk);
5777 -               return sock_no_sendpage(sk->sk_socket, page, offset,
5778 -                                       size, flags);
5779 -       }
5780 -       if (ret < 0) {
5781 -               udp_flush_pending_frames(sk);
5782 -               goto out;
5783 -       }
5784 -
5785 -       up->len += size;
5786 -       if (!(up->corkflag || (flags&MSG_MORE)))
5787 -               ret = udp_push_pending_frames(sk);
5788 -       if (!ret)
5789 -               ret = size;
5790 -out:
5791 -       release_sock(sk);
5792 -       return ret;
5793 -}
5794 -
5795 -/*
5796 - *     IOCTL requests applicable to the UDP protocol
5797 - */
5798 -
5799 -int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
5800 -{
5801 -       switch (cmd) {
5802 -       case SIOCOUTQ:
5803 -       {
5804 -               int amount = atomic_read(&sk->sk_wmem_alloc);
5805 -               return put_user(amount, (int __user *)arg);
5806 -       }
5807 -
5808 -       case SIOCINQ:
5809 -       {
5810 -               struct sk_buff *skb;
5811 -               unsigned long amount;
5812 -
5813 -               amount = 0;
5814 -               spin_lock_bh(&sk->sk_receive_queue.lock);
5815 -               skb = skb_peek(&sk->sk_receive_queue);
5816 -               if (skb != NULL) {
5817 -                       /*
5818 -                        * We will only return the amount
5819 -                        * of this packet since that is all
5820 -                        * that will be read.
5821 -                        */
5822 -                       amount = skb->len - sizeof(struct udphdr);
5823 -               }
5824 -               spin_unlock_bh(&sk->sk_receive_queue.lock);
5825 -               return put_user(amount, (int __user *)arg);
5826 -       }
5827 -
5828 -       default:
5829 -               return -ENOIOCTLCMD;
5830 -       }
5831 -
5832 -       return 0;
5833 -}
5834 -
5835 -/*
5836 - *     This should be easy, if there is something there we
5837 - *     return it, otherwise we block.
5838 - */
5839 -
5840 -int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
5841 -               size_t len, int noblock, int flags, int *addr_len)
5842 -{
5843 -       struct inet_sock *inet = inet_sk(sk);
5844 -       struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
5845 -       struct sk_buff *skb;
5846 -       unsigned int ulen, copied;
5847 -       int peeked;
5848 -       int err;
5849 -       int is_udplite = IS_UDPLITE(sk);
5850 -
5851 -       /*
5852 -        *      Check any passed addresses
5853 -        */
5854 -       if (addr_len)
5855 -               *addr_len=sizeof(*sin);
5856 -
5857 -       if (flags & MSG_ERRQUEUE)
5858 -               return ip_recv_error(sk, msg, len);
5859 -
5860 -try_again:
5861 -       skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
5862 -                                 &peeked, &err);
5863 -       if (!skb)
5864 -               goto out;
5865 -
5866 -       ulen = skb->len - sizeof(struct udphdr);
5867 -       copied = len;
5868 -       if (copied > ulen)
5869 -               copied = ulen;
5870 -       else if (copied < ulen)
5871 -               msg->msg_flags |= MSG_TRUNC;
5872 -
5873 -       /*
5874 -        * If checksum is needed at all, try to do it while copying the
5875 -        * data.  If the data is truncated, or if we only want a partial
5876 -        * coverage checksum (UDP-Lite), do it before the copy.
5877 -        */
5878 -
5879 -       if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
5880 -               if (udp_lib_checksum_complete(skb))
5881 -                       goto csum_copy_err;
5882 -       }
5883 -
5884 -       if (skb_csum_unnecessary(skb))
5885 -               err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
5886 -                                             msg->msg_iov, copied       );
5887 -       else {
5888 -               err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
5889 -
5890 -               if (err == -EINVAL)
5891 -                       goto csum_copy_err;
5892 -       }
5893 -
5894 -       if (err)
5895 -               goto out_free;
5896 -
5897 -       if (!peeked)
5898 -               UDP_INC_STATS_USER(sock_net(sk),
5899 -                               UDP_MIB_INDATAGRAMS, is_udplite);
5900 -
5901 -       sock_recv_timestamp(msg, sk, skb);
5902 -
5903 -       /* Copy the address. */
5904 -       if (sin)
5905 -       {
5906 -               sin->sin_family = AF_INET;
5907 -               sin->sin_port = udp_hdr(skb)->source;
5908 -               sin->sin_addr.s_addr = nx_map_sock_lback(
5909 -                       skb->sk->sk_nx_info, ip_hdr(skb)->saddr);
5910 -               memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
5911 -       }
5912 -       if (inet->cmsg_flags)
5913 -               ip_cmsg_recv(msg, skb);
5914 -
5915 -       err = copied;
5916 -       if (flags & MSG_TRUNC)
5917 -               err = ulen;
5918 -
5919 -out_free:
5920 -       lock_sock(sk);
5921 -       skb_free_datagram(sk, skb);
5922 -       release_sock(sk);
5923 -out:
5924 -       return err;
5925 -
5926 -csum_copy_err:
5927 -       lock_sock(sk);
5928 -       if (!skb_kill_datagram(sk, skb, flags))
5929 -               UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
5930 -       release_sock(sk);
5931 -
5932 -       if (noblock)
5933 -               return -EAGAIN;
5934 -       goto try_again;
5935 -}
5936 -
5937 -
5938 -int udp_disconnect(struct sock *sk, int flags)
5939 -{
5940 -       struct inet_sock *inet = inet_sk(sk);
5941 -       /*
5942 -        *      1003.1g - break association.
5943 -        */
5944 -
5945 -       sk->sk_state = TCP_CLOSE;
5946 -       inet->daddr = 0;
5947 -       inet->dport = 0;
5948 -       sk->sk_bound_dev_if = 0;
5949 -       if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
5950 -               inet_reset_saddr(sk);
5951 -
5952 -       if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) {
5953 -               sk->sk_prot->unhash(sk);
5954 -               inet->sport = 0;
5955 -       }
5956 -       sk_dst_reset(sk);
5957 -       return 0;
5958 -}
5959 -
5960 -static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
5961 -{
5962 -       int is_udplite = IS_UDPLITE(sk);
5963 -       int rc;
5964 -
5965 -       if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) {
5966 -               /* Note that an ENOMEM error is charged twice */
5967 -               if (rc == -ENOMEM) {
5968 -                       UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
5969 -                                        is_udplite);
5970 -                       atomic_inc(&sk->sk_drops);
5971 -               }
5972 -               goto drop;
5973 -       }
5974 -
5975 -       return 0;
5976 -
5977 -drop:
5978 -       UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
5979 -       kfree_skb(skb);
5980 -       return -1;
5981 -}
5982 -
5983 -/* returns:
5984 - *  -1: error
5985 - *   0: success
5986 - *  >0: "udp encap" protocol resubmission
5987 - *
5988 - * Note that in the success and error cases, the skb is assumed to
5989 - * have either been requeued or freed.
5990 - */
5991 -int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
5992 -{
5993 -       struct udp_sock *up = udp_sk(sk);
5994 -       int rc;
5995 -       int is_udplite = IS_UDPLITE(sk);
5996 -
5997 -       /*
5998 -        *      Charge it to the socket, dropping if the queue is full.
5999 -        */
6000 -       if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
6001 -               goto drop;
6002 -       nf_reset(skb);
6003 -
6004 -       if (up->encap_type) {
6005 -               /*
6006 -                * This is an encapsulation socket so pass the skb to
6007 -                * the socket's udp_encap_rcv() hook. Otherwise, just
6008 -                * fall through and pass this up the UDP socket.
6009 -                * up->encap_rcv() returns the following value:
6010 -                * =0 if skb was successfully passed to the encap
6011 -                *    handler or was discarded by it.
6012 -                * >0 if skb should be passed on to UDP.
6013 -                * <0 if skb should be resubmitted as proto -N
6014 -                */
6015 -
6016 -               /* if we're overly short, let UDP handle it */
6017 -               if (skb->len > sizeof(struct udphdr) &&
6018 -                   up->encap_rcv != NULL) {
6019 -                       int ret;
6020 -
6021 -                       ret = (*up->encap_rcv)(sk, skb);
6022 -                       if (ret <= 0) {
6023 -                               UDP_INC_STATS_BH(sock_net(sk),
6024 -                                                UDP_MIB_INDATAGRAMS,
6025 -                                                is_udplite);
6026 -                               return -ret;
6027 -                       }
6028 -               }
6029 -
6030 -               /* FALLTHROUGH -- it's a UDP Packet */
6031 -       }
6032 -
6033 -       /*
6034 -        *      UDP-Lite specific tests, ignored on UDP sockets
6035 -        */
6036 -       if ((is_udplite & UDPLITE_RECV_CC)  &&  UDP_SKB_CB(skb)->partial_cov) {
6037 -
6038 -               /*
6039 -                * MIB statistics other than incrementing the error count are
6040 -                * disabled for the following two types of errors: these depend
6041 -                * on the application settings, not on the functioning of the
6042 -                * protocol stack as such.
6043 -                *
6044 -                * RFC 3828 here recommends (sec 3.3): "There should also be a
6045 -                * way ... to ... at least let the receiving application block
6046 -                * delivery of packets with coverage values less than a value
6047 -                * provided by the application."
6048 -                */
6049 -               if (up->pcrlen == 0) {          /* full coverage was set  */
6050 -                       LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage "
6051 -                               "%d while full coverage %d requested\n",
6052 -                               UDP_SKB_CB(skb)->cscov, skb->len);
6053 -                       goto drop;
6054 -               }
6055 -               /* The next case involves violating the min. coverage requested
6056 -                * by the receiver. This is subtle: if receiver wants x and x is
6057 -                * greater than the buffersize/MTU then receiver will complain
6058 -                * that it wants x while sender emits packets of smaller size y.
6059 -                * Therefore the above ...()->partial_cov statement is essential.
6060 -                */
6061 -               if (UDP_SKB_CB(skb)->cscov  <  up->pcrlen) {
6062 -                       LIMIT_NETDEBUG(KERN_WARNING
6063 -                               "UDPLITE: coverage %d too small, need min %d\n",
6064 -                               UDP_SKB_CB(skb)->cscov, up->pcrlen);
6065 -                       goto drop;
6066 -               }
6067 -       }
6068 -
6069 -       if (sk->sk_filter) {
6070 -               if (udp_lib_checksum_complete(skb))
6071 -                       goto drop;
6072 -       }
6073 -
6074 -       rc = 0;
6075 -
6076 -       bh_lock_sock(sk);
6077 -       if (!sock_owned_by_user(sk))
6078 -               rc = __udp_queue_rcv_skb(sk, skb);
6079 -       else
6080 -               sk_add_backlog(sk, skb);
6081 -       bh_unlock_sock(sk);
6082 -
6083 -       return rc;
6084 -
6085 -drop:
6086 -       UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
6087 -       kfree_skb(skb);
6088 -       return -1;
6089 -}
6090 -
6091 -/*
6092 - *     Multicasts and broadcasts go to each listener.
6093 - *
6094 - *     Note: called only from the BH handler context,
6095 - *     so we don't need to lock the hashes.
6096 - */
6097 -static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
6098 -                                   struct udphdr  *uh,
6099 -                                   __be32 saddr, __be32 daddr,
6100 -                                   struct hlist_head udptable[])
6101 -{
6102 -       struct sock *sk;
6103 -       int dif;
6104 -
6105 -       read_lock(&udp_hash_lock);
6106 -       sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]);
6107 -       dif = skb->dev->ifindex;
6108 -       sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
6109 -       if (sk) {
6110 -               struct sock *sknext = NULL;
6111 -
6112 -               do {
6113 -                       struct sk_buff *skb1 = skb;
6114 -
6115 -                       sknext = udp_v4_mcast_next(net, sk_next(sk), uh->dest,
6116 -                                                  daddr, uh->source, saddr,
6117 -                                                  dif);
6118 -                       if (sknext)
6119 -                               skb1 = skb_clone(skb, GFP_ATOMIC);
6120 -
6121 -                       if (skb1) {
6122 -                               int ret = udp_queue_rcv_skb(sk, skb1);
6123 -                               if (ret > 0)
6124 -                                       /* we should probably re-process instead
6125 -                                        * of dropping packets here. */
6126 -                                       kfree_skb(skb1);
6127 -                       }
6128 -                       sk = sknext;
6129 -               } while (sknext);
6130 -       } else
6131 -               kfree_skb(skb);
6132 -       read_unlock(&udp_hash_lock);
6133 -       return 0;
6134 -}
6135 -
6136 -/* Initialize UDP checksum. If exited with zero value (success),
6137 - * CHECKSUM_UNNECESSARY means, that no more checks are required.
6138 - * Otherwise, csum completion requires checksumming packet body,
6139 - * including udp header and folding it to skb->csum.
6140 - */
6141 -static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
6142 -                                int proto)
6143 -{
6144 -       const struct iphdr *iph;
6145 -       int err;
6146 -
6147 -       UDP_SKB_CB(skb)->partial_cov = 0;
6148 -       UDP_SKB_CB(skb)->cscov = skb->len;
6149 -
6150 -       if (proto == IPPROTO_UDPLITE) {
6151 -               err = udplite_checksum_init(skb, uh);
6152 -               if (err)
6153 -                       return err;
6154 -       }
6155 -
6156 -       iph = ip_hdr(skb);
6157 -       if (uh->check == 0) {
6158 -               skb->ip_summed = CHECKSUM_UNNECESSARY;
6159 -       } else if (skb->ip_summed == CHECKSUM_COMPLETE) {
6160 -              if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
6161 -                                     proto, skb->csum))
6162 -                       skb->ip_summed = CHECKSUM_UNNECESSARY;
6163 -       }
6164 -       if (!skb_csum_unnecessary(skb))
6165 -               skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
6166 -                                              skb->len, proto, 0);
6167 -       /* Probably, we should checksum udp header (it should be in cache
6168 -        * in any case) and data in tiny packets (< rx copybreak).
6169 -        */
6170 -
6171 -       return 0;
6172 -}
6173 -
6174 -/*
6175 - *     All we need to do is get the socket, and then do a checksum.
6176 - */
6177 -
6178 -int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
6179 -                  int proto)
6180 -{
6181 -       struct sock *sk;
6182 -       struct udphdr *uh;
6183 -       unsigned short ulen;
6184 -       struct rtable *rt = (struct rtable*)skb->dst;
6185 -       __be32 saddr = ip_hdr(skb)->saddr;
6186 -       __be32 daddr = ip_hdr(skb)->daddr;
6187 -       struct net *net = dev_net(skb->dev);
6188 -
6189 -       /*
6190 -        *  Validate the packet.
6191 -        */
6192 -       if (!pskb_may_pull(skb, sizeof(struct udphdr)))
6193 -               goto drop;              /* No space for header. */
6194 -
6195 -       uh   = udp_hdr(skb);
6196 -       ulen = ntohs(uh->len);
6197 -       if (ulen > skb->len)
6198 -               goto short_packet;
6199 -
6200 -       if (proto == IPPROTO_UDP) {
6201 -               /* UDP validates ulen. */
6202 -               if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))
6203 -                       goto short_packet;
6204 -               uh = udp_hdr(skb);
6205 -       }
6206 -
6207 -       if (udp4_csum_init(skb, uh, proto))
6208 -               goto csum_error;
6209 -
6210 -       if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
6211 -               return __udp4_lib_mcast_deliver(net, skb, uh,
6212 -                               saddr, daddr, udptable);
6213 -
6214 -       sk = __udp4_lib_lookup(net, saddr, uh->source, daddr,
6215 -                       uh->dest, inet_iif(skb), udptable);
6216 -
6217 -       if (sk != NULL) {
6218 -               int ret = udp_queue_rcv_skb(sk, skb);
6219 -               sock_put(sk);
6220 -
6221 -               /* a return value > 0 means to resubmit the input, but
6222 -                * it wants the return to be -protocol, or 0
6223 -                */
6224 -               if (ret > 0)
6225 -                       return -ret;
6226 -               return 0;
6227 -       }
6228 -
6229 -       if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
6230 -               goto drop;
6231 -       nf_reset(skb);
6232 -
6233 -       /* No socket. Drop packet silently, if checksum is wrong */
6234 -       if (udp_lib_checksum_complete(skb))
6235 -               goto csum_error;
6236 -
6237 -       UDP_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
6238 -       icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
6239 -
6240 -       /*
6241 -        * Hmm.  We got a UDP packet to a port to which we
6242 -        * don't wanna listen.  Ignore it.
6243 -        */
6244 -       kfree_skb(skb);
6245 -       return 0;
6246 -
6247 -short_packet:
6248 -       LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From " NIPQUAD_FMT ":%u %d/%d to " NIPQUAD_FMT ":%u\n",
6249 -                      proto == IPPROTO_UDPLITE ? "-Lite" : "",
6250 -                      NIPQUAD(saddr),
6251 -                      ntohs(uh->source),
6252 -                      ulen,
6253 -                      skb->len,
6254 -                      NIPQUAD(daddr),
6255 -                      ntohs(uh->dest));
6256 -       goto drop;
6257 -
6258 -csum_error:
6259 -       /*
6260 -        * RFC1122: OK.  Discards the bad packet silently (as far as
6261 -        * the network is concerned, anyway) as per 4.1.3.4 (MUST).
6262 -        */
6263 -       LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From " NIPQUAD_FMT ":%u to " NIPQUAD_FMT ":%u ulen %d\n",
6264 -                      proto == IPPROTO_UDPLITE ? "-Lite" : "",
6265 -                      NIPQUAD(saddr),
6266 -                      ntohs(uh->source),
6267 -                      NIPQUAD(daddr),
6268 -                      ntohs(uh->dest),
6269 -                      ulen);
6270 -drop:
6271 -       UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
6272 -       kfree_skb(skb);
6273 -       return 0;
6274 -}
6275 -
6276 -int udp_rcv(struct sk_buff *skb)
6277 -{
6278 -       return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP);
6279 -}
6280 -
6281 -void udp_destroy_sock(struct sock *sk)
6282 -{
6283 -       lock_sock(sk);
6284 -       udp_flush_pending_frames(sk);
6285 -       release_sock(sk);
6286 -}
6287 -
6288 -/*
6289 - *     Socket option code for UDP
6290 - */
6291 -int udp_lib_setsockopt(struct sock *sk, int level, int optname,
6292 -                      char __user *optval, int optlen,
6293 -                      int (*push_pending_frames)(struct sock *))
6294 -{
6295 -       struct udp_sock *up = udp_sk(sk);
6296 -       int val;
6297 -       int err = 0;
6298 -       int is_udplite = IS_UDPLITE(sk);
6299 -
6300 -       if (optlen<sizeof(int))
6301 -               return -EINVAL;
6302 -
6303 -       if (get_user(val, (int __user *)optval))
6304 -               return -EFAULT;
6305 -
6306 -       switch (optname) {
6307 -       case UDP_CORK:
6308 -               if (val != 0) {
6309 -                       up->corkflag = 1;
6310 -               } else {
6311 -                       up->corkflag = 0;
6312 -                       lock_sock(sk);
6313 -                       (*push_pending_frames)(sk);
6314 -                       release_sock(sk);
6315 -               }
6316 -               break;
6317 -
6318 -       case UDP_ENCAP:
6319 -               switch (val) {
6320 -               case 0:
6321 -               case UDP_ENCAP_ESPINUDP:
6322 -               case UDP_ENCAP_ESPINUDP_NON_IKE:
6323 -                       up->encap_rcv = xfrm4_udp_encap_rcv;
6324 -                       /* FALLTHROUGH */
6325 -               case UDP_ENCAP_L2TPINUDP:
6326 -                       up->encap_type = val;
6327 -                       break;
6328 -               default:
6329 -                       err = -ENOPROTOOPT;
6330 -                       break;
6331 -               }
6332 -               break;
6333 -
6334 -       /*
6335 -        *      UDP-Lite's partial checksum coverage (RFC 3828).
6336 -        */
6337 -       /* The sender sets actual checksum coverage length via this option.
6338 -        * The case coverage > packet length is handled by send module. */
6339 -       case UDPLITE_SEND_CSCOV:
6340 -               if (!is_udplite)         /* Disable the option on UDP sockets */
6341 -                       return -ENOPROTOOPT;
6342 -               if (val != 0 && val < 8) /* Illegal coverage: use default (8) */
6343 -                       val = 8;
6344 -               else if (val > USHORT_MAX)
6345 -                       val = USHORT_MAX;
6346 -               up->pcslen = val;
6347 -               up->pcflag |= UDPLITE_SEND_CC;
6348 -               break;
6349 -
6350 -       /* The receiver specifies a minimum checksum coverage value. To make
6351 -        * sense, this should be set to at least 8 (as done below). If zero is
6352 -        * used, this again means full checksum coverage.                     */
6353 -       case UDPLITE_RECV_CSCOV:
6354 -               if (!is_udplite)         /* Disable the option on UDP sockets */
6355 -                       return -ENOPROTOOPT;
6356 -               if (val != 0 && val < 8) /* Avoid silly minimal values.       */
6357 -                       val = 8;
6358 -               else if (val > USHORT_MAX)
6359 -                       val = USHORT_MAX;
6360 -               up->pcrlen = val;
6361 -               up->pcflag |= UDPLITE_RECV_CC;
6362 -               break;
6363 -
6364 -       default:
6365 -               err = -ENOPROTOOPT;
6366 -               break;
6367 -       }
6368 -
6369 -       return err;
6370 -}
6371 -
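The UDPLITE_SEND_CSCOV and UDPLITE_RECV_CSCOV cases in udp_lib_setsockopt() above are the RFC 3828 partial-coverage knobs; from user space they are ordinary socket options on an IPPROTO_UDPLITE socket. A minimal usage sketch follows; the fallback defines mirror the values in the kernel headers, and error handling is omitted:

#include <sys/socket.h>
#include <netinet/in.h>

#ifndef IPPROTO_UDPLITE
#define IPPROTO_UDPLITE		136
#endif
#ifndef SOL_UDPLITE
#define SOL_UDPLITE		136
#endif
#ifndef UDPLITE_SEND_CSCOV
#define UDPLITE_SEND_CSCOV	10
#define UDPLITE_RECV_CSCOV	11
#endif

int udplite_socket_with_coverage(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDPLITE);
	int snd = 20;	/* checksum only the first 20 bytes of each datagram  */
	int rcv = 20;	/* drop datagrams whose coverage is below 20 bytes    */

	if (fd < 0)
		return -1;
	/* values 1..7 are rounded up to 8 by the kernel; 0 means full coverage */
	setsockopt(fd, SOL_UDPLITE, UDPLITE_SEND_CSCOV, &snd, sizeof(snd));
	setsockopt(fd, SOL_UDPLITE, UDPLITE_RECV_CSCOV, &rcv, sizeof(rcv));
	return fd;
}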
6372 -int udp_setsockopt(struct sock *sk, int level, int optname,
6373 -                  char __user *optval, int optlen)
6374 -{
6375 -       if (level == SOL_UDP  ||  level == SOL_UDPLITE)
6376 -               return udp_lib_setsockopt(sk, level, optname, optval, optlen,
6377 -                                         udp_push_pending_frames);
6378 -       return ip_setsockopt(sk, level, optname, optval, optlen);
6379 -}
6380 -
6381 -#ifdef CONFIG_COMPAT
6382 -int compat_udp_setsockopt(struct sock *sk, int level, int optname,
6383 -                         char __user *optval, int optlen)
6384 -{
6385 -       if (level == SOL_UDP  ||  level == SOL_UDPLITE)
6386 -               return udp_lib_setsockopt(sk, level, optname, optval, optlen,
6387 -                                         udp_push_pending_frames);
6388 -       return compat_ip_setsockopt(sk, level, optname, optval, optlen);
6389 -}
6390 -#endif
6391 -
6392 -int udp_lib_getsockopt(struct sock *sk, int level, int optname,
6393 -                      char __user *optval, int __user *optlen)
6394 -{
6395 -       struct udp_sock *up = udp_sk(sk);
6396 -       int val, len;
6397 -
6398 -       if (get_user(len,optlen))
6399 -               return -EFAULT;
6400 -
6401 -       len = min_t(unsigned int, len, sizeof(int));
6402 -
6403 -       if (len < 0)
6404 -               return -EINVAL;
6405 -
6406 -       switch (optname) {
6407 -       case UDP_CORK:
6408 -               val = up->corkflag;
6409 -               break;
6410 -
6411 -       case UDP_ENCAP:
6412 -               val = up->encap_type;
6413 -               break;
6414 -
6415 -       /* The following two cannot be changed on UDP sockets, the return is
6416 -        * always 0 (which corresponds to the full checksum coverage of UDP). */
6417 -       case UDPLITE_SEND_CSCOV:
6418 -               val = up->pcslen;
6419 -               break;
6420 -
6421 -       case UDPLITE_RECV_CSCOV:
6422 -               val = up->pcrlen;
6423 -               break;
6424 -
6425 -       default:
6426 -               return -ENOPROTOOPT;
6427 -       }
6428 -
6429 -       if (put_user(len, optlen))
6430 -               return -EFAULT;
6431 -       if (copy_to_user(optval, &val,len))
6432 -               return -EFAULT;
6433 -       return 0;
6434 -}
6435 -
6436 -int udp_getsockopt(struct sock *sk, int level, int optname,
6437 -                  char __user *optval, int __user *optlen)
6438 -{
6439 -       if (level == SOL_UDP  ||  level == SOL_UDPLITE)
6440 -               return udp_lib_getsockopt(sk, level, optname, optval, optlen);
6441 -       return ip_getsockopt(sk, level, optname, optval, optlen);
6442 -}
6443 -
6444 -#ifdef CONFIG_COMPAT
6445 -int compat_udp_getsockopt(struct sock *sk, int level, int optname,
6446 -                                char __user *optval, int __user *optlen)
6447 -{
6448 -       if (level == SOL_UDP  ||  level == SOL_UDPLITE)
6449 -               return udp_lib_getsockopt(sk, level, optname, optval, optlen);
6450 -       return compat_ip_getsockopt(sk, level, optname, optval, optlen);
6451 -}
6452 -#endif
6453 -/**
6454 - *     udp_poll - wait for a UDP event.
6455 - *     @file - file struct
6456 - *     @sock - socket
6457 - *     @wait - poll table
6458 - *
6459 - *     This is same as datagram poll, except for the special case of
6460 - *     blocking sockets. If application is using a blocking fd
6461 - *     and a packet with checksum error is in the queue;
6462 - *     then it could get return from select indicating data available
6463 - *     but then block when reading it. Add special case code
6464 - *     to work around these arguably broken applications.
6465 - */
6466 -unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
6467 -{
6468 -       unsigned int mask = datagram_poll(file, sock, wait);
6469 -       struct sock *sk = sock->sk;
6470 -       int     is_lite = IS_UDPLITE(sk);
6471 -
6472 -       /* Check for false positives due to checksum errors */
6473 -       if ( (mask & POLLRDNORM) &&
6474 -            !(file->f_flags & O_NONBLOCK) &&
6475 -            !(sk->sk_shutdown & RCV_SHUTDOWN)){
6476 -               struct sk_buff_head *rcvq = &sk->sk_receive_queue;
6477 -               struct sk_buff *skb;
6478 -
6479 -               spin_lock_bh(&rcvq->lock);
6480 -               while ((skb = skb_peek(rcvq)) != NULL &&
6481 -                      udp_lib_checksum_complete(skb)) {
6482 -                       UDP_INC_STATS_BH(sock_net(sk),
6483 -                                       UDP_MIB_INERRORS, is_lite);
6484 -                       __skb_unlink(skb, rcvq);
6485 -                       kfree_skb(skb);
6486 -               }
6487 -               spin_unlock_bh(&rcvq->lock);
6488 -
6489 -               /* nothing to see, move along */
6490 -               if (skb == NULL)
6491 -                       mask &= ~(POLLIN | POLLRDNORM);
6492 -       }
6493 -
6494 -       return mask;
6495 -
6496 -}
6497 -
6498 -struct proto udp_prot = {
6499 -       .name              = "UDP",
6500 -       .owner             = THIS_MODULE,
6501 -       .close             = udp_lib_close,
6502 -       .connect           = ip4_datagram_connect,
6503 -       .disconnect        = udp_disconnect,
6504 -       .ioctl             = udp_ioctl,
6505 -       .destroy           = udp_destroy_sock,
6506 -       .setsockopt        = udp_setsockopt,
6507 -       .getsockopt        = udp_getsockopt,
6508 -       .sendmsg           = udp_sendmsg,
6509 -       .recvmsg           = udp_recvmsg,
6510 -       .sendpage          = udp_sendpage,
6511 -       .backlog_rcv       = __udp_queue_rcv_skb,
6512 -       .hash              = udp_lib_hash,
6513 -       .unhash            = udp_lib_unhash,
6514 -       .get_port          = udp_v4_get_port,
6515 -       .memory_allocated  = &udp_memory_allocated,
6516 -       .sysctl_mem        = sysctl_udp_mem,
6517 -       .sysctl_wmem       = &sysctl_udp_wmem_min,
6518 -       .sysctl_rmem       = &sysctl_udp_rmem_min,
6519 -       .obj_size          = sizeof(struct udp_sock),
6520 -       .h.udp_hash        = udp_hash,
6521 -#ifdef CONFIG_COMPAT
6522 -       .compat_setsockopt = compat_udp_setsockopt,
6523 -       .compat_getsockopt = compat_udp_getsockopt,
6524 -#endif
6525 -};
6526 -
6527 -/* ------------------------------------------------------------------------ */
6528 -#ifdef CONFIG_PROC_FS
6529 -
6530 -static struct sock *udp_get_first(struct seq_file *seq)
6531 -{
6532 -       struct sock *sk;
6533 -       struct udp_iter_state *state = seq->private;
6534 -       struct net *net = seq_file_net(seq);
6535 -
6536 -       for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
6537 -               struct hlist_node *node;
6538 -               sk_for_each(sk, node, state->hashtable + state->bucket) {
6539 -                       if (!net_eq(sock_net(sk), net))
6540 -                               continue;
6541 -                       if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
6542 -                               continue;
6543 -                       if (sk->sk_family == state->family)
6544 -                               goto found;
6545 -               }
6546 -       }
6547 -       sk = NULL;
6548 -found:
6549 -       return sk;
6550 -}
6551 -
6552 -static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
6553 -{
6554 -       struct udp_iter_state *state = seq->private;
6555 -       struct net *net = seq_file_net(seq);
6556 -
6557 -       do {
6558 -               sk = sk_next(sk);
6559 -try_again:
6560 -               ;
6561 -       } while (sk && (!net_eq(sock_net(sk), net) ||
6562 -               sk->sk_family != state->family ||
6563 -               !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)));
6564 -
6565 -       if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
6566 -               sk = sk_head(state->hashtable + state->bucket);
6567 -               goto try_again;
6568 -       }
6569 -       return sk;
6570 -}
6571 -
6572 -static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
6573 -{
6574 -       struct sock *sk = udp_get_first(seq);
6575 -
6576 -       if (sk)
6577 -               while (pos && (sk = udp_get_next(seq, sk)) != NULL)
6578 -                       --pos;
6579 -       return pos ? NULL : sk;
6580 -}
6581 -
6582 -static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
6583 -       __acquires(udp_hash_lock)
6584 -{
6585 -       read_lock(&udp_hash_lock);
6586 -       return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
6587 -}
6588 -
6589 -static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
6590 -{
6591 -       struct sock *sk;
6592 -
6593 -       if (v == SEQ_START_TOKEN)
6594 -               sk = udp_get_idx(seq, 0);
6595 -       else
6596 -               sk = udp_get_next(seq, v);
6597 -
6598 -       ++*pos;
6599 -       return sk;
6600 -}
6601 -
6602 -static void udp_seq_stop(struct seq_file *seq, void *v)
6603 -       __releases(udp_hash_lock)
6604 -{
6605 -       read_unlock(&udp_hash_lock);
6606 -}
6607 -
6608 -static int udp_seq_open(struct inode *inode, struct file *file)
6609 -{
6610 -       struct udp_seq_afinfo *afinfo = PDE(inode)->data;
6611 -       struct udp_iter_state *s;
6612 -       int err;
6613 -
6614 -       err = seq_open_net(inode, file, &afinfo->seq_ops,
6615 -                          sizeof(struct udp_iter_state));
6616 -       if (err < 0)
6617 -               return err;
6618 -
6619 -       s = ((struct seq_file *)file->private_data)->private;
6620 -       s->family               = afinfo->family;
6621 -       s->hashtable            = afinfo->hashtable;
6622 -       return err;
6623 -}
6624 -
6625 -/* ------------------------------------------------------------------------ */
6626 -int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo)
6627 -{
6628 -       struct proc_dir_entry *p;
6629 -       int rc = 0;
6630 -
6631 -       afinfo->seq_fops.open           = udp_seq_open;
6632 -       afinfo->seq_fops.read           = seq_read;
6633 -       afinfo->seq_fops.llseek         = seq_lseek;
6634 -       afinfo->seq_fops.release        = seq_release_net;
6635 -
6636 -       afinfo->seq_ops.start           = udp_seq_start;
6637 -       afinfo->seq_ops.next            = udp_seq_next;
6638 -       afinfo->seq_ops.stop            = udp_seq_stop;
6639 -
6640 -       p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
6641 -                            &afinfo->seq_fops, afinfo);
6642 -       if (!p)
6643 -               rc = -ENOMEM;
6644 -       return rc;
6645 -}
6646 -
6647 -void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo)
6648 -{
6649 -       proc_net_remove(net, afinfo->name);
6650 -}
6651 -
6652 -/* ------------------------------------------------------------------------ */
6653 -static void udp4_format_sock(struct sock *sp, struct seq_file *f,
6654 -               int bucket, int *len)
6655 -{
6656 -       struct inet_sock *inet = inet_sk(sp);
6657 -       __be32 dest = inet->daddr;
6658 -       __be32 src  = inet->rcv_saddr;
6659 -       __u16 destp       = ntohs(inet->dport);
6660 -       __u16 srcp        = ntohs(inet->sport);
6661 -
6662 -       seq_printf(f, "%4d: %08X:%04X %08X:%04X"
6663 -               " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n",
6664 -               bucket,
6665 -               nx_map_sock_lback(current_nx_info(), src), srcp,
6666 -               nx_map_sock_lback(current_nx_info(), dest), destp,
6667 -               sp->sk_state,
6668 -               atomic_read(&sp->sk_wmem_alloc),
6669 -               atomic_read(&sp->sk_rmem_alloc),
6670 -               0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
6671 -               atomic_read(&sp->sk_refcnt), sp,
6672 -               atomic_read(&sp->sk_drops), len);
6673 -}
6674 -
6675 -int udp4_seq_show(struct seq_file *seq, void *v)
6676 -{
6677 -       if (v == SEQ_START_TOKEN)
6678 -               seq_printf(seq, "%-127s\n",
6679 -                          "  sl  local_address rem_address   st tx_queue "
6680 -                          "rx_queue tr tm->when retrnsmt   uid  timeout "
6681 -                          "inode ref pointer drops");
6682 -       else {
6683 -               struct udp_iter_state *state = seq->private;
6684 -               int len;
6685 -
6686 -               udp4_format_sock(v, seq, state->bucket, &len);
6687 -               seq_printf(seq, "%*s\n", 127 - len ,"");
6688 -       }
6689 -       return 0;
6690 -}
6691 -
6692 -/* ------------------------------------------------------------------------ */
6693 -static struct udp_seq_afinfo udp4_seq_afinfo = {
6694 -       .name           = "udp",
6695 -       .family         = AF_INET,
6696 -       .hashtable      = udp_hash,
6697 -       .seq_fops       = {
6698 -               .owner  =       THIS_MODULE,
6699 -       },
6700 -       .seq_ops        = {
6701 -               .show           = udp4_seq_show,
6702 -       },
6703 -};
6704 -
6705 -static int udp4_proc_init_net(struct net *net)
6706 -{
6707 -       return udp_proc_register(net, &udp4_seq_afinfo);
6708 -}
6709 -
6710 -static void udp4_proc_exit_net(struct net *net)
6711 -{
6712 -       udp_proc_unregister(net, &udp4_seq_afinfo);
6713 -}
6714 -
6715 -static struct pernet_operations udp4_net_ops = {
6716 -       .init = udp4_proc_init_net,
6717 -       .exit = udp4_proc_exit_net,
6718 -};
6719 -
6720 -int __init udp4_proc_init(void)
6721 -{
6722 -       return register_pernet_subsys(&udp4_net_ops);
6723 -}
6724 -
6725 -void udp4_proc_exit(void)
6726 -{
6727 -       unregister_pernet_subsys(&udp4_net_ops);
6728 -}
6729 -#endif /* CONFIG_PROC_FS */
6730 -
6731 -void __init udp_init(void)
6732 -{
6733 -       unsigned long limit;
6734 -
6735 -       /* Set the pressure threshold up by the same strategy of TCP. It is a
6736 -        * fraction of global memory that is up to 1/2 at 256 MB, decreasing
6737 -        * toward zero with the amount of memory, with a floor of 128 pages.
6738 -        */
6739 -       limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
6740 -       limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
6741 -       limit = max(limit, 128UL);
6742 -       sysctl_udp_mem[0] = limit / 4 * 3;
6743 -       sysctl_udp_mem[1] = limit;
6744 -       sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
6745 -
6746 -       sysctl_udp_rmem_min = SK_MEM_QUANTUM;
6747 -       sysctl_udp_wmem_min = SK_MEM_QUANTUM;
6748 -}
6749 -
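To make the sizing above concrete (a worked example only, assuming 4 KiB pages and at least 256 MiB counted in nr_all_pages): the first line caps its result at (1 << 16) >> 8 = 256, the second multiplies that by the memory size in MiB and halves it, so a 256 MiB machine ends up with limit = (256 * 256) >> 1 = 32768 pages, i.e. 128 MiB, and sysctl_udp_mem becomes { 24576, 32768, 49152 } pages.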
6750 -EXPORT_SYMBOL(udp_disconnect);
6751 -EXPORT_SYMBOL(udp_hash);
6752 -EXPORT_SYMBOL(udp_hash_lock);
6753 -EXPORT_SYMBOL(udp_ioctl);
6754 -EXPORT_SYMBOL(udp_prot);
6755 -EXPORT_SYMBOL(udp_sendmsg);
6756 -EXPORT_SYMBOL(udp_lib_getsockopt);
6757 -EXPORT_SYMBOL(udp_lib_setsockopt);
6758 -EXPORT_SYMBOL(udp_poll);
6759 -EXPORT_SYMBOL(udp_lib_get_port);
6760 -
6761 -#ifdef CONFIG_PROC_FS
6762 -EXPORT_SYMBOL(udp_proc_register);
6763 -EXPORT_SYMBOL(udp_proc_unregister);
6764 -#endif
6765 diff -Nurb linux-2.6.27-524/net/packet/af_packet.c linux-2.6.27-525/net/packet/af_packet.c
6766 --- linux-2.6.27-524/net/packet/af_packet.c     2009-12-04 16:03:47.000000000 -0500
6767 +++ linux-2.6.27-525/net/packet/af_packet.c     2009-12-04 16:09:31.000000000 -0500
6768 @@ -77,6 +77,7 @@
6769  #include <linux/poll.h>
6770  #include <linux/module.h>
6771  #include <linux/init.h>
6772 +#include <linux/vs_network.h>
6773  #include <linux/mutex.h>
6774  
6775  #ifdef CONFIG_INET
6776 @@ -278,10 +279,53 @@
6777  
6778  static const struct proto_ops packet_ops_spkt;
6779  
6780 +extern DEFINE_PER_CPU(int, sknid_elevator);
6781 +
6782 +static inline unsigned int slice_check_and_elevate(struct sk_buff *skb, struct sock *sk) {
6783 +       /* This mechanism is quite involved, and caused us a lot of pain
6784 +        * including crashes and packet loss during the 4.2 rollout. This
6785 +        * function decides if a slice is allowed to see a given packet.
6786 +        * Unfortunately, the first time it is invoked for a packet it does not
6787 +        * have enough information to make this call, since xt_MARK has not had
6788 +        * a chance to tag it with the slice id.  There is also no way of
6789 +        * passing state between xt_MARK and this function through a packet --
6790 +        * because the skb gets cloned quite a few times between these two
6791 +        * points.  I'd rather not use skb_shared_info because it's treated as
6792 +        * a blob of memory, and so it would be quite hard to maintain.
6793 +        *
6794 +        * What we do is to keep a global variable (per CPU) that transfers the
6795 +        * required state between xt_MARK and af_packet.c. As an optimization,
6796 +        * this state transfer and the step that follows are only executed for
6797 +        * packets that first get dropped here. When we drop a packet, we mark
6798 +        * it for 'elevation' (that's what this trick is called). When xt_MARK
6799 +        * tags the packet with the right slice, it intercepts this mark and
6800 +        * sets the value of sknid_elevator. Next, the packet is sent back here
6801 +        * for a second round, this time with the xid tag set.
6802 +        */
6803 +
6804 +       int *elevator = &__get_cpu_var(sknid_elevator);
6805 +       int tag = skb->skb_tag;
6806 +
6807 +       if (sk->sk_nx_info && !(tag == 1 || sk->sk_nid == tag)) {
6808 +               if (skb->pkt_type == PACKET_HOST) {
6809 +                       *elevator = -2; /* Rejecting this packet. Mark it for elevation in xt_MARK */
6810 +               }
6811 +               return 0;
6812 +       }
6813 +       else if (!sk->sk_nx_info && (*elevator > 0)) {
6814 +               /* Root has already seen this packet once, since it has been elevated */
6815 +               return 0;
6816 +       }
6817 +
6818 +       return 1;
6819 +}
6820 +
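The long comment above describes a two-pass hand-off through the per-CPU sknid_elevator variable. Below is a minimal sketch of the other half of that hand-off, the part that would live in xt_MARK; the helper name and its exact placement are assumptions, only the -2 sentinel, the per-CPU variable and the second delivery pass in netif_receive_skb() come from the patch itself.

#include <linux/percpu.h>

extern DEFINE_PER_CPU(int, sknid_elevator);	/* defined in net/core/dev.c by this patch */

/* Hypothetical helper: once xt_MARK has worked out which slice (xid) owns
 * the packet, it replaces the -2 "please elevate" sentinel left by
 * slice_check_and_elevate() with that xid.  netif_receive_skb() then copies
 * the value into skb->skb_tag and re-delivers the skb to the ptype_all
 * handlers that set pt->sknid_elevator. */
static void sknid_mark_elevate(int xid)
{
	int *elevator = &__get_cpu_var(sknid_elevator);

	if (*elevator == -2)		/* first pass asked for a re-run */
		*elevator = xid;	/* second pass will carry this tag */
}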
6821  static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt, struct net_device *orig_dev)
6822  {
6823         struct sock *sk;
6824         struct sockaddr_pkt *spkt;
6825 +       int tag = skb->skb_tag;
6826 +
6827  
6828         /*
6829          *      When we registered the protocol we saved the socket in the data
6830 @@ -301,6 +345,16 @@
6831          *      so that this procedure is noop.
6832          */
6833  
6834 +       /* 
6835 +        * (18:05:41) daniel_hozac: where?
6836 +        * (18:05:58) daniel_hozac: we already have filters on PF_PACKET, don't we?
6837 +        * (18:05:58) er: in packet_rcv_skpt
6838 +        * (18:07:33) daniel_hozac: oh, that's evil. 
6839 +        */
6840 +
6841 +       if (!slice_check_and_elevate(skb, sk))
6842 +               return 0;
6843 +
6844         if (skb->pkt_type == PACKET_LOOPBACK)
6845                 goto out;
6846  
6847 @@ -359,6 +413,9 @@
6848         __be16 proto=0;
6849         int err;
6850  
6851 +       if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND))
6852 +               return -EPERM;
6853 +
6854         /*
6855          *      Get and verify the address.
6856          */
6857 @@ -451,11 +508,16 @@
6858         return err;
6859  }
6860  
6861 +
6862 +
6863  static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
6864                                       unsigned int res)
6865  {
6866         struct sk_filter *filter;
6867  
6868 +       if (!slice_check_and_elevate(skb, sk)) 
6869 +               return 0;
6870 +
6871         rcu_read_lock_bh();
6872         filter = rcu_dereference(sk->sk_filter);
6873         if (filter != NULL)
6874 @@ -775,6 +837,9 @@
6875         unsigned char *addr;
6876         int ifindex, err, reserve = 0;
6877  
6878 +       if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND)) 
6879 +               return -EPERM;
6880 +
6881         /*
6882          *      Get and verify the address.
6883          */
6884 @@ -941,6 +1006,7 @@
6885  
6886         po->num = protocol;
6887         po->prot_hook.type = protocol;
6888 +       po->prot_hook.sknid_elevator = 1;
6889         po->prot_hook.dev = dev;
6890  
6891         po->ifindex = dev ? dev->ifindex : 0;
6892 @@ -1039,8 +1105,9 @@
6893         __be16 proto = (__force __be16)protocol; /* weird, but documented */
6894         int err;
6895  
6896 -       if (!capable(CAP_NET_RAW))
6897 +       if (!nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET))
6898                 return -EPERM;
6899 +               
6900         if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
6901             sock->type != SOCK_PACKET)
6902                 return -ESOCKTNOSUPPORT;
6903 @@ -1072,6 +1139,7 @@
6904         spin_lock_init(&po->bind_lock);
6905         mutex_init(&po->pg_vec_lock);
6906         po->prot_hook.func = packet_rcv;
6907 +       po->prot_hook.sknid_elevator = 1;
6908  
6909         if (sock->type == SOCK_PACKET)
6910                 po->prot_hook.func = packet_rcv_spkt;