-diff -Nurb linux-2.6.27-524/include/linux/netdevice.h linux-2.6.27-525/include/linux/netdevice.h
---- linux-2.6.27-524/include/linux/netdevice.h 2008-10-09 18:13:53.000000000 -0400
-+++ linux-2.6.27-525/include/linux/netdevice.h 2009-12-04 16:03:56.000000000 -0500
-@@ -857,6 +857,7 @@
+diff -NurpP --exclude '*.orig' --exclude '*.rej' linux-2.6.27.10-vs2.3.x-PS-522-523-524/include/linux/netdevice.h linux-2.6.27.10-vs2.3.x-PS-522-523-524-525/include/linux/netdevice.h
+--- linux-2.6.27.10-vs2.3.x-PS-522-523-524/include/linux/netdevice.h 2008-10-13 14:52:09.000000000 +0200
++++ linux-2.6.27.10-vs2.3.x-PS-522-523-524-525/include/linux/netdevice.h 2009-01-21 03:38:41.000000000 +0100
+@@ -857,6 +857,7 @@ static inline void netif_napi_del(struct
struct packet_type {
__be16 type; /* This is really htons(ether_type). */
struct net_device *dev; /* NULL is wildcarded here */
int (*func) (struct sk_buff *,
struct net_device *,
struct packet_type *,
-diff -Nurb linux-2.6.27-524/net/core/dev.c linux-2.6.27-525/net/core/dev.c
---- linux-2.6.27-524/net/core/dev.c 2009-12-04 16:03:48.000000000 -0500
-+++ linux-2.6.27-525/net/core/dev.c 2009-12-04 16:05:48.000000000 -0500
+diff -NurpP --exclude '*.orig' --exclude '*.rej' linux-2.6.27.10-vs2.3.x-PS-522-523-524/net/core/dev.c linux-2.6.27.10-vs2.3.x-PS-522-523-524-525/net/core/dev.c
+--- linux-2.6.27.10-vs2.3.x-PS-522-523-524/net/core/dev.c 2008-12-19 12:09:14.000000000 +0100
++++ linux-2.6.27.10-vs2.3.x-PS-522-523-524-525/net/core/dev.c 2009-01-21 03:43:19.000000000 +0100
@@ -99,6 +99,8 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/if_bridge.h>
#include <linux/if_macvlan.h>
#include <net/dst.h>
-@@ -1318,7 +1320,7 @@
+@@ -1318,7 +1320,7 @@ static void dev_queue_xmit_nit(struct sk
if ((ptype->dev == dev || !ptype->dev) &&
(ptype->af_packet_priv == NULL ||
(struct sock *)ptype->af_packet_priv != skb->sk)) {
if (!skb2)
break;
-@@ -2170,6 +2172,10 @@
+@@ -2170,6 +2172,10 @@ void netif_nit_deliver(struct sk_buff *s
rcu_read_unlock();
}
/**
* netif_receive_skb - process receive buffer from network
* @skb: buffer to process
-@@ -2191,8 +2197,11 @@
+@@ -2191,8 +2197,11 @@ int netif_receive_skb(struct sk_buff *sk
struct net_device *orig_dev;
struct net_device *null_or_orig;
int ret = NET_RX_DROP;
-+ int *cur_elevator = &__get_cpu_var(sknid_elevator);
++ int *cur_elevator = &__get_cpu_var(sknid_elevator);
__be16 type;
-+ *cur_elevator = 0;
++ *cur_elevator = 0;
+
- if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
- return NET_RX_SUCCESS;
-
-@@ -2272,7 +2281,27 @@
+ /* if we've gotten here through NAPI, check netpoll */
+ if (netpoll_receive_skb(skb))
+ return NET_RX_DROP;
+@@ -2269,7 +2278,27 @@ ncls:
}
if (pt_prev) {
} else {
kfree_skb(skb);
/* Jamal, now you will not able to escape explaining
-@@ -4895,6 +4924,7 @@
+@@ -4892,6 +4921,7 @@ EXPORT_SYMBOL(unregister_netdevice_notif
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(dev_get_flags);
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_handle_frame_hook);
-diff -Nurb linux-2.6.27-524/net/core/skbuff.c.orig linux-2.6.27-525/net/core/skbuff.c.orig
---- linux-2.6.27-524/net/core/skbuff.c.orig 2009-12-04 16:03:47.000000000 -0500
-+++ linux-2.6.27-525/net/core/skbuff.c.orig 1969-12-31 19:00:00.000000000 -0500
-@@ -1,2594 +0,0 @@
--/*
-- * Routines having to do with the 'struct sk_buff' memory handlers.
-- *
-- * Authors: Alan Cox <iiitac@pyr.swan.ac.uk>
-- * Florian La Roche <rzsfl@rz.uni-sb.de>
-- *
-- * Fixes:
-- * Alan Cox : Fixed the worst of the load
-- * balancer bugs.
-- * Dave Platt : Interrupt stacking fix.
-- * Richard Kooijman : Timestamp fixes.
-- * Alan Cox : Changed buffer format.
-- * Alan Cox : destructor hook for AF_UNIX etc.
-- * Linus Torvalds : Better skb_clone.
-- * Alan Cox : Added skb_copy.
-- * Alan Cox : Added all the changed routines Linus
-- * only put in the headers
-- * Ray VanTassle : Fixed --skb->lock in free
-- * Alan Cox : skb_copy copy arp field
-- * Andi Kleen : slabified it.
-- * Robert Olsson : Removed skb_head_pool
-- *
-- * NOTE:
-- * The __skb_ routines should be called with interrupts
-- * disabled, or you better be *real* sure that the operation is atomic
-- * with respect to whatever list is being frobbed (e.g. via lock_sock()
-- * or via disabling bottom half handlers, etc).
-- *
-- * This program is free software; you can redistribute it and/or
-- * modify it under the terms of the GNU General Public License
-- * as published by the Free Software Foundation; either version
-- * 2 of the License, or (at your option) any later version.
-- */
--
--/*
-- * The functions in this file will not compile correctly with gcc 2.4.x
-- */
--
--#include <linux/module.h>
--#include <linux/types.h>
--#include <linux/kernel.h>
--#include <linux/mm.h>
--#include <linux/interrupt.h>
--#include <linux/in.h>
--#include <linux/inet.h>
--#include <linux/slab.h>
--#include <linux/netdevice.h>
--#ifdef CONFIG_NET_CLS_ACT
--#include <net/pkt_sched.h>
--#endif
--#include <linux/string.h>
--#include <linux/skbuff.h>
--#include <linux/splice.h>
--#include <linux/cache.h>
--#include <linux/rtnetlink.h>
--#include <linux/init.h>
--#include <linux/scatterlist.h>
--
--#include <net/protocol.h>
--#include <net/dst.h>
--#include <net/sock.h>
--#include <net/checksum.h>
--#include <net/xfrm.h>
--
--#include <asm/uaccess.h>
--#include <asm/system.h>
--
--#include "kmap_skb.h"
--
--static struct kmem_cache *skbuff_head_cache __read_mostly;
--static struct kmem_cache *skbuff_fclone_cache __read_mostly;
--
--static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
-- struct pipe_buffer *buf)
--{
-- put_page(buf->page);
--}
--
--static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
-- struct pipe_buffer *buf)
--{
-- get_page(buf->page);
--}
--
--static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
-- struct pipe_buffer *buf)
--{
-- return 1;
--}
--
--
--/* Pipe buffer operations for a socket. */
--static struct pipe_buf_operations sock_pipe_buf_ops = {
-- .can_merge = 0,
-- .map = generic_pipe_buf_map,
-- .unmap = generic_pipe_buf_unmap,
-- .confirm = generic_pipe_buf_confirm,
-- .release = sock_pipe_buf_release,
-- .steal = sock_pipe_buf_steal,
-- .get = sock_pipe_buf_get,
--};
--
--/*
-- * Keep out-of-line to prevent kernel bloat.
-- * __builtin_return_address is not used because it is not always
-- * reliable.
-- */
--
--/**
-- * skb_over_panic - private function
-- * @skb: buffer
-- * @sz: size
-- * @here: address
-- *
-- * Out of line support code for skb_put(). Not user callable.
-- */
--void skb_over_panic(struct sk_buff *skb, int sz, void *here)
--{
-- printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
-- "data:%p tail:%#lx end:%#lx dev:%s\n",
-- here, skb->len, sz, skb->head, skb->data,
-- (unsigned long)skb->tail, (unsigned long)skb->end,
-- skb->dev ? skb->dev->name : "<NULL>");
-- BUG();
--}
--
--/**
-- * skb_under_panic - private function
-- * @skb: buffer
-- * @sz: size
-- * @here: address
-- *
-- * Out of line support code for skb_push(). Not user callable.
-- */
--
--void skb_under_panic(struct sk_buff *skb, int sz, void *here)
--{
-- printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
-- "data:%p tail:%#lx end:%#lx dev:%s\n",
-- here, skb->len, sz, skb->head, skb->data,
-- (unsigned long)skb->tail, (unsigned long)skb->end,
-- skb->dev ? skb->dev->name : "<NULL>");
-- BUG();
--}
--
--/* Allocate a new skbuff. We do this ourselves so we can fill in a few
-- * 'private' fields and also do memory statistics to find all the
-- * [BEEP] leaks.
-- *
-- */
--
--/**
-- * __alloc_skb - allocate a network buffer
-- * @size: size to allocate
-- * @gfp_mask: allocation mask
-- * @fclone: allocate from fclone cache instead of head cache
-- * and allocate a cloned (child) skb
-- * @node: numa node to allocate memory on
-- *
-- * Allocate a new &sk_buff. The returned buffer has no headroom and a
-- * tail room of size bytes. The object has a reference count of one.
-- * The return is the buffer. On a failure the return is %NULL.
-- *
-- * Buffers may only be allocated from interrupts using a @gfp_mask of
-- * %GFP_ATOMIC.
-- */
--struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
-- int fclone, int node)
--{
-- struct kmem_cache *cache;
-- struct skb_shared_info *shinfo;
-- struct sk_buff *skb;
-- u8 *data;
--
-- cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
--
-- /* Get the HEAD */
-- skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
-- if (!skb)
-- goto out;
--
-- size = SKB_DATA_ALIGN(size);
-- data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
-- gfp_mask, node);
-- if (!data)
-- goto nodata;
--
-- /*
-- * Only clear those fields we need to clear, not those that we will
-- * actually initialise below. Hence, don't put any more fields after
-- * the tail pointer in struct sk_buff!
-- */
-- memset(skb, 0, offsetof(struct sk_buff, tail));
-- skb->truesize = size + sizeof(struct sk_buff);
-- atomic_set(&skb->users, 1);
-- skb->head = data;
-- skb->data = data;
-- skb_reset_tail_pointer(skb);
-- skb->end = skb->tail + size;
-- /* make sure we initialize shinfo sequentially */
-- shinfo = skb_shinfo(skb);
-- atomic_set(&shinfo->dataref, 1);
-- shinfo->nr_frags = 0;
-- shinfo->gso_size = 0;
-- shinfo->gso_segs = 0;
-- shinfo->gso_type = 0;
-- shinfo->ip6_frag_id = 0;
-- shinfo->frag_list = NULL;
--
-- if (fclone) {
-- struct sk_buff *child = skb + 1;
-- atomic_t *fclone_ref = (atomic_t *) (child + 1);
--
-- skb->fclone = SKB_FCLONE_ORIG;
-- atomic_set(fclone_ref, 1);
--
-- child->fclone = SKB_FCLONE_UNAVAILABLE;
-- }
--out:
-- return skb;
--nodata:
-- kmem_cache_free(cache, skb);
-- skb = NULL;
-- goto out;
--}
--
--/**
-- * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
-- * @dev: network device to receive on
-- * @length: length to allocate
-- * @gfp_mask: get_free_pages mask, passed to alloc_skb
-- *
-- * Allocate a new &sk_buff and assign it a usage count of one. The
-- * buffer has unspecified headroom built in. Users should allocate
-- * the headroom they think they need without accounting for the
-- * built in space. The built in space is used for optimisations.
-- *
-- * %NULL is returned if there is no free memory.
-- */
--struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
-- unsigned int length, gfp_t gfp_mask)
--{
-- int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
-- struct sk_buff *skb;
--
-- skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node);
-- if (likely(skb)) {
-- skb_reserve(skb, NET_SKB_PAD);
-- skb->dev = dev;
-- }
-- return skb;
--}
--
--/**
-- * dev_alloc_skb - allocate an skbuff for receiving
-- * @length: length to allocate
-- *
-- * Allocate a new &sk_buff and assign it a usage count of one. The
-- * buffer has unspecified headroom built in. Users should allocate
-- * the headroom they think they need without accounting for the
-- * built in space. The built in space is used for optimisations.
-- *
-- * %NULL is returned if there is no free memory. Although this function
-- * allocates memory it can be called from an interrupt.
-- */
--struct sk_buff *dev_alloc_skb(unsigned int length)
--{
-- /*
-- * There is more code here than it seems:
-- * __dev_alloc_skb is an inline
-- */
-- return __dev_alloc_skb(length, GFP_ATOMIC);
--}
--EXPORT_SYMBOL(dev_alloc_skb);
--
--static void skb_drop_list(struct sk_buff **listp)
--{
-- struct sk_buff *list = *listp;
--
-- *listp = NULL;
--
-- do {
-- struct sk_buff *this = list;
-- list = list->next;
-- kfree_skb(this);
-- } while (list);
--}
--
--static inline void skb_drop_fraglist(struct sk_buff *skb)
--{
-- skb_drop_list(&skb_shinfo(skb)->frag_list);
--}
--
--static void skb_clone_fraglist(struct sk_buff *skb)
--{
-- struct sk_buff *list;
--
-- for (list = skb_shinfo(skb)->frag_list; list; list = list->next)
-- skb_get(list);
--}
--
--static void skb_release_data(struct sk_buff *skb)
--{
-- if (!skb->cloned ||
-- !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
-- &skb_shinfo(skb)->dataref)) {
-- if (skb_shinfo(skb)->nr_frags) {
-- int i;
-- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-- put_page(skb_shinfo(skb)->frags[i].page);
-- }
--
-- if (skb_shinfo(skb)->frag_list)
-- skb_drop_fraglist(skb);
--
-- kfree(skb->head);
-- }
--}
--
--/*
-- * Free an skbuff by memory without cleaning the state.
-- */
--static void kfree_skbmem(struct sk_buff *skb)
--{
-- struct sk_buff *other;
-- atomic_t *fclone_ref;
--
-- switch (skb->fclone) {
-- case SKB_FCLONE_UNAVAILABLE:
-- kmem_cache_free(skbuff_head_cache, skb);
-- break;
--
-- case SKB_FCLONE_ORIG:
-- fclone_ref = (atomic_t *) (skb + 2);
-- if (atomic_dec_and_test(fclone_ref))
-- kmem_cache_free(skbuff_fclone_cache, skb);
-- break;
--
-- case SKB_FCLONE_CLONE:
-- fclone_ref = (atomic_t *) (skb + 1);
-- other = skb - 1;
--
-- /* The clone portion is available for
-- * fast-cloning again.
-- */
-- skb->fclone = SKB_FCLONE_UNAVAILABLE;
--
-- if (atomic_dec_and_test(fclone_ref))
-- kmem_cache_free(skbuff_fclone_cache, other);
-- break;
-- }
--}
--
--/* Free everything but the sk_buff shell. */
--static void skb_release_all(struct sk_buff *skb)
--{
-- dst_release(skb->dst);
--#ifdef CONFIG_XFRM
-- secpath_put(skb->sp);
--#endif
-- if (skb->destructor) {
-- WARN_ON(in_irq());
-- skb->destructor(skb);
-- }
--#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-- nf_conntrack_put(skb->nfct);
-- nf_conntrack_put_reasm(skb->nfct_reasm);
--#endif
--#ifdef CONFIG_BRIDGE_NETFILTER
-- nf_bridge_put(skb->nf_bridge);
--#endif
--/* XXX: IS this still necessary? - JHS */
--#ifdef CONFIG_NET_SCHED
-- skb->tc_index = 0;
--#ifdef CONFIG_NET_CLS_ACT
-- skb->tc_verd = 0;
--#endif
--#endif
-- skb_release_data(skb);
--}
--
--/**
-- * __kfree_skb - private function
-- * @skb: buffer
-- *
-- * Free an sk_buff. Release anything attached to the buffer.
-- * Clean the state. This is an internal helper function. Users should
-- * always call kfree_skb
-- */
--
--void __kfree_skb(struct sk_buff *skb)
--{
-- skb_release_all(skb);
-- kfree_skbmem(skb);
--}
--
--/**
-- * kfree_skb - free an sk_buff
-- * @skb: buffer to free
-- *
-- * Drop a reference to the buffer and free it if the usage count has
-- * hit zero.
-- */
--void kfree_skb(struct sk_buff *skb)
--{
-- if (unlikely(!skb))
-- return;
-- if (likely(atomic_read(&skb->users) == 1))
-- smp_rmb();
-- else if (likely(!atomic_dec_and_test(&skb->users)))
-- return;
-- __kfree_skb(skb);
--}
--
--static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
--{
-- new->tstamp = old->tstamp;
-- new->dev = old->dev;
-- new->transport_header = old->transport_header;
-- new->network_header = old->network_header;
-- new->mac_header = old->mac_header;
-- new->dst = dst_clone(old->dst);
--#ifdef CONFIG_INET
-- new->sp = secpath_get(old->sp);
--#endif
-- memcpy(new->cb, old->cb, sizeof(old->cb));
-- new->csum_start = old->csum_start;
-- new->csum_offset = old->csum_offset;
-- new->local_df = old->local_df;
-- new->pkt_type = old->pkt_type;
-- new->ip_summed = old->ip_summed;
-- skb_copy_queue_mapping(new, old);
-- new->priority = old->priority;
--#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
-- new->ipvs_property = old->ipvs_property;
--#endif
-- new->protocol = old->protocol;
-- new->mark = old->mark;
-- __nf_copy(new, old);
--#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
-- defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
-- new->nf_trace = old->nf_trace;
--#endif
--#ifdef CONFIG_NET_SCHED
-- new->tc_index = old->tc_index;
--#ifdef CONFIG_NET_CLS_ACT
-- new->tc_verd = old->tc_verd;
--#endif
--#endif
-- new->vlan_tci = old->vlan_tci;
--
-- skb_copy_secmark(new, old);
--}
--
--static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
--{
--#define C(x) n->x = skb->x
--
-- n->next = n->prev = NULL;
-- n->sk = NULL;
-- __copy_skb_header(n, skb);
--
-- C(len);
-- C(data_len);
-- C(mac_len);
-- n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
-- n->cloned = 1;
-- n->nohdr = 0;
-- n->destructor = NULL;
-- C(iif);
-- C(tail);
-- C(end);
-- C(head);
-- C(data);
-- C(truesize);
--#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE)
-- C(do_not_encrypt);
--#endif
-- atomic_set(&n->users, 1);
--
-- atomic_inc(&(skb_shinfo(skb)->dataref));
-- skb->cloned = 1;
--
-- return n;
--#undef C
--}
--
--/**
-- * skb_morph - morph one skb into another
-- * @dst: the skb to receive the contents
-- * @src: the skb to supply the contents
-- *
-- * This is identical to skb_clone except that the target skb is
-- * supplied by the user.
-- *
-- * The target skb is returned upon exit.
-- */
--struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
--{
-- skb_release_all(dst);
-- return __skb_clone(dst, src);
--}
--EXPORT_SYMBOL_GPL(skb_morph);
--
--/**
-- * skb_clone - duplicate an sk_buff
-- * @skb: buffer to clone
-- * @gfp_mask: allocation priority
-- *
-- * Duplicate an &sk_buff. The new one is not owned by a socket. Both
-- * copies share the same packet data but not structure. The new
-- * buffer has a reference count of 1. If the allocation fails the
-- * function returns %NULL otherwise the new buffer is returned.
-- *
-- * If this function is called from an interrupt gfp_mask() must be
-- * %GFP_ATOMIC.
-- */
--
--struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
--{
-- struct sk_buff *n;
--
-- n = skb + 1;
-- if (skb->fclone == SKB_FCLONE_ORIG &&
-- n->fclone == SKB_FCLONE_UNAVAILABLE) {
-- atomic_t *fclone_ref = (atomic_t *) (n + 1);
-- n->fclone = SKB_FCLONE_CLONE;
-- atomic_inc(fclone_ref);
-- } else {
-- n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
-- if (!n)
-- return NULL;
-- n->fclone = SKB_FCLONE_UNAVAILABLE;
-- }
--
-- return __skb_clone(n, skb);
--}
--
--static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
--{
--#ifndef NET_SKBUFF_DATA_USES_OFFSET
-- /*
-- * Shift between the two data areas in bytes
-- */
-- unsigned long offset = new->data - old->data;
--#endif
--
-- __copy_skb_header(new, old);
--
--#ifndef NET_SKBUFF_DATA_USES_OFFSET
-- /* {transport,network,mac}_header are relative to skb->head */
-- new->transport_header += offset;
-- new->network_header += offset;
-- new->mac_header += offset;
--#endif
-- skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
-- skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
-- skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
--}
--
--/**
-- * skb_copy - create private copy of an sk_buff
-- * @skb: buffer to copy
-- * @gfp_mask: allocation priority
-- *
-- * Make a copy of both an &sk_buff and its data. This is used when the
-- * caller wishes to modify the data and needs a private copy of the
-- * data to alter. Returns %NULL on failure or the pointer to the buffer
-- * on success. The returned buffer has a reference count of 1.
-- *
-- * As by-product this function converts non-linear &sk_buff to linear
-- * one, so that &sk_buff becomes completely private and caller is allowed
-- * to modify all the data of returned buffer. This means that this
-- * function is not recommended for use in circumstances when only
-- * header is going to be modified. Use pskb_copy() instead.
-- */
--
--struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
--{
-- int headerlen = skb->data - skb->head;
-- /*
-- * Allocate the copy buffer
-- */
-- struct sk_buff *n;
--#ifdef NET_SKBUFF_DATA_USES_OFFSET
-- n = alloc_skb(skb->end + skb->data_len, gfp_mask);
--#else
-- n = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
--#endif
-- if (!n)
-- return NULL;
--
-- /* Set the data pointer */
-- skb_reserve(n, headerlen);
-- /* Set the tail pointer and length */
-- skb_put(n, skb->len);
--
-- if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
-- BUG();
--
-- copy_skb_header(n, skb);
-- return n;
--}
--
--
--/**
-- * pskb_copy - create copy of an sk_buff with private head.
-- * @skb: buffer to copy
-- * @gfp_mask: allocation priority
-- *
-- * Make a copy of both an &sk_buff and part of its data, located
-- * in header. Fragmented data remain shared. This is used when
-- * the caller wishes to modify only header of &sk_buff and needs
-- * private copy of the header to alter. Returns %NULL on failure
-- * or the pointer to the buffer on success.
-- * The returned buffer has a reference count of 1.
-- */
--
--struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
--{
-- /*
-- * Allocate the copy buffer
-- */
-- struct sk_buff *n;
--#ifdef NET_SKBUFF_DATA_USES_OFFSET
-- n = alloc_skb(skb->end, gfp_mask);
--#else
-- n = alloc_skb(skb->end - skb->head, gfp_mask);
--#endif
-- if (!n)
-- goto out;
--
-- /* Set the data pointer */
-- skb_reserve(n, skb->data - skb->head);
-- /* Set the tail pointer and length */
-- skb_put(n, skb_headlen(skb));
-- /* Copy the bytes */
-- skb_copy_from_linear_data(skb, n->data, n->len);
--
-- n->truesize += skb->data_len;
-- n->data_len = skb->data_len;
-- n->len = skb->len;
--
-- if (skb_shinfo(skb)->nr_frags) {
-- int i;
--
-- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-- skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
-- get_page(skb_shinfo(n)->frags[i].page);
-- }
-- skb_shinfo(n)->nr_frags = i;
-- }
--
-- if (skb_shinfo(skb)->frag_list) {
-- skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
-- skb_clone_fraglist(n);
-- }
--
-- copy_skb_header(n, skb);
--out:
-- return n;
--}
--
--/**
-- * pskb_expand_head - reallocate header of &sk_buff
-- * @skb: buffer to reallocate
-- * @nhead: room to add at head
-- * @ntail: room to add at tail
-- * @gfp_mask: allocation priority
-- *
-- * Expands (or creates identical copy, if &nhead and &ntail are zero)
-- * header of skb. &sk_buff itself is not changed. &sk_buff MUST have
-- * reference count of 1. Returns zero in the case of success or error,
-- * if expansion failed. In the last case, &sk_buff is not changed.
-- *
-- * All the pointers pointing into skb header may change and must be
-- * reloaded after call to this function.
-- */
--
--int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
-- gfp_t gfp_mask)
--{
-- int i;
-- u8 *data;
--#ifdef NET_SKBUFF_DATA_USES_OFFSET
-- int size = nhead + skb->end + ntail;
--#else
-- int size = nhead + (skb->end - skb->head) + ntail;
--#endif
-- long off;
--
-- if (skb_shared(skb))
-- BUG();
--
-- size = SKB_DATA_ALIGN(size);
--
-- data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
-- if (!data)
-- goto nodata;
--
-- /* Copy only real data... and, alas, header. This should be
-- * optimized for the cases when header is void. */
--#ifdef NET_SKBUFF_DATA_USES_OFFSET
-- memcpy(data + nhead, skb->head, skb->tail);
--#else
-- memcpy(data + nhead, skb->head, skb->tail - skb->head);
--#endif
-- memcpy(data + size, skb_end_pointer(skb),
-- sizeof(struct skb_shared_info));
--
-- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-- get_page(skb_shinfo(skb)->frags[i].page);
--
-- if (skb_shinfo(skb)->frag_list)
-- skb_clone_fraglist(skb);
--
-- skb_release_data(skb);
--
-- off = (data + nhead) - skb->head;
--
-- skb->head = data;
-- skb->data += off;
--#ifdef NET_SKBUFF_DATA_USES_OFFSET
-- skb->end = size;
-- off = nhead;
--#else
-- skb->end = skb->head + size;
--#endif
-- /* {transport,network,mac}_header and tail are relative to skb->head */
-- skb->tail += off;
-- skb->transport_header += off;
-- skb->network_header += off;
-- skb->mac_header += off;
-- skb->csum_start += nhead;
-- skb->cloned = 0;
-- skb->hdr_len = 0;
-- skb->nohdr = 0;
-- atomic_set(&skb_shinfo(skb)->dataref, 1);
-- return 0;
--
--nodata:
-- return -ENOMEM;
--}
--
--/* Make private copy of skb with writable head and some headroom */
--
--struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
--{
-- struct sk_buff *skb2;
-- int delta = headroom - skb_headroom(skb);
--
-- if (delta <= 0)
-- skb2 = pskb_copy(skb, GFP_ATOMIC);
-- else {
-- skb2 = skb_clone(skb, GFP_ATOMIC);
-- if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0,
-- GFP_ATOMIC)) {
-- kfree_skb(skb2);
-- skb2 = NULL;
-- }
-- }
-- return skb2;
--}
--
--
--/**
-- * skb_copy_expand - copy and expand sk_buff
-- * @skb: buffer to copy
-- * @newheadroom: new free bytes at head
-- * @newtailroom: new free bytes at tail
-- * @gfp_mask: allocation priority
-- *
-- * Make a copy of both an &sk_buff and its data and while doing so
-- * allocate additional space.
-- *
-- * This is used when the caller wishes to modify the data and needs a
-- * private copy of the data to alter as well as more space for new fields.
-- * Returns %NULL on failure or the pointer to the buffer
-- * on success. The returned buffer has a reference count of 1.
-- *
-- * You must pass %GFP_ATOMIC as the allocation priority if this function
-- * is called from an interrupt.
-- */
--struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
-- int newheadroom, int newtailroom,
-- gfp_t gfp_mask)
--{
-- /*
-- * Allocate the copy buffer
-- */
-- struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
-- gfp_mask);
-- int oldheadroom = skb_headroom(skb);
-- int head_copy_len, head_copy_off;
-- int off;
--
-- if (!n)
-- return NULL;
--
-- skb_reserve(n, newheadroom);
--
-- /* Set the tail pointer and length */
-- skb_put(n, skb->len);
--
-- head_copy_len = oldheadroom;
-- head_copy_off = 0;
-- if (newheadroom <= head_copy_len)
-- head_copy_len = newheadroom;
-- else
-- head_copy_off = newheadroom - head_copy_len;
--
-- /* Copy the linear header and data. */
-- if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
-- skb->len + head_copy_len))
-- BUG();
--
-- copy_skb_header(n, skb);
--
-- off = newheadroom - oldheadroom;
-- n->csum_start += off;
--#ifdef NET_SKBUFF_DATA_USES_OFFSET
-- n->transport_header += off;
-- n->network_header += off;
-- n->mac_header += off;
--#endif
--
-- return n;
--}
--
--/**
-- * skb_pad - zero pad the tail of an skb
-- * @skb: buffer to pad
-- * @pad: space to pad
-- *
-- * Ensure that a buffer is followed by a padding area that is zero
-- * filled. Used by network drivers which may DMA or transfer data
-- * beyond the buffer end onto the wire.
-- *
-- * May return error in out of memory cases. The skb is freed on error.
-- */
--
--int skb_pad(struct sk_buff *skb, int pad)
--{
-- int err;
-- int ntail;
--
-- /* If the skbuff is non linear tailroom is always zero.. */
-- if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
-- memset(skb->data+skb->len, 0, pad);
-- return 0;
-- }
--
-- ntail = skb->data_len + pad - (skb->end - skb->tail);
-- if (likely(skb_cloned(skb) || ntail > 0)) {
-- err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC);
-- if (unlikely(err))
-- goto free_skb;
-- }
--
-- /* FIXME: The use of this function with non-linear skb's really needs
-- * to be audited.
-- */
-- err = skb_linearize(skb);
-- if (unlikely(err))
-- goto free_skb;
--
-- memset(skb->data + skb->len, 0, pad);
-- return 0;
--
--free_skb:
-- kfree_skb(skb);
-- return err;
--}
--
--/**
-- * skb_put - add data to a buffer
-- * @skb: buffer to use
-- * @len: amount of data to add
-- *
-- * This function extends the used data area of the buffer. If this would
-- * exceed the total buffer size the kernel will panic. A pointer to the
-- * first byte of the extra data is returned.
-- */
--unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
--{
-- unsigned char *tmp = skb_tail_pointer(skb);
-- SKB_LINEAR_ASSERT(skb);
-- skb->tail += len;
-- skb->len += len;
-- if (unlikely(skb->tail > skb->end))
-- skb_over_panic(skb, len, __builtin_return_address(0));
-- return tmp;
--}
--EXPORT_SYMBOL(skb_put);
--
--/**
-- * skb_push - add data to the start of a buffer
-- * @skb: buffer to use
-- * @len: amount of data to add
-- *
-- * This function extends the used data area of the buffer at the buffer
-- * start. If this would exceed the total buffer headroom the kernel will
-- * panic. A pointer to the first byte of the extra data is returned.
-- */
--unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
--{
-- skb->data -= len;
-- skb->len += len;
-- if (unlikely(skb->data<skb->head))
-- skb_under_panic(skb, len, __builtin_return_address(0));
-- return skb->data;
--}
--EXPORT_SYMBOL(skb_push);
--
--/**
-- * skb_pull - remove data from the start of a buffer
-- * @skb: buffer to use
-- * @len: amount of data to remove
-- *
-- * This function removes data from the start of a buffer, returning
-- * the memory to the headroom. A pointer to the next data in the buffer
-- * is returned. Once the data has been pulled future pushes will overwrite
-- * the old data.
-- */
--unsigned char *skb_pull(struct sk_buff *skb, unsigned int len)
--{
-- return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len);
--}
--EXPORT_SYMBOL(skb_pull);
--
--/**
-- * skb_trim - remove end from a buffer
-- * @skb: buffer to alter
-- * @len: new length
-- *
-- * Cut the length of a buffer down by removing data from the tail. If
-- * the buffer is already under the length specified it is not modified.
-- * The skb must be linear.
-- */
--void skb_trim(struct sk_buff *skb, unsigned int len)
--{
-- if (skb->len > len)
-- __skb_trim(skb, len);
--}
--EXPORT_SYMBOL(skb_trim);
--
--/* Trims skb to length len. It can change skb pointers.
-- */
--
--int ___pskb_trim(struct sk_buff *skb, unsigned int len)
--{
-- struct sk_buff **fragp;
-- struct sk_buff *frag;
-- int offset = skb_headlen(skb);
-- int nfrags = skb_shinfo(skb)->nr_frags;
-- int i;
-- int err;
--
-- if (skb_cloned(skb) &&
-- unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))))
-- return err;
--
-- i = 0;
-- if (offset >= len)
-- goto drop_pages;
--
-- for (; i < nfrags; i++) {
-- int end = offset + skb_shinfo(skb)->frags[i].size;
--
-- if (end < len) {
-- offset = end;
-- continue;
-- }
--
-- skb_shinfo(skb)->frags[i++].size = len - offset;
--
--drop_pages:
-- skb_shinfo(skb)->nr_frags = i;
--
-- for (; i < nfrags; i++)
-- put_page(skb_shinfo(skb)->frags[i].page);
--
-- if (skb_shinfo(skb)->frag_list)
-- skb_drop_fraglist(skb);
-- goto done;
-- }
--
-- for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp);
-- fragp = &frag->next) {
-- int end = offset + frag->len;
--
-- if (skb_shared(frag)) {
-- struct sk_buff *nfrag;
--
-- nfrag = skb_clone(frag, GFP_ATOMIC);
-- if (unlikely(!nfrag))
-- return -ENOMEM;
--
-- nfrag->next = frag->next;
-- kfree_skb(frag);
-- frag = nfrag;
-- *fragp = frag;
-- }
--
-- if (end < len) {
-- offset = end;
-- continue;
-- }
--
-- if (end > len &&
-- unlikely((err = pskb_trim(frag, len - offset))))
-- return err;
--
-- if (frag->next)
-- skb_drop_list(&frag->next);
-- break;
-- }
--
--done:
-- if (len > skb_headlen(skb)) {
-- skb->data_len -= skb->len - len;
-- skb->len = len;
-- } else {
-- skb->len = len;
-- skb->data_len = 0;
-- skb_set_tail_pointer(skb, len);
-- }
--
-- return 0;
--}
--
--/**
-- * __pskb_pull_tail - advance tail of skb header
-- * @skb: buffer to reallocate
-- * @delta: number of bytes to advance tail
-- *
-- * The function makes a sense only on a fragmented &sk_buff,
-- * it expands header moving its tail forward and copying necessary
-- * data from fragmented part.
-- *
-- * &sk_buff MUST have reference count of 1.
-- *
-- * Returns %NULL (and &sk_buff does not change) if pull failed
-- * or value of new tail of skb in the case of success.
-- *
-- * All the pointers pointing into skb header may change and must be
-- * reloaded after call to this function.
-- */
--
--/* Moves tail of skb head forward, copying data from fragmented part,
-- * when it is necessary.
-- * 1. It may fail due to malloc failure.
-- * 2. It may change skb pointers.
-- *
-- * It is pretty complicated. Luckily, it is called only in exceptional cases.
-- */
--unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
--{
-- /* If skb has not enough free space at tail, get new one
-- * plus 128 bytes for future expansions. If we have enough
-- * room at tail, reallocate without expansion only if skb is cloned.
-- */
-- int i, k, eat = (skb->tail + delta) - skb->end;
--
-- if (eat > 0 || skb_cloned(skb)) {
-- if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
-- GFP_ATOMIC))
-- return NULL;
-- }
--
-- if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta))
-- BUG();
--
-- /* Optimization: no fragments, no reasons to preestimate
-- * size of pulled pages. Superb.
-- */
-- if (!skb_shinfo(skb)->frag_list)
-- goto pull_pages;
--
-- /* Estimate size of pulled pages. */
-- eat = delta;
-- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-- if (skb_shinfo(skb)->frags[i].size >= eat)
-- goto pull_pages;
-- eat -= skb_shinfo(skb)->frags[i].size;
-- }
--
-- /* If we need update frag list, we are in troubles.
-- * Certainly, it possible to add an offset to skb data,
-- * but taking into account that pulling is expected to
-- * be very rare operation, it is worth to fight against
-- * further bloating skb head and crucify ourselves here instead.
-- * Pure masohism, indeed. 8)8)
-- */
-- if (eat) {
-- struct sk_buff *list = skb_shinfo(skb)->frag_list;
-- struct sk_buff *clone = NULL;
-- struct sk_buff *insp = NULL;
--
-- do {
-- BUG_ON(!list);
--
-- if (list->len <= eat) {
-- /* Eaten as whole. */
-- eat -= list->len;
-- list = list->next;
-- insp = list;
-- } else {
-- /* Eaten partially. */
--
-- if (skb_shared(list)) {
-- /* Sucks! We need to fork list. :-( */
-- clone = skb_clone(list, GFP_ATOMIC);
-- if (!clone)
-- return NULL;
-- insp = list->next;
-- list = clone;
-- } else {
-- /* This may be pulled without
-- * problems. */
-- insp = list;
-- }
-- if (!pskb_pull(list, eat)) {
-- if (clone)
-- kfree_skb(clone);
-- return NULL;
-- }
-- break;
-- }
-- } while (eat);
--
-- /* Free pulled out fragments. */
-- while ((list = skb_shinfo(skb)->frag_list) != insp) {
-- skb_shinfo(skb)->frag_list = list->next;
-- kfree_skb(list);
-- }
-- /* And insert new clone at head. */
-- if (clone) {
-- clone->next = list;
-- skb_shinfo(skb)->frag_list = clone;
-- }
-- }
-- /* Success! Now we may commit changes to skb data. */
--
--pull_pages:
-- eat = delta;
-- k = 0;
-- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-- if (skb_shinfo(skb)->frags[i].size <= eat) {
-- put_page(skb_shinfo(skb)->frags[i].page);
-- eat -= skb_shinfo(skb)->frags[i].size;
-- } else {
-- skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
-- if (eat) {
-- skb_shinfo(skb)->frags[k].page_offset += eat;
-- skb_shinfo(skb)->frags[k].size -= eat;
-- eat = 0;
-- }
-- k++;
-- }
-- }
-- skb_shinfo(skb)->nr_frags = k;
--
-- skb->tail += delta;
-- skb->data_len -= delta;
--
-- return skb_tail_pointer(skb);
--}
--
--/* Copy some data bits from skb to kernel buffer. */
--
--int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
--{
-- int i, copy;
-- int start = skb_headlen(skb);
--
-- if (offset > (int)skb->len - len)
-- goto fault;
--
-- /* Copy header. */
-- if ((copy = start - offset) > 0) {
-- if (copy > len)
-- copy = len;
-- skb_copy_from_linear_data_offset(skb, offset, to, copy);
-- if ((len -= copy) == 0)
-- return 0;
-- offset += copy;
-- to += copy;
-- }
--
-- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-- int end;
--
-- WARN_ON(start > offset + len);
--
-- end = start + skb_shinfo(skb)->frags[i].size;
-- if ((copy = end - offset) > 0) {
-- u8 *vaddr;
--
-- if (copy > len)
-- copy = len;
--
-- vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
-- memcpy(to,
-- vaddr + skb_shinfo(skb)->frags[i].page_offset+
-- offset - start, copy);
-- kunmap_skb_frag(vaddr);
--
-- if ((len -= copy) == 0)
-- return 0;
-- offset += copy;
-- to += copy;
-- }
-- start = end;
-- }
--
-- if (skb_shinfo(skb)->frag_list) {
-- struct sk_buff *list = skb_shinfo(skb)->frag_list;
--
-- for (; list; list = list->next) {
-- int end;
--
-- WARN_ON(start > offset + len);
--
-- end = start + list->len;
-- if ((copy = end - offset) > 0) {
-- if (copy > len)
-- copy = len;
-- if (skb_copy_bits(list, offset - start,
-- to, copy))
-- goto fault;
-- if ((len -= copy) == 0)
-- return 0;
-- offset += copy;
-- to += copy;
-- }
-- start = end;
-- }
-- }
-- if (!len)
-- return 0;
--
--fault:
-- return -EFAULT;
--}
--
--/*
-- * Callback from splice_to_pipe(), if we need to release some pages
-- * at the end of the spd in case we error'ed out in filling the pipe.
-- */
--static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
--{
-- put_page(spd->pages[i]);
--}
--
--static inline struct page *linear_to_page(struct page *page, unsigned int len,
-- unsigned int offset)
--{
-- struct page *p = alloc_pages(GFP_KERNEL, 0);
--
-- if (!p)
-- return NULL;
-- memcpy(page_address(p) + offset, page_address(page) + offset, len);
--
-- return p;
--}
--
--/*
-- * Fill page/offset/length into spd, if it can hold more pages.
-- */
--static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
-- unsigned int len, unsigned int offset,
-- struct sk_buff *skb, int linear)
--{
-- if (unlikely(spd->nr_pages == PIPE_BUFFERS))
-- return 1;
--
-- if (linear) {
-- page = linear_to_page(page, len, offset);
-- if (!page)
-- return 1;
-- } else
-- get_page(page);
--
-- spd->pages[spd->nr_pages] = page;
-- spd->partial[spd->nr_pages].len = len;
-- spd->partial[spd->nr_pages].offset = offset;
-- spd->nr_pages++;
--
-- return 0;
--}
--
--static inline void __segment_seek(struct page **page, unsigned int *poff,
-- unsigned int *plen, unsigned int off)
--{
-- *poff += off;
-- *page += *poff / PAGE_SIZE;
-- *poff = *poff % PAGE_SIZE;
-- *plen -= off;
--}
--
--static inline int __splice_segment(struct page *page, unsigned int poff,
-- unsigned int plen, unsigned int *off,
-- unsigned int *len, struct sk_buff *skb,
-- struct splice_pipe_desc *spd, int linear)
--{
-- if (!*len)
-- return 1;
--
-- /* skip this segment if already processed */
-- if (*off >= plen) {
-- *off -= plen;
-- return 0;
-- }
--
-- /* ignore any bits we already processed */
-- if (*off) {
-- __segment_seek(&page, &poff, &plen, *off);
-- *off = 0;
-- }
--
-- do {
-- unsigned int flen = min(*len, plen);
--
-- /* the linear region may spread across several pages */
-- flen = min_t(unsigned int, flen, PAGE_SIZE - poff);
--
-- if (spd_fill_page(spd, page, flen, poff, skb, linear))
-- return 1;
--
-- __segment_seek(&page, &poff, &plen, flen);
-- *len -= flen;
--
-- } while (*len && plen);
--
-- return 0;
--}
--
--/*
-- * Map linear and fragment data from the skb to spd. It reports failure if the
-- * pipe is full or if we already spliced the requested length.
-- */
--static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
-- unsigned int *len,
-- struct splice_pipe_desc *spd)
--{
-- int seg;
--
-- /*
-- * map the linear part
-- */
-- if (__splice_segment(virt_to_page(skb->data),
-- (unsigned long) skb->data & (PAGE_SIZE - 1),
-- skb_headlen(skb),
-- offset, len, skb, spd, 1))
-- return 1;
--
-- /*
-- * then map the fragments
-- */
-- for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
-- const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
--
-- if (__splice_segment(f->page, f->page_offset, f->size,
-- offset, len, skb, spd, 0))
-- return 1;
-- }
--
-- return 0;
--}
--
--/*
-- * Map data from the skb to a pipe. Should handle both the linear part,
-- * the fragments, and the frag list. It does NOT handle frag lists within
-- * the frag list, if such a thing exists. We'd probably need to recurse to
-- * handle that cleanly.
-- */
--int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
-- struct pipe_inode_info *pipe, unsigned int tlen,
-- unsigned int flags)
--{
-- struct partial_page partial[PIPE_BUFFERS];
-- struct page *pages[PIPE_BUFFERS];
-- struct splice_pipe_desc spd = {
-- .pages = pages,
-- .partial = partial,
-- .flags = flags,
-- .ops = &sock_pipe_buf_ops,
-- .spd_release = sock_spd_release,
-- };
--
-- /*
-- * __skb_splice_bits() only fails if the output has no room left,
-- * so no point in going over the frag_list for the error case.
-- */
-- if (__skb_splice_bits(skb, &offset, &tlen, &spd))
-- goto done;
-- else if (!tlen)
-- goto done;
--
-- /*
-- * now see if we have a frag_list to map
-- */
-- if (skb_shinfo(skb)->frag_list) {
-- struct sk_buff *list = skb_shinfo(skb)->frag_list;
--
-- for (; list && tlen; list = list->next) {
-- if (__skb_splice_bits(list, &offset, &tlen, &spd))
-- break;
-- }
-- }
--
--done:
-- if (spd.nr_pages) {
-- struct sock *sk = skb->sk;
-- int ret;
--
-- /*
-- * Drop the socket lock, otherwise we have reverse
-- * locking dependencies between sk_lock and i_mutex
-- * here as compared to sendfile(). We enter here
-- * with the socket lock held, and splice_to_pipe() will
-- * grab the pipe inode lock. For sendfile() emulation,
-- * we call into ->sendpage() with the i_mutex lock held
-- * and networking will grab the socket lock.
-- */
-- release_sock(sk);
-- ret = splice_to_pipe(pipe, &spd);
-- lock_sock(sk);
-- return ret;
-- }
--
-- return 0;
--}
--
--/**
-- * skb_store_bits - store bits from kernel buffer to skb
-- * @skb: destination buffer
-- * @offset: offset in destination
-- * @from: source buffer
-- * @len: number of bytes to copy
-- *
-- * Copy the specified number of bytes from the source buffer to the
-- * destination skb. This function handles all the messy bits of
-- * traversing fragment lists and such.
-- */
--
--int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
--{
-- int i, copy;
-- int start = skb_headlen(skb);
--
-- if (offset > (int)skb->len - len)
-- goto fault;
--
-- if ((copy = start - offset) > 0) {
-- if (copy > len)
-- copy = len;
-- skb_copy_to_linear_data_offset(skb, offset, from, copy);
-- if ((len -= copy) == 0)
-- return 0;
-- offset += copy;
-- from += copy;
-- }
--
-- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-- skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-- int end;
--
-- WARN_ON(start > offset + len);
--
-- end = start + frag->size;
-- if ((copy = end - offset) > 0) {
-- u8 *vaddr;
--
-- if (copy > len)
-- copy = len;
--
-- vaddr = kmap_skb_frag(frag);
-- memcpy(vaddr + frag->page_offset + offset - start,
-- from, copy);
-- kunmap_skb_frag(vaddr);
--
-- if ((len -= copy) == 0)
-- return 0;
-- offset += copy;
-- from += copy;
-- }
-- start = end;
-- }
--
-- if (skb_shinfo(skb)->frag_list) {
-- struct sk_buff *list = skb_shinfo(skb)->frag_list;
--
-- for (; list; list = list->next) {
-- int end;
--
-- WARN_ON(start > offset + len);
--
-- end = start + list->len;
-- if ((copy = end - offset) > 0) {
-- if (copy > len)
-- copy = len;
-- if (skb_store_bits(list, offset - start,
-- from, copy))
-- goto fault;
-- if ((len -= copy) == 0)
-- return 0;
-- offset += copy;
-- from += copy;
-- }
-- start = end;
-- }
-- }
-- if (!len)
-- return 0;
--
--fault:
-- return -EFAULT;
--}
--
--EXPORT_SYMBOL(skb_store_bits);
--
--/* Checksum skb data. */
--
--__wsum skb_checksum(const struct sk_buff *skb, int offset,
-- int len, __wsum csum)
--{
-- int start = skb_headlen(skb);
-- int i, copy = start - offset;
-- int pos = 0;
--
-- /* Checksum header. */
-- if (copy > 0) {
-- if (copy > len)
-- copy = len;
-- csum = csum_partial(skb->data + offset, copy, csum);
-- if ((len -= copy) == 0)
-- return csum;
-- offset += copy;
-- pos = copy;
-- }
--
-- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-- int end;
--
-- WARN_ON(start > offset + len);
--
-- end = start + skb_shinfo(skb)->frags[i].size;
-- if ((copy = end - offset) > 0) {
-- __wsum csum2;
-- u8 *vaddr;
-- skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
--
-- if (copy > len)
-- copy = len;
-- vaddr = kmap_skb_frag(frag);
-- csum2 = csum_partial(vaddr + frag->page_offset +
-- offset - start, copy, 0);
-- kunmap_skb_frag(vaddr);
-- csum = csum_block_add(csum, csum2, pos);
-- if (!(len -= copy))
-- return csum;
-- offset += copy;
-- pos += copy;
-- }
-- start = end;
-- }
--
-- if (skb_shinfo(skb)->frag_list) {
-- struct sk_buff *list = skb_shinfo(skb)->frag_list;
--
-- for (; list; list = list->next) {
-- int end;
--
-- WARN_ON(start > offset + len);
--
-- end = start + list->len;
-- if ((copy = end - offset) > 0) {
-- __wsum csum2;
-- if (copy > len)
-- copy = len;
-- csum2 = skb_checksum(list, offset - start,
-- copy, 0);
-- csum = csum_block_add(csum, csum2, pos);
-- if ((len -= copy) == 0)
-- return csum;
-- offset += copy;
-- pos += copy;
-- }
-- start = end;
-- }
-- }
-- BUG_ON(len);
--
-- return csum;
--}
--
--/* Both of above in one bottle. */
--
--__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
-- u8 *to, int len, __wsum csum)
--{
-- int start = skb_headlen(skb);
-- int i, copy = start - offset;
-- int pos = 0;
--
-- /* Copy header. */
-- if (copy > 0) {
-- if (copy > len)
-- copy = len;
-- csum = csum_partial_copy_nocheck(skb->data + offset, to,
-- copy, csum);
-- if ((len -= copy) == 0)
-- return csum;
-- offset += copy;
-- to += copy;
-- pos = copy;
-- }
--
-- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-- int end;
--
-- WARN_ON(start > offset + len);
--
-- end = start + skb_shinfo(skb)->frags[i].size;
-- if ((copy = end - offset) > 0) {
-- __wsum csum2;
-- u8 *vaddr;
-- skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
--
-- if (copy > len)
-- copy = len;
-- vaddr = kmap_skb_frag(frag);
-- csum2 = csum_partial_copy_nocheck(vaddr +
-- frag->page_offset +
-- offset - start, to,
-- copy, 0);
-- kunmap_skb_frag(vaddr);
-- csum = csum_block_add(csum, csum2, pos);
-- if (!(len -= copy))
-- return csum;
-- offset += copy;
-- to += copy;
-- pos += copy;
-- }
-- start = end;
-- }
--
-- if (skb_shinfo(skb)->frag_list) {
-- struct sk_buff *list = skb_shinfo(skb)->frag_list;
--
-- for (; list; list = list->next) {
-- __wsum csum2;
-- int end;
--
-- WARN_ON(start > offset + len);
--
-- end = start + list->len;
-- if ((copy = end - offset) > 0) {
-- if (copy > len)
-- copy = len;
-- csum2 = skb_copy_and_csum_bits(list,
-- offset - start,
-- to, copy, 0);
-- csum = csum_block_add(csum, csum2, pos);
-- if ((len -= copy) == 0)
-- return csum;
-- offset += copy;
-- to += copy;
-- pos += copy;
-- }
-- start = end;
-- }
-- }
-- BUG_ON(len);
-- return csum;
--}
--
--void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
--{
-- __wsum csum;
-- long csstart;
--
-- if (skb->ip_summed == CHECKSUM_PARTIAL)
-- csstart = skb->csum_start - skb_headroom(skb);
-- else
-- csstart = skb_headlen(skb);
--
-- BUG_ON(csstart > skb_headlen(skb));
--
-- skb_copy_from_linear_data(skb, to, csstart);
--
-- csum = 0;
-- if (csstart != skb->len)
-- csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
-- skb->len - csstart, 0);
--
-- if (skb->ip_summed == CHECKSUM_PARTIAL) {
-- long csstuff = csstart + skb->csum_offset;
--
-- *((__sum16 *)(to + csstuff)) = csum_fold(csum);
-- }
--}
--
--/**
-- * skb_dequeue - remove from the head of the queue
-- * @list: list to dequeue from
-- *
-- * Remove the head of the list. The list lock is taken so the function
-- * may be used safely with other locking list functions. The head item is
-- * returned or %NULL if the list is empty.
-- */
--
--struct sk_buff *skb_dequeue(struct sk_buff_head *list)
--{
-- unsigned long flags;
-- struct sk_buff *result;
--
-- spin_lock_irqsave(&list->lock, flags);
-- result = __skb_dequeue(list);
-- spin_unlock_irqrestore(&list->lock, flags);
-- return result;
--}
--
--/**
-- * skb_dequeue_tail - remove from the tail of the queue
-- * @list: list to dequeue from
-- *
-- * Remove the tail of the list. The list lock is taken so the function
-- * may be used safely with other locking list functions. The tail item is
-- * returned or %NULL if the list is empty.
-- */
--struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
--{
-- unsigned long flags;
-- struct sk_buff *result;
--
-- spin_lock_irqsave(&list->lock, flags);
-- result = __skb_dequeue_tail(list);
-- spin_unlock_irqrestore(&list->lock, flags);
-- return result;
--}
--
--/**
-- * skb_queue_purge - empty a list
-- * @list: list to empty
-- *
-- * Delete all buffers on an &sk_buff list. Each buffer is removed from
-- * the list and one reference dropped. This function takes the list
-- * lock and is atomic with respect to other list locking functions.
-- */
--void skb_queue_purge(struct sk_buff_head *list)
--{
-- struct sk_buff *skb;
-- while ((skb = skb_dequeue(list)) != NULL)
-- kfree_skb(skb);
--}
--
--/**
-- * skb_queue_head - queue a buffer at the list head
-- * @list: list to use
-- * @newsk: buffer to queue
-- *
-- * Queue a buffer at the start of the list. This function takes the
-- * list lock and can be used safely with other locking &sk_buff functions
-- * safely.
-- *
-- * A buffer cannot be placed on two lists at the same time.
-- */
--void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
--{
-- unsigned long flags;
--
-- spin_lock_irqsave(&list->lock, flags);
-- __skb_queue_head(list, newsk);
-- spin_unlock_irqrestore(&list->lock, flags);
--}
--
--/**
-- * skb_queue_tail - queue a buffer at the list tail
-- * @list: list to use
-- * @newsk: buffer to queue
-- *
-- * Queue a buffer at the tail of the list. This function takes the
-- * list lock and can be used safely with other locking &sk_buff functions
-- * safely.
-- *
-- * A buffer cannot be placed on two lists at the same time.
-- */
--void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
--{
-- unsigned long flags;
--
-- spin_lock_irqsave(&list->lock, flags);
-- __skb_queue_tail(list, newsk);
-- spin_unlock_irqrestore(&list->lock, flags);
--}
--
--/**
-- * skb_unlink - remove a buffer from a list
-- * @skb: buffer to remove
-- * @list: list to use
-- *
-- * Remove a packet from a list. The list locks are taken and this
-- * function is atomic with respect to other list locked calls
-- *
-- * You must know what list the SKB is on.
-- */
--void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
--{
-- unsigned long flags;
--
-- spin_lock_irqsave(&list->lock, flags);
-- __skb_unlink(skb, list);
-- spin_unlock_irqrestore(&list->lock, flags);
--}
--
--/**
-- * skb_append - append a buffer
-- * @old: buffer to insert after
-- * @newsk: buffer to insert
-- * @list: list to use
-- *
-- * Place a packet after a given packet in a list. The list locks are taken
-- * and this function is atomic with respect to other list locked calls.
-- * A buffer cannot be placed on two lists at the same time.
-- */
--void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
--{
-- unsigned long flags;
--
-- spin_lock_irqsave(&list->lock, flags);
-- __skb_queue_after(list, old, newsk);
-- spin_unlock_irqrestore(&list->lock, flags);
--}
--
--
--/**
-- * skb_insert - insert a buffer
-- * @old: buffer to insert before
-- * @newsk: buffer to insert
-- * @list: list to use
-- *
-- * Place a packet before a given packet in a list. The list locks are
-- * taken and this function is atomic with respect to other list locked
-- * calls.
-- *
-- * A buffer cannot be placed on two lists at the same time.
-- */
--void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
--{
-- unsigned long flags;
--
-- spin_lock_irqsave(&list->lock, flags);
-- __skb_insert(newsk, old->prev, old, list);
-- spin_unlock_irqrestore(&list->lock, flags);
--}
--
--static inline void skb_split_inside_header(struct sk_buff *skb,
-- struct sk_buff* skb1,
-- const u32 len, const int pos)
--{
-- int i;
--
-- skb_copy_from_linear_data_offset(skb, len, skb_put(skb1, pos - len),
-- pos - len);
-- /* And move data appendix as is. */
-- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-- skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
--
-- skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
-- skb_shinfo(skb)->nr_frags = 0;
-- skb1->data_len = skb->data_len;
-- skb1->len += skb1->data_len;
-- skb->data_len = 0;
-- skb->len = len;
-- skb_set_tail_pointer(skb, len);
--}
--
--static inline void skb_split_no_header(struct sk_buff *skb,
-- struct sk_buff* skb1,
-- const u32 len, int pos)
--{
-- int i, k = 0;
-- const int nfrags = skb_shinfo(skb)->nr_frags;
--
-- skb_shinfo(skb)->nr_frags = 0;
-- skb1->len = skb1->data_len = skb->len - len;
-- skb->len = len;
-- skb->data_len = len - pos;
--
-- for (i = 0; i < nfrags; i++) {
-- int size = skb_shinfo(skb)->frags[i].size;
--
-- if (pos + size > len) {
-- skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];
--
-- if (pos < len) {
-- /* Split frag.
-- * We have two variants in this case:
-- * 1. Move all the frag to the second
-- * part, if it is possible. F.e.
-- * this approach is mandatory for TUX,
-- * where splitting is expensive.
-- * 2. Split is accurately. We make this.
-- */
-- get_page(skb_shinfo(skb)->frags[i].page);
-- skb_shinfo(skb1)->frags[0].page_offset += len - pos;
-- skb_shinfo(skb1)->frags[0].size -= len - pos;
-- skb_shinfo(skb)->frags[i].size = len - pos;
-- skb_shinfo(skb)->nr_frags++;
-- }
-- k++;
-- } else
-- skb_shinfo(skb)->nr_frags++;
-- pos += size;
-- }
-- skb_shinfo(skb1)->nr_frags = k;
--}
--
--/**
-- * skb_split - Split fragmented skb to two parts at length len.
-- * @skb: the buffer to split
-- * @skb1: the buffer to receive the second part
-- * @len: new length for skb
-- */
--void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
--{
-- int pos = skb_headlen(skb);
--
-- if (len < pos) /* Split line is inside header. */
-- skb_split_inside_header(skb, skb1, len, pos);
-- else /* Second chunk has no header, nothing to copy. */
-- skb_split_no_header(skb, skb1, len, pos);
--}
--
--/**
-- * skb_prepare_seq_read - Prepare a sequential read of skb data
-- * @skb: the buffer to read
-- * @from: lower offset of data to be read
-- * @to: upper offset of data to be read
-- * @st: state variable
-- *
-- * Initializes the specified state variable. Must be called before
-- * invoking skb_seq_read() for the first time.
-- */
--void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
-- unsigned int to, struct skb_seq_state *st)
--{
-- st->lower_offset = from;
-- st->upper_offset = to;
-- st->root_skb = st->cur_skb = skb;
-- st->frag_idx = st->stepped_offset = 0;
-- st->frag_data = NULL;
--}
--
--/**
-- * skb_seq_read - Sequentially read skb data
-- * @consumed: number of bytes consumed by the caller so far
-- * @data: destination pointer for data to be returned
-- * @st: state variable
-- *
-- * Reads a block of skb data at &consumed relative to the
-- * lower offset specified to skb_prepare_seq_read(). Assigns
-- * the head of the data block to &data and returns the length
-- * of the block or 0 if the end of the skb data or the upper
-- * offset has been reached.
-- *
-- * The caller is not required to consume all of the data
-- * returned, i.e. &consumed is typically set to the number
-- * of bytes already consumed and the next call to
-- * skb_seq_read() will return the remaining part of the block.
-- *
-- * Note 1: The size of each block of data returned can be arbitary,
-- * this limitation is the cost for zerocopy seqeuental
-- * reads of potentially non linear data.
-- *
-- * Note 2: Fragment lists within fragments are not implemented
-- * at the moment, state->root_skb could be replaced with
-- * a stack for this purpose.
-- */
--unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
-- struct skb_seq_state *st)
--{
-- unsigned int block_limit, abs_offset = consumed + st->lower_offset;
-- skb_frag_t *frag;
--
-- if (unlikely(abs_offset >= st->upper_offset))
-- return 0;
--
--next_skb:
-- block_limit = skb_headlen(st->cur_skb) + st->stepped_offset;
--
-- if (abs_offset < block_limit && !st->frag_data) {
-- *data = st->cur_skb->data + (abs_offset - st->stepped_offset);
-- return block_limit - abs_offset;
-- }
--
-- if (st->frag_idx == 0 && !st->frag_data)
-- st->stepped_offset += skb_headlen(st->cur_skb);
--
-- while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {
-- frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx];
-- block_limit = frag->size + st->stepped_offset;
--
-- if (abs_offset < block_limit) {
-- if (!st->frag_data)
-- st->frag_data = kmap_skb_frag(frag);
--
-- *data = (u8 *) st->frag_data + frag->page_offset +
-- (abs_offset - st->stepped_offset);
--
-- return block_limit - abs_offset;
-- }
--
-- if (st->frag_data) {
-- kunmap_skb_frag(st->frag_data);
-- st->frag_data = NULL;
-- }
--
-- st->frag_idx++;
-- st->stepped_offset += frag->size;
-- }
--
-- if (st->frag_data) {
-- kunmap_skb_frag(st->frag_data);
-- st->frag_data = NULL;
-- }
--
-- if (st->root_skb == st->cur_skb &&
-- skb_shinfo(st->root_skb)->frag_list) {
-- st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
-- st->frag_idx = 0;
-- goto next_skb;
-- } else if (st->cur_skb->next) {
-- st->cur_skb = st->cur_skb->next;
-- st->frag_idx = 0;
-- goto next_skb;
-- }
--
-- return 0;
--}
--
--/**
-- * skb_abort_seq_read - Abort a sequential read of skb data
-- * @st: state variable
-- *
-- * Must be called if the sequential read was abandoned before
-- * skb_seq_read() returned 0.
-- */
--void skb_abort_seq_read(struct skb_seq_state *st)
--{
-- if (st->frag_data)
-- kunmap_skb_frag(st->frag_data);
--}
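/*
 * A minimal usage sketch of the sequential-read API above, assuming
 * 'skb' is a valid socket buffer whose whole payload is to be walked.
 * skb_abort_seq_read() is only needed when the walk stops before
 * skb_seq_read() has returned 0.
 */
static void example_walk_skb(struct sk_buff *skb)
{
	struct skb_seq_state st;
	const u8 *data;
	unsigned int consumed = 0, len;

	skb_prepare_seq_read(skb, 0, skb->len, &st);
	while ((len = skb_seq_read(consumed, &data, &st)) != 0) {
		/* process 'len' bytes starting at 'data' */
		consumed += len;
	}
	/* the loop ran until skb_seq_read() returned 0, so no abort */
}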
--
--#define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb))
--
--static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text,
-- struct ts_config *conf,
-- struct ts_state *state)
--{
-- return skb_seq_read(offset, text, TS_SKB_CB(state));
--}
--
--static void skb_ts_finish(struct ts_config *conf, struct ts_state *state)
--{
-- skb_abort_seq_read(TS_SKB_CB(state));
--}
--
--/**
-- * skb_find_text - Find a text pattern in skb data
-- * @skb: the buffer to look in
-- * @from: search offset
-- * @to: search limit
-- * @config: textsearch configuration
-- * @state: uninitialized textsearch state variable
-- *
-- * Finds a pattern in the skb data according to the specified
-- * textsearch configuration. Use textsearch_next() to retrieve
-- * subsequent occurrences of the pattern. Returns the offset
-- * to the first occurrence or UINT_MAX if no match was found.
-- */
--unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
-- unsigned int to, struct ts_config *config,
-- struct ts_state *state)
--{
-- unsigned int ret;
--
-- config->get_next_block = skb_ts_get_next_block;
-- config->finish = skb_ts_finish;
--
-- skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state));
--
-- ret = textsearch_find(config, state);
-- return (ret <= to - from ? ret : UINT_MAX);
--}
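/*
 * A minimal sketch of a search, assuming 'pattern'/'pattern_len'
 * describe the needle; "kmp" names one of the in-kernel textsearch
 * algorithms and UINT_MAX mirrors the no-match convention above.
 */
static unsigned int example_find(struct sk_buff *skb, const void *pattern,
				 unsigned int pattern_len)
{
	struct ts_config *conf;
	struct ts_state state;
	unsigned int pos;

	conf = textsearch_prepare("kmp", pattern, pattern_len,
				  GFP_KERNEL, TS_AUTOLOAD);
	if (IS_ERR(conf))
		return UINT_MAX;

	pos = skb_find_text(skb, 0, skb->len, conf, &state);
	textsearch_destroy(conf);
	return pos;
}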
--
--/**
-- *	skb_append_datato_frags - append the user data to a skb
-- * @sk: sock structure
-- *	@skb: skb structure to be appended with user data.
-- * @getfrag: call back function to be used for getting the user data
-- * @from: pointer to user message iov
-- * @length: length of the iov message
-- *
-- * Description: This procedure appends the user data to the fragment part
-- * of the skb. If any page allocation fails, it returns -ENOMEM.
-- */
--int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
-- int (*getfrag)(void *from, char *to, int offset,
-- int len, int odd, struct sk_buff *skb),
-- void *from, int length)
--{
-- int frg_cnt = 0;
-- skb_frag_t *frag = NULL;
-- struct page *page = NULL;
-- int copy, left;
-- int offset = 0;
-- int ret;
--
-- do {
-- /* Return error if we don't have space for new frag */
-- frg_cnt = skb_shinfo(skb)->nr_frags;
-- if (frg_cnt >= MAX_SKB_FRAGS)
-- return -EFAULT;
--
-- /* allocate a new page for next frag */
-- page = alloc_pages(sk->sk_allocation, 0);
--
-- /* If alloc_page fails just return failure and caller will
--		 * free previously allocated pages by doing kfree_skb()
-- */
-- if (page == NULL)
-- return -ENOMEM;
--
-- /* initialize the next frag */
-- sk->sk_sndmsg_page = page;
-- sk->sk_sndmsg_off = 0;
-- skb_fill_page_desc(skb, frg_cnt, page, 0, 0);
-- skb->truesize += PAGE_SIZE;
-- atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
--
-- /* get the new initialized frag */
-- frg_cnt = skb_shinfo(skb)->nr_frags;
-- frag = &skb_shinfo(skb)->frags[frg_cnt - 1];
--
-- /* copy the user data to page */
-- left = PAGE_SIZE - frag->page_offset;
-- copy = (length > left)? left : length;
--
-- ret = getfrag(from, (page_address(frag->page) +
-- frag->page_offset + frag->size),
-- offset, copy, 0, skb);
-- if (ret < 0)
-- return -EFAULT;
--
-- /* copy was successful so update the size parameters */
-- sk->sk_sndmsg_off += copy;
-- frag->size += copy;
-- skb->len += copy;
-- skb->data_len += copy;
-- offset += copy;
-- length -= copy;
--
-- } while (length > 0);
--
-- return 0;
--}
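/*
 * A sketch of the typical call, modeled on the UFO path in
 * ip_append_data(): 'from' is the user iovec handed through to
 * ip_generic_getfrag(), and on failure the caller's kfree_skb()
 * releases any pages already attached above.
 */
static int example_append(struct sock *sk, struct sk_buff *skb,
			  void *from, int length)
{
	int err;

	err = skb_append_datato_frags(sk, skb, ip_generic_getfrag,
				      from, length);
	if (err)
		kfree_skb(skb);
	return err;
}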
--
--/**
-- * skb_pull_rcsum - pull skb and update receive checksum
-- * @skb: buffer to update
-- * @len: length of data pulled
-- *
-- * This function performs an skb_pull on the packet and updates
-- * the CHECKSUM_COMPLETE checksum. It should be used on
-- * receive path processing instead of skb_pull unless you know
-- * that the checksum difference is zero (e.g., a valid IP header)
-- * or you are setting ip_summed to CHECKSUM_NONE.
-- */
--unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
--{
-- BUG_ON(len > skb->len);
-- skb->len -= len;
-- BUG_ON(skb->len < skb->data_len);
-- skb_postpull_rcsum(skb, skb->data, len);
-- return skb->data += len;
--}
--
--EXPORT_SYMBOL_GPL(skb_pull_rcsum);
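/*
 * A sketch of the intended receive-path use, assuming a 4-byte
 * encapsulation header is being stripped: the pull keeps a
 * CHECKSUM_COMPLETE value consistent with the remaining data.
 */
static int example_decap(struct sk_buff *skb)
{
	if (!pskb_may_pull(skb, 4))
		return -EINVAL;
	skb_pull_rcsum(skb, 4);
	return 0;
}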
--
--/**
-- * skb_segment - Perform protocol segmentation on skb.
-- * @skb: buffer to segment
-- * @features: features for the output path (see dev->features)
-- *
-- * This function performs segmentation on the given skb. It returns
-- * a pointer to the first in a list of new skbs for the segments.
-- * In case of error it returns ERR_PTR(err).
-- */
--struct sk_buff *skb_segment(struct sk_buff *skb, int features)
--{
-- struct sk_buff *segs = NULL;
-- struct sk_buff *tail = NULL;
-- unsigned int mss = skb_shinfo(skb)->gso_size;
-- unsigned int doffset = skb->data - skb_mac_header(skb);
-- unsigned int offset = doffset;
-- unsigned int headroom;
-- unsigned int len;
-- int sg = features & NETIF_F_SG;
-- int nfrags = skb_shinfo(skb)->nr_frags;
-- int err = -ENOMEM;
-- int i = 0;
-- int pos;
--
-- __skb_push(skb, doffset);
-- headroom = skb_headroom(skb);
-- pos = skb_headlen(skb);
--
-- do {
-- struct sk_buff *nskb;
-- skb_frag_t *frag;
-- int hsize;
-- int k;
-- int size;
--
-- len = skb->len - offset;
-- if (len > mss)
-- len = mss;
--
-- hsize = skb_headlen(skb) - offset;
-- if (hsize < 0)
-- hsize = 0;
-- if (hsize > len || !sg)
-- hsize = len;
--
-- nskb = alloc_skb(hsize + doffset + headroom, GFP_ATOMIC);
-- if (unlikely(!nskb))
-- goto err;
--
-- if (segs)
-- tail->next = nskb;
-- else
-- segs = nskb;
-- tail = nskb;
--
-- __copy_skb_header(nskb, skb);
-- nskb->mac_len = skb->mac_len;
--
-- skb_reserve(nskb, headroom);
-- skb_reset_mac_header(nskb);
-- skb_set_network_header(nskb, skb->mac_len);
-- nskb->transport_header = (nskb->network_header +
-- skb_network_header_len(skb));
-- skb_copy_from_linear_data(skb, skb_put(nskb, doffset),
-- doffset);
-- if (!sg) {
-- nskb->ip_summed = CHECKSUM_NONE;
-- nskb->csum = skb_copy_and_csum_bits(skb, offset,
-- skb_put(nskb, len),
-- len, 0);
-- continue;
-- }
--
-- frag = skb_shinfo(nskb)->frags;
-- k = 0;
--
-- skb_copy_from_linear_data_offset(skb, offset,
-- skb_put(nskb, hsize), hsize);
--
-- while (pos < offset + len) {
-- BUG_ON(i >= nfrags);
--
-- *frag = skb_shinfo(skb)->frags[i];
-- get_page(frag->page);
-- size = frag->size;
--
-- if (pos < offset) {
-- frag->page_offset += offset - pos;
-- frag->size -= offset - pos;
-- }
--
-- k++;
--
-- if (pos + size <= offset + len) {
-- i++;
-- pos += size;
-- } else {
-- frag->size -= pos + size - (offset + len);
-- break;
-- }
--
-- frag++;
-- }
--
-- skb_shinfo(nskb)->nr_frags = k;
-- nskb->data_len = len - hsize;
-- nskb->len += nskb->data_len;
-- nskb->truesize += nskb->data_len;
-- } while ((offset += len) < skb->len);
--
-- return segs;
--
--err:
-- while ((skb = segs)) {
-- segs = skb->next;
-- kfree_skb(skb);
-- }
-- return ERR_PTR(err);
--}
--
--EXPORT_SYMBOL_GPL(skb_segment);
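/*
 * A caller-side sketch of consuming the segment list, loosely
 * modeled on the 2.6.27 GSO transmit path; real callers reach
 * skb_segment() through skb_gso_segment() and a protocol's
 * gso_segment hook (e.g. tcp_tso_segment). Transmit errors and
 * queue state are ignored here for brevity.
 */
static int example_gso_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct sk_buff *segs, *nskb;

	segs = skb_segment(skb, dev->features);
	if (IS_ERR(segs))
		return PTR_ERR(segs);

	while (segs) {
		nskb = segs;
		segs = nskb->next;
		nskb->next = NULL;
		dev->hard_start_xmit(nskb, dev);
	}
	kfree_skb(skb);		/* the oversized original is no longer needed */
	return 0;
}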
--
--void __init skb_init(void)
--{
-- skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
-- sizeof(struct sk_buff),
-- 0,
-- SLAB_HWCACHE_ALIGN|SLAB_PANIC,
-- NULL);
-- skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
-- (2*sizeof(struct sk_buff)) +
-- sizeof(atomic_t),
-- 0,
-- SLAB_HWCACHE_ALIGN|SLAB_PANIC,
-- NULL);
--}
--
--/**
-- * skb_to_sgvec - Fill a scatter-gather list from a socket buffer
-- * @skb: Socket buffer containing the buffers to be mapped
-- * @sg: The scatter-gather list to map into
-- * @offset: The offset into the buffer's contents to start mapping
-- * @len: Length of buffer space to be mapped
-- *
-- * Fill the specified scatter-gather list with mappings/pointers into a
-- * region of the buffer space attached to a socket buffer.
-- */
--static int
--__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
--{
-- int start = skb_headlen(skb);
-- int i, copy = start - offset;
-- int elt = 0;
--
-- if (copy > 0) {
-- if (copy > len)
-- copy = len;
-- sg_set_buf(sg, skb->data + offset, copy);
-- elt++;
-- if ((len -= copy) == 0)
-- return elt;
-- offset += copy;
-- }
--
-- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-- int end;
--
-- WARN_ON(start > offset + len);
--
-- end = start + skb_shinfo(skb)->frags[i].size;
-- if ((copy = end - offset) > 0) {
-- skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
--
-- if (copy > len)
-- copy = len;
-- sg_set_page(&sg[elt], frag->page, copy,
-- frag->page_offset+offset-start);
-- elt++;
-- if (!(len -= copy))
-- return elt;
-- offset += copy;
-- }
-- start = end;
-- }
--
-- if (skb_shinfo(skb)->frag_list) {
-- struct sk_buff *list = skb_shinfo(skb)->frag_list;
--
-- for (; list; list = list->next) {
-- int end;
--
-- WARN_ON(start > offset + len);
--
-- end = start + list->len;
-- if ((copy = end - offset) > 0) {
-- if (copy > len)
-- copy = len;
-- elt += __skb_to_sgvec(list, sg+elt, offset - start,
-- copy);
-- if ((len -= copy) == 0)
-- return elt;
-- offset += copy;
-- }
-- start = end;
-- }
-- }
-- BUG_ON(len);
-- return elt;
--}
--
--int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
--{
-- int nsg = __skb_to_sgvec(skb, sg, offset, len);
--
-- sg_mark_end(&sg[nsg - 1]);
--
-- return nsg;
--}
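/*
 * A minimal mapping sketch, as on crypto/IPsec paths: assuming no
 * frag_list chaining, the linear head plus at most MAX_SKB_FRAGS
 * pages bound the element count, so a stack table of
 * MAX_SKB_FRAGS + 1 entries is sufficient.
 */
static int example_map(struct sk_buff *skb)
{
	struct scatterlist sg[MAX_SKB_FRAGS + 1];

	sg_init_table(sg, ARRAY_SIZE(sg));
	return skb_to_sgvec(skb, sg, 0, skb->len);	/* entries used */
}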
--
--/**
-- * skb_cow_data - Check that a socket buffer's data buffers are writable
-- * @skb: The socket buffer to check.
-- * @tailbits: Amount of trailing space to be added
-- * @trailer: Returned pointer to the skb where the @tailbits space begins
-- *
-- * Make sure that the data buffers attached to a socket buffer are
-- * writable. If they are not, private copies are made of the data buffers
-- * and the socket buffer is set to use these instead.
-- *
-- * If @tailbits is given, make sure that there is space to write @tailbits
-- * bytes of data beyond current end of socket buffer. @trailer will be
-- * set to point to the skb in which this space begins.
-- *
-- * The number of scatterlist elements required to completely map the
-- * COW'd and extended socket buffer will be returned.
-- */
--int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
--{
-- int copyflag;
-- int elt;
-- struct sk_buff *skb1, **skb_p;
--
-- /* If skb is cloned or its head is paged, reallocate
-- * head pulling out all the pages (pages are considered not writable
-- * at the moment even if they are anonymous).
-- */
-- if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
-- __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
-- return -ENOMEM;
--
-- /* Easy case. Most of packets will go this way. */
-- if (!skb_shinfo(skb)->frag_list) {
--		/* A little trouble: not enough space for the trailer.
--		 * This should not happen when the stack is tuned to generate
--		 * good frames. OK, on a miss we reallocate and reserve even
--		 * more space; 128 bytes is fair. */
--
-- if (skb_tailroom(skb) < tailbits &&
-- pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC))
-- return -ENOMEM;
--
-- /* Voila! */
-- *trailer = skb;
-- return 1;
-- }
--
--	/* Misery. We are in trouble; time to mince the fragments... */
--
-- elt = 1;
-- skb_p = &skb_shinfo(skb)->frag_list;
-- copyflag = 0;
--
-- while ((skb1 = *skb_p) != NULL) {
-- int ntail = 0;
--
--		/* The fragment is partially pulled by someone;
--		 * this can happen on input. Copy it and everything
--		 * after it. */
--
-- if (skb_shared(skb1))
-- copyflag = 1;
--
-- /* If the skb is the last, worry about trailer. */
--
-- if (skb1->next == NULL && tailbits) {
-- if (skb_shinfo(skb1)->nr_frags ||
-- skb_shinfo(skb1)->frag_list ||
-- skb_tailroom(skb1) < tailbits)
-- ntail = tailbits + 128;
-- }
--
-- if (copyflag ||
-- skb_cloned(skb1) ||
-- ntail ||
-- skb_shinfo(skb1)->nr_frags ||
-- skb_shinfo(skb1)->frag_list) {
-- struct sk_buff *skb2;
--
-- /* Fuck, we are miserable poor guys... */
-- if (ntail == 0)
-- skb2 = skb_copy(skb1, GFP_ATOMIC);
-- else
-- skb2 = skb_copy_expand(skb1,
-- skb_headroom(skb1),
-- ntail,
-- GFP_ATOMIC);
-- if (unlikely(skb2 == NULL))
-- return -ENOMEM;
--
-- if (skb1->sk)
-- skb_set_owner_w(skb2, skb1->sk);
--
-- /* Looking around. Are we still alive?
-- * OK, link new skb, drop old one */
--
-- skb2->next = skb1->next;
-- *skb_p = skb2;
-- kfree_skb(skb1);
-- skb1 = skb2;
-- }
-- elt++;
-- *trailer = skb1;
-- skb_p = &skb1->next;
-- }
--
-- return elt;
--}
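/*
 * A sketch of the usual call pattern (cf. the IPsec output path),
 * assuming 'padlen' trailer bytes are about to be written: after a
 * successful return the whole chain is private and writable.
 */
static int example_make_writable(struct sk_buff *skb, int padlen)
{
	struct sk_buff *trailer;
	int nfrags;

	nfrags = skb_cow_data(skb, padlen, &trailer);
	if (nfrags < 0)
		return nfrags;
	/* 'nfrags' scatterlist elements would map the result; padlen
	 * bytes may now be appended at skb_tail_pointer(trailer) */
	return 0;
}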
--
--/**
-- * skb_partial_csum_set - set up and verify partial csum values for packet
-- * @skb: the skb to set
-- * @start: the number of bytes after skb->data to start checksumming.
-- * @off: the offset from start to place the checksum.
-- *
-- * For untrusted partially-checksummed packets, we need to make sure the values
-- * for skb->csum_start and skb->csum_offset are valid so we don't oops.
-- *
-- * This function checks and sets those values and skb->ip_summed: if this
-- * returns false you should drop the packet.
-- */
--bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
--{
-- if (unlikely(start > skb->len - 2) ||
-- unlikely((int)start + off > skb->len - 2)) {
-- if (net_ratelimit())
-- printk(KERN_WARNING
-- "bad partial csum: csum=%u/%u len=%u\n",
-- start, off, skb->len);
-- return false;
-- }
-- skb->ip_summed = CHECKSUM_PARTIAL;
-- skb->csum_start = skb_headroom(skb) + start;
-- skb->csum_offset = off;
-- return true;
--}
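/*
 * A sketch of validating offsets supplied by an untrusted source,
 * e.g. a virtio-net style header; 'csum_start'/'csum_offset' stand
 * in for whatever the guest or peer claimed.
 */
static int example_set_csum(struct sk_buff *skb, u16 csum_start,
			    u16 csum_offset)
{
	if (!skb_partial_csum_set(skb, csum_start, csum_offset)) {
		kfree_skb(skb);
		return -EINVAL;
	}
	return 0;
}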
--
--void __skb_warn_lro_forwarding(const struct sk_buff *skb)
--{
-- if (net_ratelimit())
-- pr_warning("%s: received packets cannot be forwarded"
-- " while LRO is enabled\n", skb->dev->name);
--}
--
--EXPORT_SYMBOL(___pskb_trim);
--EXPORT_SYMBOL(__kfree_skb);
--EXPORT_SYMBOL(kfree_skb);
--EXPORT_SYMBOL(__pskb_pull_tail);
--EXPORT_SYMBOL(__alloc_skb);
--EXPORT_SYMBOL(__netdev_alloc_skb);
--EXPORT_SYMBOL(pskb_copy);
--EXPORT_SYMBOL(pskb_expand_head);
--EXPORT_SYMBOL(skb_checksum);
--EXPORT_SYMBOL(skb_clone);
--EXPORT_SYMBOL(skb_copy);
--EXPORT_SYMBOL(skb_copy_and_csum_bits);
--EXPORT_SYMBOL(skb_copy_and_csum_dev);
--EXPORT_SYMBOL(skb_copy_bits);
--EXPORT_SYMBOL(skb_copy_expand);
--EXPORT_SYMBOL(skb_over_panic);
--EXPORT_SYMBOL(skb_pad);
--EXPORT_SYMBOL(skb_realloc_headroom);
--EXPORT_SYMBOL(skb_under_panic);
--EXPORT_SYMBOL(skb_dequeue);
--EXPORT_SYMBOL(skb_dequeue_tail);
--EXPORT_SYMBOL(skb_insert);
--EXPORT_SYMBOL(skb_queue_purge);
--EXPORT_SYMBOL(skb_queue_head);
--EXPORT_SYMBOL(skb_queue_tail);
--EXPORT_SYMBOL(skb_unlink);
--EXPORT_SYMBOL(skb_append);
--EXPORT_SYMBOL(skb_split);
--EXPORT_SYMBOL(skb_prepare_seq_read);
--EXPORT_SYMBOL(skb_seq_read);
--EXPORT_SYMBOL(skb_abort_seq_read);
--EXPORT_SYMBOL(skb_find_text);
--EXPORT_SYMBOL(skb_append_datato_frags);
--EXPORT_SYMBOL(__skb_warn_lro_forwarding);
--
--EXPORT_SYMBOL_GPL(skb_to_sgvec);
--EXPORT_SYMBOL_GPL(skb_cow_data);
--EXPORT_SYMBOL_GPL(skb_partial_csum_set);
-diff -Nurb linux-2.6.27-524/net/core/sock.c.orig linux-2.6.27-525/net/core/sock.c.orig
---- linux-2.6.27-524/net/core/sock.c.orig 2009-12-04 16:03:48.000000000 -0500
-+++ linux-2.6.27-525/net/core/sock.c.orig 1969-12-31 19:00:00.000000000 -0500
-@@ -1,2301 +0,0 @@
--/*
-- * INET An implementation of the TCP/IP protocol suite for the LINUX
-- * operating system. INET is implemented using the BSD Socket
-- * interface as the means of communication with the user level.
-- *
-- * Generic socket support routines. Memory allocators, socket lock/release
-- * handler for protocols to use and generic option handler.
-- *
-- *
-- * Authors: Ross Biro
-- * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
-- * Florian La Roche, <flla@stud.uni-sb.de>
-- * Alan Cox, <A.Cox@swansea.ac.uk>
-- *
-- * Fixes:
-- * Alan Cox : Numerous verify_area() problems
-- * Alan Cox : Connecting on a connecting socket
-- * now returns an error for tcp.
-- * Alan Cox : sock->protocol is set correctly.
-- * and is not sometimes left as 0.
-- * Alan Cox : connect handles icmp errors on a
-- * connect properly. Unfortunately there
-- * is a restart syscall nasty there. I
-- * can't match BSD without hacking the C
-- * library. Ideas urgently sought!
-- * Alan Cox : Disallow bind() to addresses that are
-- * not ours - especially broadcast ones!!
-- * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost)
-- * Alan Cox : sock_wfree/sock_rfree don't destroy sockets,
-- * instead they leave that for the DESTROY timer.
-- * Alan Cox : Clean up error flag in accept
-- * Alan Cox : TCP ack handling is buggy, the DESTROY timer
-- * was buggy. Put a remove_sock() in the handler
-- * for memory when we hit 0. Also altered the timer
-- * code. The ACK stuff can wait and needs major
-- * TCP layer surgery.
-- * Alan Cox : Fixed TCP ack bug, removed remove sock
-- * and fixed timer/inet_bh race.
-- * Alan Cox : Added zapped flag for TCP
-- * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code
-- * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
-- * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources
-- * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing.
-- * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
-- * Rick Sladkey : Relaxed UDP rules for matching packets.
-- * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support
-- * Pauline Middelink : identd support
-- * Alan Cox : Fixed connect() taking signals I think.
-- * Alan Cox : SO_LINGER supported
-- * Alan Cox : Error reporting fixes
-- * Anonymous : inet_create tidied up (sk->reuse setting)
-- * Alan Cox : inet sockets don't set sk->type!
-- * Alan Cox : Split socket option code
-- * Alan Cox : Callbacks
-- * Alan Cox : Nagle flag for Charles & Johannes stuff
-- * Alex : Removed restriction on inet fioctl
-- * Alan Cox : Splitting INET from NET core
-- * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt()
-- * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code
-- * Alan Cox : Split IP from generic code
-- * Alan Cox : New kfree_skbmem()
-- * Alan Cox : Make SO_DEBUG superuser only.
-- * Alan Cox : Allow anyone to clear SO_DEBUG
-- * (compatibility fix)
-- * Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput.
-- * Alan Cox : Allocator for a socket is settable.
-- * Alan Cox : SO_ERROR includes soft errors.
-- * Alan Cox : Allow NULL arguments on some SO_ opts
-- * Alan Cox : Generic socket allocation to make hooks
-- * easier (suggested by Craig Metz).
-- * Michael Pall : SO_ERROR returns positive errno again
-- * Steve Whitehouse: Added default destructor to free
-- * protocol private data.
-- * Steve Whitehouse: Added various other default routines
-- * common to several socket families.
-- * Chris Evans : Call suser() check last on F_SETOWN
-- * Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
-- * Andi Kleen : Add sock_kmalloc()/sock_kfree_s()
-- * Andi Kleen : Fix write_space callback
-- * Chris Evans : Security fixes - signedness again
-- * Arnaldo C. Melo : cleanups, use skb_queue_purge
-- *
-- * To Fix:
-- *
-- *
-- * This program is free software; you can redistribute it and/or
-- * modify it under the terms of the GNU General Public License
-- * as published by the Free Software Foundation; either version
-- * 2 of the License, or (at your option) any later version.
-- */
--
--#include <linux/capability.h>
--#include <linux/errno.h>
--#include <linux/types.h>
--#include <linux/socket.h>
--#include <linux/in.h>
--#include <linux/kernel.h>
--#include <linux/module.h>
--#include <linux/proc_fs.h>
--#include <linux/seq_file.h>
--#include <linux/sched.h>
--#include <linux/timer.h>
--#include <linux/string.h>
--#include <linux/sockios.h>
--#include <linux/net.h>
--#include <linux/mm.h>
--#include <linux/slab.h>
--#include <linux/interrupt.h>
--#include <linux/poll.h>
--#include <linux/tcp.h>
--#include <linux/init.h>
--#include <linux/highmem.h>
--
--#include <asm/uaccess.h>
--#include <asm/system.h>
--
--#include <linux/netdevice.h>
--#include <net/protocol.h>
--#include <linux/skbuff.h>
--#include <net/net_namespace.h>
--#include <net/request_sock.h>
--#include <net/sock.h>
--#include <net/xfrm.h>
--#include <linux/ipsec.h>
--
--#include <linux/filter.h>
--#include <linux/vs_socket.h>
--#include <linux/vs_limit.h>
--#include <linux/vs_context.h>
--#include <linux/vs_network.h>
--
--#ifdef CONFIG_INET
--#include <net/tcp.h>
--#endif
--
--/*
-- * Each address family might have different locking rules, so we have
-- * one slock key per address family:
-- */
--static struct lock_class_key af_family_keys[AF_MAX];
--static struct lock_class_key af_family_slock_keys[AF_MAX];
--
--#ifdef CONFIG_DEBUG_LOCK_ALLOC
--/*
-- * Make lock validator output more readable. (we pre-construct these
-- * strings build-time, so that runtime initialization of socket
-- * locks is fast):
-- */
--static const char *af_family_key_strings[AF_MAX+1] = {
-- "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" ,
-- "sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK",
-- "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" ,
-- "sk_lock-AF_X25" , "sk_lock-AF_INET6" , "sk_lock-AF_ROSE" ,
-- "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" ,
-- "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" ,
-- "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" ,
-- "sk_lock-21" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" ,
-- "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" ,
-- "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" ,
-- "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
-- "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
--};
--static const char *af_family_slock_key_strings[AF_MAX+1] = {
-- "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
-- "slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK",
-- "slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" ,
-- "slock-AF_X25" , "slock-AF_INET6" , "slock-AF_ROSE" ,
-- "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" ,
-- "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" ,
-- "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" ,
-- "slock-21" , "slock-AF_SNA" , "slock-AF_IRDA" ,
-- "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" ,
-- "slock-27" , "slock-28" , "slock-AF_CAN" ,
-- "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
-- "slock-AF_RXRPC" , "slock-AF_MAX"
--};
--static const char *af_family_clock_key_strings[AF_MAX+1] = {
-- "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
-- "clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK",
-- "clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" ,
-- "clock-AF_X25" , "clock-AF_INET6" , "clock-AF_ROSE" ,
-- "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" ,
-- "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" ,
-- "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" ,
-- "clock-21" , "clock-AF_SNA" , "clock-AF_IRDA" ,
-- "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" ,
-- "clock-27" , "clock-28" , "clock-AF_CAN" ,
-- "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
-- "clock-AF_RXRPC" , "clock-AF_MAX"
--};
--#endif
--
--/*
-- * sk_callback_lock locking rules are per-address-family,
-- * so split the lock classes by using a per-AF key:
-- */
--static struct lock_class_key af_callback_keys[AF_MAX];
--
--/* Take into consideration the size of the struct sk_buff overhead in the
-- * determination of these values, since that is non-constant across
-- * platforms. This makes socket queueing behavior and performance
-- * not depend upon such differences.
-- */
--#define _SK_MEM_PACKETS 256
--#define _SK_MEM_OVERHEAD (sizeof(struct sk_buff) + 256)
--#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
--#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
--
--/* Run time adjustable parameters. */
--__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
--__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
--__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
--__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
--
--/* Maximal space eaten by iovec or ancillary data plus some space */
--int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
--
--static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
--{
-- struct timeval tv;
--
-- if (optlen < sizeof(tv))
-- return -EINVAL;
-- if (copy_from_user(&tv, optval, sizeof(tv)))
-- return -EFAULT;
-- if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
-- return -EDOM;
--
-- if (tv.tv_sec < 0) {
-- static int warned __read_mostly;
--
-- *timeo_p = 0;
-- if (warned < 10 && net_ratelimit()) {
-- warned++;
-- printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
-- "tries to set negative timeout\n",
-- current->comm, task_pid_nr(current));
-- }
-- return 0;
-- }
-- *timeo_p = MAX_SCHEDULE_TIMEOUT;
-- if (tv.tv_sec == 0 && tv.tv_usec == 0)
-- return 0;
-- if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
-- *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
-- return 0;
--}
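/*
 * A worked example of the conversion above, assuming HZ = 1000:
 * tv = { .tv_sec = 1, .tv_usec = 2500 } yields
 * 1 * 1000 + (2500 + 999) / 1000 = 1000 + 3 = 1003 jiffies.
 * The sub-tick remainder is rounded up, so a positive timeout can
 * never truncate to zero jiffies.
 */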
--
--static void sock_warn_obsolete_bsdism(const char *name)
--{
-- static int warned;
-- static char warncomm[TASK_COMM_LEN];
-- if (strcmp(warncomm, current->comm) && warned < 5) {
-- strcpy(warncomm, current->comm);
-- printk(KERN_WARNING "process `%s' is using obsolete "
-- "%s SO_BSDCOMPAT\n", warncomm, name);
-- warned++;
-- }
--}
--
--static void sock_disable_timestamp(struct sock *sk)
--{
-- if (sock_flag(sk, SOCK_TIMESTAMP)) {
-- sock_reset_flag(sk, SOCK_TIMESTAMP);
-- net_disable_timestamp();
-- }
--}
--
--
--int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
--{
-- int err = 0;
-- int skb_len;
--
--	/* Cast sk->rcvbuf to unsigned... It's pointless, but reduces
--	   the number of warnings when compiling with -W --ANK
-- */
-- if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
-- (unsigned)sk->sk_rcvbuf) {
-- err = -ENOMEM;
-- goto out;
-- }
--
-- err = sk_filter(sk, skb);
-- if (err)
-- goto out;
--
-- if (!sk_rmem_schedule(sk, skb->truesize)) {
-- err = -ENOBUFS;
-- goto out;
-- }
--
-- skb->dev = NULL;
-- skb_set_owner_r(skb, sk);
--
-- /* Cache the SKB length before we tack it onto the receive
-- * queue. Once it is added it no longer belongs to us and
-- * may be freed by other threads of control pulling packets
-- * from the queue.
-- */
-- skb_len = skb->len;
--
-- skb_queue_tail(&sk->sk_receive_queue, skb);
--
-- if (!sock_flag(sk, SOCK_DEAD))
-- sk->sk_data_ready(sk, skb_len);
--out:
-- return err;
--}
--EXPORT_SYMBOL(sock_queue_rcv_skb);
--
--int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
--{
-- int rc = NET_RX_SUCCESS;
--
-- if (sk_filter(sk, skb))
-- goto discard_and_relse;
--
-- skb->dev = NULL;
--
-- if (nested)
-- bh_lock_sock_nested(sk);
-- else
-- bh_lock_sock(sk);
-- if (!sock_owned_by_user(sk)) {
-- /*
-- * trylock + unlock semantics:
-- */
-- mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
--
-- rc = sk->sk_backlog_rcv(sk, skb);
--
-- mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
-- } else
-- sk_add_backlog(sk, skb);
-- bh_unlock_sock(sk);
--out:
-- sock_put(sk);
-- return rc;
--discard_and_relse:
-- kfree_skb(skb);
-- goto out;
--}
--EXPORT_SYMBOL(sk_receive_skb);
--
--struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
--{
-- struct dst_entry *dst = sk->sk_dst_cache;
--
-- if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
-- sk->sk_dst_cache = NULL;
-- dst_release(dst);
-- return NULL;
-- }
--
-- return dst;
--}
--EXPORT_SYMBOL(__sk_dst_check);
--
--struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
--{
-- struct dst_entry *dst = sk_dst_get(sk);
--
-- if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
-- sk_dst_reset(sk);
-- dst_release(dst);
-- return NULL;
-- }
--
-- return dst;
--}
--EXPORT_SYMBOL(sk_dst_check);
--
--static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
--{
-- int ret = -ENOPROTOOPT;
--#ifdef CONFIG_NETDEVICES
-- struct net *net = sock_net(sk);
-- char devname[IFNAMSIZ];
-- int index;
--
-- /* Sorry... */
-- ret = -EPERM;
-- if (!capable(CAP_NET_RAW))
-- goto out;
--
-- ret = -EINVAL;
-- if (optlen < 0)
-- goto out;
--
-- /* Bind this socket to a particular device like "eth0",
-- * as specified in the passed interface name. If the
-- * name is "" or the option length is zero the socket
-- * is not bound.
-- */
-- if (optlen > IFNAMSIZ - 1)
-- optlen = IFNAMSIZ - 1;
-- memset(devname, 0, sizeof(devname));
--
-- ret = -EFAULT;
-- if (copy_from_user(devname, optval, optlen))
-- goto out;
--
-- if (devname[0] == '\0') {
-- index = 0;
-- } else {
-- struct net_device *dev = dev_get_by_name(net, devname);
--
-- ret = -ENODEV;
-- if (!dev)
-- goto out;
--
-- index = dev->ifindex;
-- dev_put(dev);
-- }
--
-- lock_sock(sk);
-- sk->sk_bound_dev_if = index;
-- sk_dst_reset(sk);
-- release_sock(sk);
--
-- ret = 0;
--
--out:
--#endif
--
-- return ret;
--}
--
--static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
--{
-- if (valbool)
-- sock_set_flag(sk, bit);
-- else
-- sock_reset_flag(sk, bit);
--}
--
--/*
-- * This is meant for all protocols to use and covers goings on
-- * at the socket level. Everything here is generic.
-- */
--
--int sock_setsockopt(struct socket *sock, int level, int optname,
-- char __user *optval, int optlen)
--{
-- struct sock *sk=sock->sk;
-- int val;
-- int valbool;
-- struct linger ling;
-- int ret = 0;
--
-- /*
-- * Options without arguments
-- */
--
-- if (optname == SO_BINDTODEVICE)
-- return sock_bindtodevice(sk, optval, optlen);
--
-- if (optlen < sizeof(int))
-- return -EINVAL;
--
-- if (get_user(val, (int __user *)optval))
-- return -EFAULT;
--
-- valbool = val?1:0;
--
-- lock_sock(sk);
--
-- switch(optname) {
-- case SO_DEBUG:
-- if (val && !capable(CAP_NET_ADMIN)) {
-- ret = -EACCES;
-- } else
-- sock_valbool_flag(sk, SOCK_DBG, valbool);
-- break;
-- case SO_REUSEADDR:
-- sk->sk_reuse = valbool;
-- break;
-- case SO_TYPE:
-- case SO_ERROR:
-- ret = -ENOPROTOOPT;
-- break;
-- case SO_DONTROUTE:
-- sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
-- break;
-- case SO_BROADCAST:
-- sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
-- break;
-- case SO_SNDBUF:
--		/* Don't error on this: BSD doesn't, and if you think
--		   about it this is right. Otherwise apps have to
--		   play 'guess the biggest size' games. RCVBUF/SNDBUF
--		   are treated in BSD as hints */
--
-- if (val > sysctl_wmem_max)
-- val = sysctl_wmem_max;
--set_sndbuf:
-- sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
-- if ((val * 2) < SOCK_MIN_SNDBUF)
-- sk->sk_sndbuf = SOCK_MIN_SNDBUF;
-- else
-- sk->sk_sndbuf = val * 2;
--
-- /*
-- * Wake up sending tasks if we
-- * upped the value.
-- */
-- sk->sk_write_space(sk);
-- break;
--
-- case SO_SNDBUFFORCE:
-- if (!capable(CAP_NET_ADMIN)) {
-- ret = -EPERM;
-- break;
-- }
-- goto set_sndbuf;
--
-- case SO_RCVBUF:
--		/* Don't error on this: BSD doesn't, and if you think
--		   about it this is right. Otherwise apps have to
--		   play 'guess the biggest size' games. RCVBUF/SNDBUF
--		   are treated in BSD as hints */
--
-- if (val > sysctl_rmem_max)
-- val = sysctl_rmem_max;
--set_rcvbuf:
-- sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
-- /*
-- * We double it on the way in to account for
-- * "struct sk_buff" etc. overhead. Applications
-- * assume that the SO_RCVBUF setting they make will
-- * allow that much actual data to be received on that
-- * socket.
-- *
-- * Applications are unaware that "struct sk_buff" and
-- * other overheads allocate from the receive buffer
-- * during socket buffer allocation.
-- *
-- * And after considering the possible alternatives,
-- * returning the value we actually used in getsockopt
-- * is the most desirable behavior.
-- */
-- if ((val * 2) < SOCK_MIN_RCVBUF)
-- sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
-- else
-- sk->sk_rcvbuf = val * 2;
-- break;
--
-- case SO_RCVBUFFORCE:
-- if (!capable(CAP_NET_ADMIN)) {
-- ret = -EPERM;
-- break;
-- }
-- goto set_rcvbuf;
--
-- case SO_KEEPALIVE:
--#ifdef CONFIG_INET
-- if (sk->sk_protocol == IPPROTO_TCP)
-- tcp_set_keepalive(sk, valbool);
--#endif
-- sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
-- break;
--
-- case SO_OOBINLINE:
-- sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
-- break;
--
-- case SO_NO_CHECK:
-- sk->sk_no_check = valbool;
-- break;
--
-- case SO_PRIORITY:
-- if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
-- sk->sk_priority = val;
-- else
-- ret = -EPERM;
-- break;
--
-- case SO_LINGER:
-- if (optlen < sizeof(ling)) {
-- ret = -EINVAL; /* 1003.1g */
-- break;
-- }
-- if (copy_from_user(&ling,optval,sizeof(ling))) {
-- ret = -EFAULT;
-- break;
-- }
-- if (!ling.l_onoff)
-- sock_reset_flag(sk, SOCK_LINGER);
-- else {
--#if (BITS_PER_LONG == 32)
-- if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
-- sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
-- else
--#endif
-- sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
-- sock_set_flag(sk, SOCK_LINGER);
-- }
-- break;
--
-- case SO_BSDCOMPAT:
-- sock_warn_obsolete_bsdism("setsockopt");
-- break;
--
-- case SO_PASSCRED:
-- if (valbool)
-- set_bit(SOCK_PASSCRED, &sock->flags);
-- else
-- clear_bit(SOCK_PASSCRED, &sock->flags);
-- break;
--
-- case SO_TIMESTAMP:
-- case SO_TIMESTAMPNS:
-- if (valbool) {
-- if (optname == SO_TIMESTAMP)
-- sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
-- else
-- sock_set_flag(sk, SOCK_RCVTSTAMPNS);
-- sock_set_flag(sk, SOCK_RCVTSTAMP);
-- sock_enable_timestamp(sk);
-- } else {
-- sock_reset_flag(sk, SOCK_RCVTSTAMP);
-- sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
-- }
-- break;
--
-- case SO_RCVLOWAT:
-- if (val < 0)
-- val = INT_MAX;
-- sk->sk_rcvlowat = val ? : 1;
-- break;
--
-- case SO_RCVTIMEO:
-- ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
-- break;
--
-- case SO_SNDTIMEO:
-- ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
-- break;
--
-- case SO_ATTACH_FILTER:
-- ret = -EINVAL;
-- if (optlen == sizeof(struct sock_fprog)) {
-- struct sock_fprog fprog;
--
-- ret = -EFAULT;
-- if (copy_from_user(&fprog, optval, sizeof(fprog)))
-- break;
--
-- ret = sk_attach_filter(&fprog, sk);
-- }
-- break;
--
-- case SO_DETACH_FILTER:
-- ret = sk_detach_filter(sk);
-- break;
--
-- case SO_PASSSEC:
-- if (valbool)
-- set_bit(SOCK_PASSSEC, &sock->flags);
-- else
-- clear_bit(SOCK_PASSSEC, &sock->flags);
-- break;
-- case SO_MARK:
-- if (!capable(CAP_NET_ADMIN))
-- ret = -EPERM;
-- else {
-- sk->sk_mark = val;
-- }
-- break;
--
--	/* We implement the SO_SNDLOWAT etc. to
--	   not be settable (1003.1g 5.3) */
-- default:
-- ret = -ENOPROTOOPT;
-- break;
-- }
-- release_sock(sk);
-- return ret;
--}
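/*
 * The SNDBUF/RCVBUF doubling above is visible from userspace; a
 * sketch in userspace C, assuming <sys/socket.h> and a value below
 * sysctl_rmem_max:
 */
static int example_rcvbuf_roundtrip(int fd)
{
	int val = 65536;
	socklen_t len = sizeof(val);

	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)) < 0)
		return -1;
	if (getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, &len) < 0)
		return -1;
	return val;	/* reads back 131072: the kernel stored val * 2 */
}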
--
--
--int sock_getsockopt(struct socket *sock, int level, int optname,
-- char __user *optval, int __user *optlen)
--{
-- struct sock *sk = sock->sk;
--
-- union {
-- int val;
-- struct linger ling;
-- struct timeval tm;
-- } v;
--
-- unsigned int lv = sizeof(int);
-- int len;
--
-- if (get_user(len, optlen))
-- return -EFAULT;
-- if (len < 0)
-- return -EINVAL;
--
-- memset(&v, 0, sizeof(v));
--
-- switch(optname) {
-- case SO_DEBUG:
-- v.val = sock_flag(sk, SOCK_DBG);
-- break;
--
-- case SO_DONTROUTE:
-- v.val = sock_flag(sk, SOCK_LOCALROUTE);
-- break;
--
-- case SO_BROADCAST:
-- v.val = !!sock_flag(sk, SOCK_BROADCAST);
-- break;
--
-- case SO_SNDBUF:
-- v.val = sk->sk_sndbuf;
-- break;
--
-- case SO_RCVBUF:
-- v.val = sk->sk_rcvbuf;
-- break;
--
-- case SO_REUSEADDR:
-- v.val = sk->sk_reuse;
-- break;
--
-- case SO_KEEPALIVE:
-- v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
-- break;
--
-- case SO_TYPE:
-- v.val = sk->sk_type;
-- break;
--
-- case SO_ERROR:
-- v.val = -sock_error(sk);
-- if (v.val==0)
-- v.val = xchg(&sk->sk_err_soft, 0);
-- break;
--
-- case SO_OOBINLINE:
-- v.val = !!sock_flag(sk, SOCK_URGINLINE);
-- break;
--
-- case SO_NO_CHECK:
-- v.val = sk->sk_no_check;
-- break;
--
-- case SO_PRIORITY:
-- v.val = sk->sk_priority;
-- break;
--
-- case SO_LINGER:
-- lv = sizeof(v.ling);
-- v.ling.l_onoff = !!sock_flag(sk, SOCK_LINGER);
-- v.ling.l_linger = sk->sk_lingertime / HZ;
-- break;
--
-- case SO_BSDCOMPAT:
-- sock_warn_obsolete_bsdism("getsockopt");
-- break;
--
-- case SO_TIMESTAMP:
-- v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
-- !sock_flag(sk, SOCK_RCVTSTAMPNS);
-- break;
--
-- case SO_TIMESTAMPNS:
-- v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
-- break;
--
-- case SO_RCVTIMEO:
-- lv=sizeof(struct timeval);
-- if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
-- v.tm.tv_sec = 0;
-- v.tm.tv_usec = 0;
-- } else {
-- v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
-- v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
-- }
-- break;
--
-- case SO_SNDTIMEO:
-- lv=sizeof(struct timeval);
-- if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
-- v.tm.tv_sec = 0;
-- v.tm.tv_usec = 0;
-- } else {
-- v.tm.tv_sec = sk->sk_sndtimeo / HZ;
-- v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
-- }
-- break;
--
-- case SO_RCVLOWAT:
-- v.val = sk->sk_rcvlowat;
-- break;
--
-- case SO_SNDLOWAT:
-- v.val=1;
-- break;
--
-- case SO_PASSCRED:
-- v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
-- break;
--
-- case SO_PEERCRED:
-- if (len > sizeof(sk->sk_peercred))
-- len = sizeof(sk->sk_peercred);
-- if (copy_to_user(optval, &sk->sk_peercred, len))
-- return -EFAULT;
-- goto lenout;
--
-- case SO_PEERNAME:
-- {
-- char address[128];
--
-- if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
-- return -ENOTCONN;
-- if (lv < len)
-- return -EINVAL;
-- if (copy_to_user(optval, address, len))
-- return -EFAULT;
-- goto lenout;
-- }
--
-- /* Dubious BSD thing... Probably nobody even uses it, but
-- * the UNIX standard wants it for whatever reason... -DaveM
-- */
-- case SO_ACCEPTCONN:
-- v.val = sk->sk_state == TCP_LISTEN;
-- break;
--
-- case SO_PASSSEC:
-- v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
-- break;
--
-- case SO_PEERSEC:
-- return security_socket_getpeersec_stream(sock, optval, optlen, len);
--
-- case SO_MARK:
-- v.val = sk->sk_mark;
-- break;
--
-- default:
-- return -ENOPROTOOPT;
-- }
--
-- if (len > lv)
-- len = lv;
-- if (copy_to_user(optval, &v, len))
-- return -EFAULT;
--lenout:
-- if (put_user(len, optlen))
-- return -EFAULT;
-- return 0;
--}
--
--/*
-- * Initialize an sk_lock.
-- *
-- * (We also register the sk_lock with the lock validator.)
-- */
--static inline void sock_lock_init(struct sock *sk)
--{
-- sock_lock_init_class_and_name(sk,
-- af_family_slock_key_strings[sk->sk_family],
-- af_family_slock_keys + sk->sk_family,
-- af_family_key_strings[sk->sk_family],
-- af_family_keys + sk->sk_family);
--}
--
--static void sock_copy(struct sock *nsk, const struct sock *osk)
--{
--#ifdef CONFIG_SECURITY_NETWORK
-- void *sptr = nsk->sk_security;
--#endif
--
-- memcpy(nsk, osk, osk->sk_prot->obj_size);
--#ifdef CONFIG_SECURITY_NETWORK
-- nsk->sk_security = sptr;
-- security_sk_clone(osk, nsk);
--#endif
--}
--
--static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
-- int family)
--{
-- struct sock *sk;
-- struct kmem_cache *slab;
--
-- slab = prot->slab;
-- if (slab != NULL)
-- sk = kmem_cache_alloc(slab, priority);
-- else
-- sk = kmalloc(prot->obj_size, priority);
--
-- if (sk != NULL) {
-- if (security_sk_alloc(sk, family, priority))
-- goto out_free;
--
-- if (!try_module_get(prot->owner))
-- goto out_free_sec;
-- }
-- sock_vx_init(sk);
-- sock_nx_init(sk);
--
-- return sk;
--
--out_free_sec:
-- security_sk_free(sk);
--out_free:
-- if (slab != NULL)
-- kmem_cache_free(slab, sk);
-- else
-- kfree(sk);
-- return NULL;
--}
--
--static void sk_prot_free(struct proto *prot, struct sock *sk)
--{
-- struct kmem_cache *slab;
-- struct module *owner;
--
-- owner = prot->owner;
-- slab = prot->slab;
--
-- security_sk_free(sk);
-- if (slab != NULL)
-- kmem_cache_free(slab, sk);
-- else
-- kfree(sk);
-- module_put(owner);
--}
--
--/**
-- * sk_alloc - All socket objects are allocated here
-- * @net: the applicable net namespace
-- * @family: protocol family
-- * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
-- * @prot: struct proto associated with this new sock instance
-- */
--struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
-- struct proto *prot)
--{
-- struct sock *sk;
--
-- sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
-- if (sk) {
-- sk->sk_family = family;
-- /*
-- * See comment in struct sock definition to understand
-- * why we need sk_prot_creator -acme
-- */
-- sk->sk_prot = sk->sk_prot_creator = prot;
-- sock_lock_init(sk);
-- sock_net_set(sk, get_net(net));
-- }
--
-- return sk;
--}
--
--void sk_free(struct sock *sk)
--{
-- struct sk_filter *filter;
--
-- if (sk->sk_destruct)
-- sk->sk_destruct(sk);
--
-- filter = rcu_dereference(sk->sk_filter);
-- if (filter) {
-- sk_filter_uncharge(sk, filter);
-- rcu_assign_pointer(sk->sk_filter, NULL);
-- }
--
-- sock_disable_timestamp(sk);
--
-- if (atomic_read(&sk->sk_omem_alloc))
-- printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
-- __func__, atomic_read(&sk->sk_omem_alloc));
--
-- put_net(sock_net(sk));
-- vx_sock_dec(sk);
-- clr_vx_info(&sk->sk_vx_info);
-- sk->sk_xid = -1;
-- clr_nx_info(&sk->sk_nx_info);
-- sk->sk_nid = -1;
-- sk_prot_free(sk->sk_prot_creator, sk);
--}
--
--/*
-- * The last sock_put should drop the reference to sk->sk_net. It has already
-- * been dropped in sk_change_net. Taking a reference to the stopping namespace
-- * is not an option.
-- * Take a reference to the socket to remove it from the hash _alive_ and then
-- * destroy it in the context of init_net.
-- */
--void sk_release_kernel(struct sock *sk)
--{
-- if (sk == NULL || sk->sk_socket == NULL)
-- return;
--
-- sock_hold(sk);
-- sock_release(sk->sk_socket);
-- release_net(sock_net(sk));
-- sock_net_set(sk, get_net(&init_net));
-- sock_put(sk);
--}
--EXPORT_SYMBOL(sk_release_kernel);
--
--struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
--{
-- struct sock *newsk;
--
-- newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
-- if (newsk != NULL) {
-- struct sk_filter *filter;
--
-- sock_copy(newsk, sk);
--
-- /* SANITY */
-- get_net(sock_net(newsk));
-- sock_vx_init(newsk);
-- sock_nx_init(newsk);
-- sk_node_init(&newsk->sk_node);
-- sock_lock_init(newsk);
-- bh_lock_sock(newsk);
-- newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
--
-- atomic_set(&newsk->sk_rmem_alloc, 0);
-- atomic_set(&newsk->sk_wmem_alloc, 0);
-- atomic_set(&newsk->sk_omem_alloc, 0);
-- skb_queue_head_init(&newsk->sk_receive_queue);
-- skb_queue_head_init(&newsk->sk_write_queue);
--#ifdef CONFIG_NET_DMA
-- skb_queue_head_init(&newsk->sk_async_wait_queue);
--#endif
--
-- rwlock_init(&newsk->sk_dst_lock);
-- rwlock_init(&newsk->sk_callback_lock);
-- lockdep_set_class_and_name(&newsk->sk_callback_lock,
-- af_callback_keys + newsk->sk_family,
-- af_family_clock_key_strings[newsk->sk_family]);
--
-- newsk->sk_dst_cache = NULL;
-- newsk->sk_wmem_queued = 0;
-- newsk->sk_forward_alloc = 0;
-- newsk->sk_send_head = NULL;
-- newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
--
-- sock_reset_flag(newsk, SOCK_DONE);
-- skb_queue_head_init(&newsk->sk_error_queue);
--
-- filter = newsk->sk_filter;
-- if (filter != NULL)
-- sk_filter_charge(newsk, filter);
--
-- if (unlikely(xfrm_sk_clone_policy(newsk))) {
--			/* It is still a raw copy of the parent, so invalidate
--			 * the destructor and do a plain sk_free() */
-- newsk->sk_destruct = NULL;
-- sk_free(newsk);
-- newsk = NULL;
-- goto out;
-- }
--
-- newsk->sk_err = 0;
-- newsk->sk_priority = 0;
-- atomic_set(&newsk->sk_refcnt, 2);
--
-- set_vx_info(&newsk->sk_vx_info, sk->sk_vx_info);
-- newsk->sk_xid = sk->sk_xid;
-- vx_sock_inc(newsk);
-- set_nx_info(&newsk->sk_nx_info, sk->sk_nx_info);
-- newsk->sk_nid = sk->sk_nid;
--
-- /*
-- * Increment the counter in the same struct proto as the master
-- * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
-- * is the same as sk->sk_prot->socks, as this field was copied
-- * with memcpy).
-- *
-- * This _changes_ the previous behaviour, where
--		 * tcp_create_openreq_child was always incrementing the
--		 * equivalent to tcp_prot->socks (inet_sock_nr), so this has
--		 * to be taken into account in all callers. -acme
-- */
-- sk_refcnt_debug_inc(newsk);
-- sk_set_socket(newsk, NULL);
-- newsk->sk_sleep = NULL;
--
-- if (newsk->sk_prot->sockets_allocated)
-- atomic_inc(newsk->sk_prot->sockets_allocated);
-- }
--out:
-- return newsk;
--}
--
--EXPORT_SYMBOL_GPL(sk_clone);
--
--void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
--{
-- __sk_dst_set(sk, dst);
-- sk->sk_route_caps = dst->dev->features;
-- if (sk->sk_route_caps & NETIF_F_GSO)
-- sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
-- if (sk_can_gso(sk)) {
-- if (dst->header_len) {
-- sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
-- } else {
-- sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
-- sk->sk_gso_max_size = dst->dev->gso_max_size;
-- }
-- }
--}
--EXPORT_SYMBOL_GPL(sk_setup_caps);
--
--void __init sk_init(void)
--{
-- if (num_physpages <= 4096) {
-- sysctl_wmem_max = 32767;
-- sysctl_rmem_max = 32767;
-- sysctl_wmem_default = 32767;
-- sysctl_rmem_default = 32767;
-- } else if (num_physpages >= 131072) {
-- sysctl_wmem_max = 131071;
-- sysctl_rmem_max = 131071;
-- }
--}
--
--/*
-- * Simple resource managers for sockets.
-- */
--
--
--/*
-- * Write buffer destructor automatically called from kfree_skb.
-- */
--void sock_wfree(struct sk_buff *skb)
--{
-- struct sock *sk = skb->sk;
--
-- /* In case it might be waiting for more memory. */
-- atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
-- if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
-- sk->sk_write_space(sk);
-- sock_put(sk);
--}
--
--/*
-- * Read buffer destructor automatically called from kfree_skb.
-- */
--void sock_rfree(struct sk_buff *skb)
--{
-- struct sock *sk = skb->sk;
--
-- atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
-- sk_mem_uncharge(skb->sk, skb->truesize);
--}
--
--
--int sock_i_uid(struct sock *sk)
--{
-- int uid;
--
-- read_lock(&sk->sk_callback_lock);
-- uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
-- read_unlock(&sk->sk_callback_lock);
-- return uid;
--}
--
--unsigned long sock_i_ino(struct sock *sk)
--{
-- unsigned long ino;
--
-- read_lock(&sk->sk_callback_lock);
-- ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
-- read_unlock(&sk->sk_callback_lock);
-- return ino;
--}
--
--/*
-- * Allocate a skb from the socket's send buffer.
-- */
--struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
-- gfp_t priority)
--{
-- if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
-- struct sk_buff * skb = alloc_skb(size, priority);
-- if (skb) {
-- skb_set_owner_w(skb, sk);
-- return skb;
-- }
-- }
-- return NULL;
--}
--
--/*
-- * Allocate a skb from the socket's receive buffer.
-- */
--struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
-- gfp_t priority)
--{
-- if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
-- struct sk_buff *skb = alloc_skb(size, priority);
-- if (skb) {
-- skb_set_owner_r(skb, sk);
-- return skb;
-- }
-- }
-- return NULL;
--}
--
--/*
-- * Allocate a memory block from the socket's option memory buffer.
-- */
--void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
--{
-- if ((unsigned)size <= sysctl_optmem_max &&
-- atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
-- void *mem;
-- /* First do the add, to avoid the race if kmalloc
-- * might sleep.
-- */
-- atomic_add(size, &sk->sk_omem_alloc);
-- mem = kmalloc(size, priority);
-- if (mem)
-- return mem;
-- atomic_sub(size, &sk->sk_omem_alloc);
-- }
-- return NULL;
--}
--
--/*
-- * Free an option memory block.
-- */
--void sock_kfree_s(struct sock *sk, void *mem, int size)
--{
-- kfree(mem);
-- atomic_sub(size, &sk->sk_omem_alloc);
--}
--
--/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
--   I think these locks should be removed for datagram sockets.
-- */
--static long sock_wait_for_wmem(struct sock * sk, long timeo)
--{
-- DEFINE_WAIT(wait);
--
-- clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
-- for (;;) {
-- if (!timeo)
-- break;
-- if (signal_pending(current))
-- break;
-- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-- prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
-- if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
-- break;
-- if (sk->sk_shutdown & SEND_SHUTDOWN)
-- break;
-- if (sk->sk_err)
-- break;
-- timeo = schedule_timeout(timeo);
-- }
-- finish_wait(sk->sk_sleep, &wait);
-- return timeo;
--}
--
--
--/*
-- * Generic send/receive buffer handlers
-- */
--
--static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
-- unsigned long header_len,
-- unsigned long data_len,
-- int noblock, int *errcode)
--{
-- struct sk_buff *skb;
-- gfp_t gfp_mask;
-- long timeo;
-- int err;
--
-- gfp_mask = sk->sk_allocation;
-- if (gfp_mask & __GFP_WAIT)
-- gfp_mask |= __GFP_REPEAT;
--
-- timeo = sock_sndtimeo(sk, noblock);
-- while (1) {
-- err = sock_error(sk);
-- if (err != 0)
-- goto failure;
--
-- err = -EPIPE;
-- if (sk->sk_shutdown & SEND_SHUTDOWN)
-- goto failure;
--
-- if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
-- skb = alloc_skb(header_len, gfp_mask);
-- if (skb) {
-- int npages;
-- int i;
--
-- /* No pages, we're done... */
-- if (!data_len)
-- break;
--
-- npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
-- skb->truesize += data_len;
-- skb_shinfo(skb)->nr_frags = npages;
-- for (i = 0; i < npages; i++) {
-- struct page *page;
-- skb_frag_t *frag;
--
-- page = alloc_pages(sk->sk_allocation, 0);
-- if (!page) {
-- err = -ENOBUFS;
-- skb_shinfo(skb)->nr_frags = i;
-- kfree_skb(skb);
-- goto failure;
-- }
--
-- frag = &skb_shinfo(skb)->frags[i];
-- frag->page = page;
-- frag->page_offset = 0;
-- frag->size = (data_len >= PAGE_SIZE ?
-- PAGE_SIZE :
-- data_len);
-- data_len -= PAGE_SIZE;
-- }
--
-- /* Full success... */
-- break;
-- }
-- err = -ENOBUFS;
-- goto failure;
-- }
-- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
-- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-- err = -EAGAIN;
-- if (!timeo)
-- goto failure;
-- if (signal_pending(current))
-- goto interrupted;
-- timeo = sock_wait_for_wmem(sk, timeo);
-- }
--
-- skb_set_owner_w(skb, sk);
-- return skb;
--
--interrupted:
-- err = sock_intr_errno(timeo);
--failure:
-- *errcode = err;
-- return NULL;
--}
--
--struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
-- int noblock, int *errcode)
--{
-- return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
--}
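/*
 * A sketch of the common datagram pattern, assuming 'len' payload
 * bytes plus 'hlen' bytes of device headroom; MSG_DONTWAIT maps to
 * the noblock argument and failures come back through 'err'.
 */
static struct sk_buff *example_alloc(struct sock *sk, int len, int hlen,
				     int flags, int *err)
{
	struct sk_buff *skb;

	skb = sock_alloc_send_skb(sk, len + hlen,
				  flags & MSG_DONTWAIT, err);
	if (skb)
		skb_reserve(skb, hlen);
	return skb;
}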
--
--static void __lock_sock(struct sock *sk)
--{
-- DEFINE_WAIT(wait);
--
-- for (;;) {
-- prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
-- TASK_UNINTERRUPTIBLE);
-- spin_unlock_bh(&sk->sk_lock.slock);
-- schedule();
-- spin_lock_bh(&sk->sk_lock.slock);
-- if (!sock_owned_by_user(sk))
-- break;
-- }
-- finish_wait(&sk->sk_lock.wq, &wait);
--}
--
--static void __release_sock(struct sock *sk)
--{
-- struct sk_buff *skb = sk->sk_backlog.head;
--
-- do {
-- sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
-- bh_unlock_sock(sk);
--
-- do {
-- struct sk_buff *next = skb->next;
--
-- skb->next = NULL;
-- sk->sk_backlog_rcv(sk, skb);
--
-- /*
-- * We are in process context here with softirqs
-- * disabled, use cond_resched_softirq() to preempt.
-- * This is safe to do because we've taken the backlog
-- * queue private:
-- */
-- cond_resched_softirq();
--
-- skb = next;
-- } while (skb != NULL);
--
-- bh_lock_sock(sk);
-- } while ((skb = sk->sk_backlog.head) != NULL);
--}
--
--/**
-- * sk_wait_data - wait for data to arrive at sk_receive_queue
-- * @sk: sock to wait on
-- * @timeo: for how long
-- *
-- * Now socket state including sk->sk_err is changed only under lock,
-- * hence we may omit checks after joining wait queue.
-- * We check the receive queue before schedule() only as an optimization;
-- * it is very likely that release_sock() added new data.
-- */
--int sk_wait_data(struct sock *sk, long *timeo)
--{
-- int rc;
-- DEFINE_WAIT(wait);
--
-- prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
-- set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
-- rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
-- clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
-- finish_wait(sk->sk_sleep, &wait);
-- return rc;
--}
--
--EXPORT_SYMBOL(sk_wait_data);
--
--/**
-- * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
-- * @sk: socket
-- * @size: memory size to allocate
-- * @kind: allocation type
-- *
-- * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
-- * rmem allocation. This function assumes that protocols which have
-- * memory_pressure use sk_wmem_queued as write buffer accounting.
-- */
--int __sk_mem_schedule(struct sock *sk, int size, int kind)
--{
-- struct proto *prot = sk->sk_prot;
-- int amt = sk_mem_pages(size);
-- int allocated;
--
-- sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
-- allocated = atomic_add_return(amt, prot->memory_allocated);
--
-- /* Under limit. */
-- if (allocated <= prot->sysctl_mem[0]) {
-- if (prot->memory_pressure && *prot->memory_pressure)
-- *prot->memory_pressure = 0;
-- return 1;
-- }
--
-- /* Under pressure. */
-- if (allocated > prot->sysctl_mem[1])
-- if (prot->enter_memory_pressure)
-- prot->enter_memory_pressure(sk);
--
-- /* Over hard limit. */
-- if (allocated > prot->sysctl_mem[2])
-- goto suppress_allocation;
--
-- /* guarantee minimum buffer size under pressure */
-- if (kind == SK_MEM_RECV) {
-- if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
-- return 1;
-- } else { /* SK_MEM_SEND */
-- if (sk->sk_type == SOCK_STREAM) {
-- if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
-- return 1;
-- } else if (atomic_read(&sk->sk_wmem_alloc) <
-- prot->sysctl_wmem[0])
-- return 1;
-- }
--
-- if (prot->memory_pressure) {
-- if (!*prot->memory_pressure ||
-- prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) *
-- sk_mem_pages(sk->sk_wmem_queued +
-- atomic_read(&sk->sk_rmem_alloc) +
-- sk->sk_forward_alloc))
-- return 1;
-- }
--
--suppress_allocation:
--
-- if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
-- sk_stream_moderate_sndbuf(sk);
--
-- /* Fail only if socket is _under_ its sndbuf.
-- * In this case we cannot block, so that we have to fail.
-- */
-- if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
-- return 1;
-- }
--
-- /* Alas. Undo changes. */
-- sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
-- atomic_sub(amt, prot->memory_allocated);
-- return 0;
--}
--
--EXPORT_SYMBOL(__sk_mem_schedule);
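/*
 * A worked example of the accounting above, assuming SK_MEM_QUANTUM
 * is PAGE_SIZE (4096): charging size = 1500 gives sk_mem_pages(1500)
 * = 1, so sk_forward_alloc grows by 4096 and memory_allocated by 1;
 * the unused 2596 bytes remain in sk_forward_alloc for later charges
 * until __sk_mem_reclaim() hands whole quanta back.
 */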
--
--/**
-- * __sk_mem_reclaim - reclaim memory_allocated
-- * @sk: socket
-- */
--void __sk_mem_reclaim(struct sock *sk)
--{
-- struct proto *prot = sk->sk_prot;
--
-- atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
-- prot->memory_allocated);
-- sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
--
-- if (prot->memory_pressure && *prot->memory_pressure &&
-- (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]))
-- *prot->memory_pressure = 0;
--}
--
--EXPORT_SYMBOL(__sk_mem_reclaim);
--
--
--/*
-- * Set of default routines for initialising struct proto_ops when
-- * the protocol does not support a particular function. In certain
-- * cases where it makes no sense for a protocol to have a "do nothing"
-- * function, some default processing is provided.
-- */
--
--int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
--{
-- return -EOPNOTSUPP;
--}
--
--int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
-- int len, int flags)
--{
-- return -EOPNOTSUPP;
--}
--
--int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
--{
-- return -EOPNOTSUPP;
--}
--
--int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
--{
-- return -EOPNOTSUPP;
--}
--
--int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
-- int *len, int peer)
--{
-- return -EOPNOTSUPP;
--}
--
--unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
--{
-- return 0;
--}
--
--int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
--{
-- return -EOPNOTSUPP;
--}
--
--int sock_no_listen(struct socket *sock, int backlog)
--{
-- return -EOPNOTSUPP;
--}
--
--int sock_no_shutdown(struct socket *sock, int how)
--{
-- return -EOPNOTSUPP;
--}
--
--int sock_no_setsockopt(struct socket *sock, int level, int optname,
-- char __user *optval, int optlen)
--{
-- return -EOPNOTSUPP;
--}
--
--int sock_no_getsockopt(struct socket *sock, int level, int optname,
-- char __user *optval, int __user *optlen)
--{
-- return -EOPNOTSUPP;
--}
--
--int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
-- size_t len)
--{
-- return -EOPNOTSUPP;
--}
--
--int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
-- size_t len, int flags)
--{
-- return -EOPNOTSUPP;
--}
--
--int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
--{
-- /* Mirror missing mmap method error code */
-- return -ENODEV;
--}
--
--ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
--{
-- ssize_t res;
-- struct msghdr msg = {.msg_flags = flags};
-- struct kvec iov;
-- char *kaddr = kmap(page);
-- iov.iov_base = kaddr + offset;
-- iov.iov_len = size;
-- res = kernel_sendmsg(sock, &msg, &iov, 1, size);
-- kunmap(page);
-- return res;
--}
--
--/*
-- * Default Socket Callbacks
-- */
--
--static void sock_def_wakeup(struct sock *sk)
--{
-- read_lock(&sk->sk_callback_lock);
-- if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-- wake_up_interruptible_all(sk->sk_sleep);
-- read_unlock(&sk->sk_callback_lock);
--}
--
--static void sock_def_error_report(struct sock *sk)
--{
-- read_lock(&sk->sk_callback_lock);
-- if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-- wake_up_interruptible(sk->sk_sleep);
-- sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
-- read_unlock(&sk->sk_callback_lock);
--}
--
--static void sock_def_readable(struct sock *sk, int len)
--{
-- read_lock(&sk->sk_callback_lock);
-- if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-- wake_up_interruptible_sync(sk->sk_sleep);
-- sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
-- read_unlock(&sk->sk_callback_lock);
--}
--
--static void sock_def_write_space(struct sock *sk)
--{
-- read_lock(&sk->sk_callback_lock);
--
-- /* Do not wake up a writer until he can make "significant"
-- * progress. --DaveM
-- */
-- if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
-- if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-- wake_up_interruptible_sync(sk->sk_sleep);
--
-- /* Should agree with poll, otherwise some programs break */
-- if (sock_writeable(sk))
-- sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
-- }
--
-- read_unlock(&sk->sk_callback_lock);
--}
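A tiny standalone model of the wake-up heuristic in sock_def_write_space() above, assuming the same "half the send buffer must have drained" rule:

#include <stdbool.h>

/* Writers are only woken once wmem_alloc has fallen to half of
 * sndbuf or less, so each wakeup can make significant progress. */
static bool should_wake_writer(int wmem_alloc, int sndbuf)
{
	return (wmem_alloc << 1) <= sndbuf;
}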
--
--static void sock_def_destruct(struct sock *sk)
--{
-- kfree(sk->sk_protinfo);
--}
--
--void sk_send_sigurg(struct sock *sk)
--{
-- if (sk->sk_socket && sk->sk_socket->file)
-- if (send_sigurg(&sk->sk_socket->file->f_owner))
-- sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
--}
--
--void sk_reset_timer(struct sock *sk, struct timer_list* timer,
-- unsigned long expires)
--{
-- if (!mod_timer(timer, expires))
-- sock_hold(sk);
--}
--
--EXPORT_SYMBOL(sk_reset_timer);
--
--void sk_stop_timer(struct sock *sk, struct timer_list* timer)
--{
-- if (timer_pending(timer) && del_timer(timer))
-- __sock_put(sk);
--}
--
--EXPORT_SYMBOL(sk_stop_timer);
--
--void sock_init_data(struct socket *sock, struct sock *sk)
--{
-- skb_queue_head_init(&sk->sk_receive_queue);
-- skb_queue_head_init(&sk->sk_write_queue);
-- skb_queue_head_init(&sk->sk_error_queue);
--#ifdef CONFIG_NET_DMA
-- skb_queue_head_init(&sk->sk_async_wait_queue);
--#endif
--
-- sk->sk_send_head = NULL;
--
-- init_timer(&sk->sk_timer);
--
-- sk->sk_allocation = GFP_KERNEL;
-- sk->sk_rcvbuf = sysctl_rmem_default;
-- sk->sk_sndbuf = sysctl_wmem_default;
-- sk->sk_state = TCP_CLOSE;
-- sk_set_socket(sk, sock);
--
-- sock_set_flag(sk, SOCK_ZAPPED);
--
-- if (sock) {
-- sk->sk_type = sock->type;
-- sk->sk_sleep = &sock->wait;
-- sock->sk = sk;
-- } else
-- sk->sk_sleep = NULL;
--
-- rwlock_init(&sk->sk_dst_lock);
-- rwlock_init(&sk->sk_callback_lock);
-- lockdep_set_class_and_name(&sk->sk_callback_lock,
-- af_callback_keys + sk->sk_family,
-- af_family_clock_key_strings[sk->sk_family]);
--
-- sk->sk_state_change = sock_def_wakeup;
-- sk->sk_data_ready = sock_def_readable;
-- sk->sk_write_space = sock_def_write_space;
-- sk->sk_error_report = sock_def_error_report;
-- sk->sk_destruct = sock_def_destruct;
--
-- sk->sk_sndmsg_page = NULL;
-- sk->sk_sndmsg_off = 0;
--
-- sk->sk_peercred.pid = 0;
-- sk->sk_peercred.uid = -1;
-- sk->sk_peercred.gid = -1;
-- sk->sk_write_pending = 0;
-- sk->sk_rcvlowat = 1;
-- sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
-- sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
--
-- sk->sk_stamp = ktime_set(-1L, 0);
--
-- set_vx_info(&sk->sk_vx_info, current->vx_info);
-- sk->sk_xid = vx_current_xid();
-- vx_sock_inc(sk);
-- set_nx_info(&sk->sk_nx_info, current->nx_info);
-- sk->sk_nid = nx_current_nid();
-- atomic_set(&sk->sk_refcnt, 1);
-- atomic_set(&sk->sk_drops, 0);
--}
--
--void lock_sock_nested(struct sock *sk, int subclass)
--{
-- might_sleep();
-- spin_lock_bh(&sk->sk_lock.slock);
-- if (sk->sk_lock.owned)
-- __lock_sock(sk);
-- sk->sk_lock.owned = 1;
-- spin_unlock(&sk->sk_lock.slock);
-- /*
-- * The sk_lock has mutex_lock() semantics here:
-- */
-- mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
-- local_bh_enable();
--}
--
--EXPORT_SYMBOL(lock_sock_nested);
--
--void release_sock(struct sock *sk)
--{
-- /*
-- * The sk_lock has mutex_unlock() semantics:
-- */
-- mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
--
-- spin_lock_bh(&sk->sk_lock.slock);
-- if (sk->sk_backlog.tail)
-- __release_sock(sk);
-- sk->sk_lock.owned = 0;
-- if (waitqueue_active(&sk->sk_lock.wq))
-- wake_up(&sk->sk_lock.wq);
-- spin_unlock_bh(&sk->sk_lock.slock);
--}
--EXPORT_SYMBOL(release_sock);
--
--int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
--{
-- struct timeval tv;
-- if (!sock_flag(sk, SOCK_TIMESTAMP))
-- sock_enable_timestamp(sk);
-- tv = ktime_to_timeval(sk->sk_stamp);
-- if (tv.tv_sec == -1)
-- return -ENOENT;
-- if (tv.tv_sec == 0) {
-- sk->sk_stamp = ktime_get_real();
-- tv = ktime_to_timeval(sk->sk_stamp);
-- }
-- return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
--}
--EXPORT_SYMBOL(sock_get_timestamp);
--
--int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
--{
-- struct timespec ts;
-- if (!sock_flag(sk, SOCK_TIMESTAMP))
-- sock_enable_timestamp(sk);
-- ts = ktime_to_timespec(sk->sk_stamp);
-- if (ts.tv_sec == -1)
-- return -ENOENT;
-- if (ts.tv_sec == 0) {
-- sk->sk_stamp = ktime_get_real();
-- ts = ktime_to_timespec(sk->sk_stamp);
-- }
-- return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
--}
--EXPORT_SYMBOL(sock_get_timestampns);
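These two helpers back the Linux-specific SIOCGSTAMP/SIOCGSTAMPNS ioctls. A minimal userspace sketch, assuming fd is a socket that has already received at least one packet:

#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/time.h>
#include <linux/sockios.h>   /* SIOCGSTAMP */

/* Print the kernel's receive timestamp for the last packet. */
static void print_last_rx_stamp(int fd)
{
	struct timeval tv;

	if (ioctl(fd, SIOCGSTAMP, &tv) == 0)
		printf("last rx: %ld.%06ld\n",
		       (long)tv.tv_sec, (long)tv.tv_usec);
	else
		perror("SIOCGSTAMP");
}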
--
--void sock_enable_timestamp(struct sock *sk)
--{
-- if (!sock_flag(sk, SOCK_TIMESTAMP)) {
-- sock_set_flag(sk, SOCK_TIMESTAMP);
-- net_enable_timestamp();
-- }
--}
--
--/*
-- * Get a socket option on a socket.
-- *
-- * FIX: POSIX 1003.1g is very ambiguous here. It states that
-- * asynchronous errors should be reported by getsockopt. We assume
-- * this means if you specify SO_ERROR (otherwise what's the point of it).
-- */
--int sock_common_getsockopt(struct socket *sock, int level, int optname,
-- char __user *optval, int __user *optlen)
--{
-- struct sock *sk = sock->sk;
--
-- return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
--}
--
--EXPORT_SYMBOL(sock_common_getsockopt);
--
--#ifdef CONFIG_COMPAT
--int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
-- char __user *optval, int __user *optlen)
--{
-- struct sock *sk = sock->sk;
--
-- if (sk->sk_prot->compat_getsockopt != NULL)
-- return sk->sk_prot->compat_getsockopt(sk, level, optname,
-- optval, optlen);
-- return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
--}
--EXPORT_SYMBOL(compat_sock_common_getsockopt);
--#endif
--
--int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
-- struct msghdr *msg, size_t size, int flags)
--{
-- struct sock *sk = sock->sk;
-- int addr_len = 0;
-- int err;
--
-- err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
-- flags & ~MSG_DONTWAIT, &addr_len);
-- if (err >= 0)
-- msg->msg_namelen = addr_len;
-- return err;
--}
--
--EXPORT_SYMBOL(sock_common_recvmsg);
--
--/*
-- * Set socket options on an inet socket.
-- */
--int sock_common_setsockopt(struct socket *sock, int level, int optname,
-- char __user *optval, int optlen)
--{
-- struct sock *sk = sock->sk;
--
-- return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
--}
--
--EXPORT_SYMBOL(sock_common_setsockopt);
--
--#ifdef CONFIG_COMPAT
--int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
-- char __user *optval, int optlen)
--{
-- struct sock *sk = sock->sk;
--
-- if (sk->sk_prot->compat_setsockopt != NULL)
-- return sk->sk_prot->compat_setsockopt(sk, level, optname,
-- optval, optlen);
-- return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
--}
--EXPORT_SYMBOL(compat_sock_common_setsockopt);
--#endif
--
--void sk_common_release(struct sock *sk)
--{
-- if (sk->sk_prot->destroy)
-- sk->sk_prot->destroy(sk);
--
-- /*
--	 * Observation: when sk_common_release() is called, processes have
--	 * no access to the socket, but the network stack still does.
-- * Step one, detach it from networking:
-- *
-- * A. Remove from hash tables.
-- */
--
-- sk->sk_prot->unhash(sk);
--
-- /*
--	 * At this point the socket cannot receive new packets, but it is
--	 * possible that some packets are still in flight, because some CPU
--	 * is running the receiver and did the hash table lookup before we
--	 * unhashed the socket. They will reach the receive queue and be
--	 * purged by the socket destructor.
--	 *
--	 * Also we still have packets pending on the receive queue and,
--	 * probably, our own packets waiting in device queues. sock_destroy()
--	 * will drain the receive queue, but transmitted packets will delay
--	 * socket destruction until the last reference is released.
-- */
--
-- sock_orphan(sk);
--
-- xfrm_sk_free_policy(sk);
--
-- sk_refcnt_debug_release(sk);
-- sock_put(sk);
--}
--
--EXPORT_SYMBOL(sk_common_release);
--
--static DEFINE_RWLOCK(proto_list_lock);
--static LIST_HEAD(proto_list);
--
--#ifdef CONFIG_PROC_FS
--#define PROTO_INUSE_NR 64 /* should be enough for the first time */
--struct prot_inuse {
-- int val[PROTO_INUSE_NR];
--};
--
--static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
--
--#ifdef CONFIG_NET_NS
--void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
--{
-- int cpu = smp_processor_id();
-- per_cpu_ptr(net->core.inuse, cpu)->val[prot->inuse_idx] += val;
--}
--EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
--
--int sock_prot_inuse_get(struct net *net, struct proto *prot)
--{
-- int cpu, idx = prot->inuse_idx;
-- int res = 0;
--
-- for_each_possible_cpu(cpu)
-- res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];
--
-- return res >= 0 ? res : 0;
--}
--EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
--
--static int sock_inuse_init_net(struct net *net)
--{
-- net->core.inuse = alloc_percpu(struct prot_inuse);
-- return net->core.inuse ? 0 : -ENOMEM;
--}
--
--static void sock_inuse_exit_net(struct net *net)
--{
-- free_percpu(net->core.inuse);
--}
--
--static struct pernet_operations net_inuse_ops = {
-- .init = sock_inuse_init_net,
-- .exit = sock_inuse_exit_net,
--};
--
--static __init int net_inuse_init(void)
--{
-- if (register_pernet_subsys(&net_inuse_ops))
-- panic("Cannot initialize net inuse counters");
--
-- return 0;
--}
--
--core_initcall(net_inuse_init);
--#else
--static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
--
--void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
--{
-- __get_cpu_var(prot_inuse).val[prot->inuse_idx] += val;
--}
--EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
--
--int sock_prot_inuse_get(struct net *net, struct proto *prot)
--{
-- int cpu, idx = prot->inuse_idx;
-- int res = 0;
--
-- for_each_possible_cpu(cpu)
-- res += per_cpu(prot_inuse, cpu).val[idx];
--
-- return res >= 0 ? res : 0;
--}
--EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
--#endif
--
--static void assign_proto_idx(struct proto *prot)
--{
-- prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
--
-- if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
-- printk(KERN_ERR "PROTO_INUSE_NR exhausted\n");
-- return;
-- }
--
-- set_bit(prot->inuse_idx, proto_inuse_idx);
--}
--
--static void release_proto_idx(struct proto *prot)
--{
-- if (prot->inuse_idx != PROTO_INUSE_NR - 1)
-- clear_bit(prot->inuse_idx, proto_inuse_idx);
--}
--#else
--static inline void assign_proto_idx(struct proto *prot)
--{
--}
--
--static inline void release_proto_idx(struct proto *prot)
--{
--}
--#endif
--
--int proto_register(struct proto *prot, int alloc_slab)
--{
-- char *request_sock_slab_name = NULL;
-- char *timewait_sock_slab_name;
--
-- if (alloc_slab) {
-- prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
-- SLAB_HWCACHE_ALIGN, NULL);
--
-- if (prot->slab == NULL) {
-- printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
-- prot->name);
-- goto out;
-- }
--
-- if (prot->rsk_prot != NULL) {
-- static const char mask[] = "request_sock_%s";
--
-- request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
-- if (request_sock_slab_name == NULL)
-- goto out_free_sock_slab;
--
-- sprintf(request_sock_slab_name, mask, prot->name);
-- prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
-- prot->rsk_prot->obj_size, 0,
-- SLAB_HWCACHE_ALIGN, NULL);
--
-- if (prot->rsk_prot->slab == NULL) {
-- printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
-- prot->name);
-- goto out_free_request_sock_slab_name;
-- }
-- }
--
-- if (prot->twsk_prot != NULL) {
-- static const char mask[] = "tw_sock_%s";
--
-- timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
--
-- if (timewait_sock_slab_name == NULL)
-- goto out_free_request_sock_slab;
--
-- sprintf(timewait_sock_slab_name, mask, prot->name);
-- prot->twsk_prot->twsk_slab =
-- kmem_cache_create(timewait_sock_slab_name,
-- prot->twsk_prot->twsk_obj_size,
-- 0, SLAB_HWCACHE_ALIGN,
-- NULL);
-- if (prot->twsk_prot->twsk_slab == NULL)
-- goto out_free_timewait_sock_slab_name;
-- }
-- }
--
-- write_lock(&proto_list_lock);
-- list_add(&prot->node, &proto_list);
-- assign_proto_idx(prot);
-- write_unlock(&proto_list_lock);
-- return 0;
--
--out_free_timewait_sock_slab_name:
-- kfree(timewait_sock_slab_name);
--out_free_request_sock_slab:
-- if (prot->rsk_prot && prot->rsk_prot->slab) {
-- kmem_cache_destroy(prot->rsk_prot->slab);
-- prot->rsk_prot->slab = NULL;
-- }
--out_free_request_sock_slab_name:
-- kfree(request_sock_slab_name);
--out_free_sock_slab:
-- kmem_cache_destroy(prot->slab);
-- prot->slab = NULL;
--out:
-- return -ENOBUFS;
--}
--
--EXPORT_SYMBOL(proto_register);
--
--void proto_unregister(struct proto *prot)
--{
-- write_lock(&proto_list_lock);
-- release_proto_idx(prot);
-- list_del(&prot->node);
-- write_unlock(&proto_list_lock);
--
-- if (prot->slab != NULL) {
-- kmem_cache_destroy(prot->slab);
-- prot->slab = NULL;
-- }
--
-- if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
-- const char *name = kmem_cache_name(prot->rsk_prot->slab);
--
-- kmem_cache_destroy(prot->rsk_prot->slab);
-- kfree(name);
-- prot->rsk_prot->slab = NULL;
-- }
--
-- if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
-- const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);
--
-- kmem_cache_destroy(prot->twsk_prot->twsk_slab);
-- kfree(name);
-- prot->twsk_prot->twsk_slab = NULL;
-- }
--}
--
--EXPORT_SYMBOL(proto_unregister);
--
--#ifdef CONFIG_PROC_FS
--static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
-- __acquires(proto_list_lock)
--{
-- read_lock(&proto_list_lock);
-- return seq_list_start_head(&proto_list, *pos);
--}
--
--static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
--{
-- return seq_list_next(v, &proto_list, pos);
--}
--
--static void proto_seq_stop(struct seq_file *seq, void *v)
-- __releases(proto_list_lock)
--{
-- read_unlock(&proto_list_lock);
--}
--
--static char proto_method_implemented(const void *method)
--{
-- return method == NULL ? 'n' : 'y';
--}
--
--static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
--{
-- seq_printf(seq, "%-9s %4u %6d %6d %-3s %6u %-3s %-10s "
-- "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
-- proto->name,
-- proto->obj_size,
-- proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
-- proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
-- proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
-- proto->max_header,
-- proto->slab == NULL ? "no" : "yes",
-- module_name(proto->owner),
-- proto_method_implemented(proto->close),
-- proto_method_implemented(proto->connect),
-- proto_method_implemented(proto->disconnect),
-- proto_method_implemented(proto->accept),
-- proto_method_implemented(proto->ioctl),
-- proto_method_implemented(proto->init),
-- proto_method_implemented(proto->destroy),
-- proto_method_implemented(proto->shutdown),
-- proto_method_implemented(proto->setsockopt),
-- proto_method_implemented(proto->getsockopt),
-- proto_method_implemented(proto->sendmsg),
-- proto_method_implemented(proto->recvmsg),
-- proto_method_implemented(proto->sendpage),
-- proto_method_implemented(proto->bind),
-- proto_method_implemented(proto->backlog_rcv),
-- proto_method_implemented(proto->hash),
-- proto_method_implemented(proto->unhash),
-- proto_method_implemented(proto->get_port),
-- proto_method_implemented(proto->enter_memory_pressure));
--}
--
--static int proto_seq_show(struct seq_file *seq, void *v)
--{
-- if (v == &proto_list)
-- seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
-- "protocol",
-- "size",
-- "sockets",
-- "memory",
-- "press",
-- "maxhdr",
-- "slab",
-- "module",
-- "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
-- else
-- proto_seq_printf(seq, list_entry(v, struct proto, node));
-- return 0;
--}
--
--static const struct seq_operations proto_seq_ops = {
-- .start = proto_seq_start,
-- .next = proto_seq_next,
-- .stop = proto_seq_stop,
-- .show = proto_seq_show,
--};
--
--static int proto_seq_open(struct inode *inode, struct file *file)
--{
-- return seq_open(file, &proto_seq_ops);
--}
--
--static const struct file_operations proto_seq_fops = {
-- .owner = THIS_MODULE,
-- .open = proto_seq_open,
-- .read = seq_read,
-- .llseek = seq_lseek,
-- .release = seq_release,
--};
--
--static int __init proto_init(void)
--{
-- /* register /proc/net/protocols */
-- return proc_net_fops_create(&init_net, "protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
--}
--
--subsys_initcall(proto_init);
--
--#endif /* PROC_FS */
--
--EXPORT_SYMBOL(sk_alloc);
--EXPORT_SYMBOL(sk_free);
--EXPORT_SYMBOL(sk_send_sigurg);
--EXPORT_SYMBOL(sock_alloc_send_skb);
--EXPORT_SYMBOL(sock_init_data);
--EXPORT_SYMBOL(sock_kfree_s);
--EXPORT_SYMBOL(sock_kmalloc);
--EXPORT_SYMBOL(sock_no_accept);
--EXPORT_SYMBOL(sock_no_bind);
--EXPORT_SYMBOL(sock_no_connect);
--EXPORT_SYMBOL(sock_no_getname);
--EXPORT_SYMBOL(sock_no_getsockopt);
--EXPORT_SYMBOL(sock_no_ioctl);
--EXPORT_SYMBOL(sock_no_listen);
--EXPORT_SYMBOL(sock_no_mmap);
--EXPORT_SYMBOL(sock_no_poll);
--EXPORT_SYMBOL(sock_no_recvmsg);
--EXPORT_SYMBOL(sock_no_sendmsg);
--EXPORT_SYMBOL(sock_no_sendpage);
--EXPORT_SYMBOL(sock_no_setsockopt);
--EXPORT_SYMBOL(sock_no_shutdown);
--EXPORT_SYMBOL(sock_no_socketpair);
--EXPORT_SYMBOL(sock_rfree);
--EXPORT_SYMBOL(sock_setsockopt);
--EXPORT_SYMBOL(sock_wfree);
--EXPORT_SYMBOL(sock_wmalloc);
--EXPORT_SYMBOL(sock_i_uid);
--EXPORT_SYMBOL(sock_i_ino);
--EXPORT_SYMBOL(sysctl_optmem_max);
-diff -Nurb linux-2.6.27-524/net/ipv4/udp.c.orig linux-2.6.27-525/net/ipv4/udp.c.orig
---- linux-2.6.27-524/net/ipv4/udp.c.orig 2009-12-04 16:03:48.000000000 -0500
-+++ linux-2.6.27-525/net/ipv4/udp.c.orig 1969-12-31 19:00:00.000000000 -0500
-@@ -1,1766 +0,0 @@
--/*
-- * INET An implementation of the TCP/IP protocol suite for the LINUX
-- * operating system. INET is implemented using the BSD Socket
-- * interface as the means of communication with the user level.
-- *
-- * The User Datagram Protocol (UDP).
-- *
-- * Authors: Ross Biro
-- * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
-- * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
-- * Alan Cox, <Alan.Cox@linux.org>
-- * Hirokazu Takahashi, <taka@valinux.co.jp>
-- *
-- * Fixes:
-- * Alan Cox : verify_area() calls
-- * Alan Cox : stopped close while in use off icmp
-- * messages. Not a fix but a botch that
-- * for udp at least is 'valid'.
-- * Alan Cox : Fixed icmp handling properly
-- * Alan Cox : Correct error for oversized datagrams
-- * Alan Cox : Tidied select() semantics.
-- * Alan Cox : udp_err() fixed properly, also now
-- * select and read wake correctly on errors
-- * Alan Cox : udp_send verify_area moved to avoid mem leak
-- * Alan Cox : UDP can count its memory
-- * Alan Cox : send to an unknown connection causes
-- * an ECONNREFUSED off the icmp, but
-- * does NOT close.
-- * Alan Cox : Switched to new sk_buff handlers. No more backlog!
-- * Alan Cox : Using generic datagram code. Even smaller and the PEEK
-- * bug no longer crashes it.
-- * Fred Van Kempen : Net2e support for sk->broadcast.
-- * Alan Cox : Uses skb_free_datagram
-- * Alan Cox : Added get/set sockopt support.
-- * Alan Cox : Broadcasting without option set returns EACCES.
-- * Alan Cox : No wakeup calls. Instead we now use the callbacks.
-- * Alan Cox : Use ip_tos and ip_ttl
-- * Alan Cox : SNMP Mibs
-- * Alan Cox : MSG_DONTROUTE, and 0.0.0.0 support.
-- * Matt Dillon : UDP length checks.
-- * Alan Cox : Smarter af_inet used properly.
-- * Alan Cox : Use new kernel side addressing.
-- * Alan Cox : Incorrect return on truncated datagram receive.
-- * Arnt Gulbrandsen : New udp_send and stuff
-- * Alan Cox : Cache last socket
-- * Alan Cox : Route cache
-- * Jon Peatfield : Minor efficiency fix to sendto().
-- * Mike Shaver : RFC1122 checks.
-- * Alan Cox : Nonblocking error fix.
-- * Willy Konynenberg : Transparent proxying support.
-- * Mike McLagan : Routing by source
-- * David S. Miller : New socket lookup architecture.
-- * Last socket cache retained as it
-- * does have a high hit rate.
-- * Olaf Kirch : Don't linearise iovec on sendmsg.
-- * Andi Kleen : Some cleanups, cache destination entry
-- * for connect.
-- * Vitaly E. Lavrov : Transparent proxy revived after year coma.
-- * Melvin Smith : Check msg_name not msg_namelen in sendto(),
-- * return ENOTCONN for unconnected sockets (POSIX)
-- * Janos Farkas : don't deliver multi/broadcasts to a different
-- * bound-to-device socket
-- * Hirokazu Takahashi : HW checksumming for outgoing UDP
-- * datagrams.
-- * Hirokazu Takahashi : sendfile() on UDP works now.
-- * Arnaldo C. Melo : convert /proc/net/udp to seq_file
-- * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
-- * Alexey Kuznetsov: allow both IPv4 and IPv6 sockets to bind
-- * a single port at the same time.
-- * Derek Atkins <derek@ihtfp.com>: Add Encapsulation Support
-- * James Chapman : Add L2TP encapsulation type.
-- *
-- *
-- * This program is free software; you can redistribute it and/or
-- * modify it under the terms of the GNU General Public License
-- * as published by the Free Software Foundation; either version
-- * 2 of the License, or (at your option) any later version.
-- */
--
--#include <asm/system.h>
--#include <asm/uaccess.h>
--#include <asm/ioctls.h>
--#include <linux/bootmem.h>
--#include <linux/types.h>
--#include <linux/fcntl.h>
--#include <linux/module.h>
--#include <linux/socket.h>
--#include <linux/sockios.h>
--#include <linux/igmp.h>
--#include <linux/in.h>
--#include <linux/errno.h>
--#include <linux/timer.h>
--#include <linux/mm.h>
--#include <linux/inet.h>
--#include <linux/netdevice.h>
--#include <net/tcp_states.h>
--#include <linux/skbuff.h>
--#include <linux/proc_fs.h>
--#include <linux/seq_file.h>
--#include <net/net_namespace.h>
--#include <net/icmp.h>
--#include <net/route.h>
--#include <net/checksum.h>
--#include <net/xfrm.h>
--#include "udp_impl.h"
--
--/*
-- * Snmp MIB for the UDP layer
-- */
--
--DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
--EXPORT_SYMBOL(udp_stats_in6);
--
--struct hlist_head udp_hash[UDP_HTABLE_SIZE];
--DEFINE_RWLOCK(udp_hash_lock);
--
--int sysctl_udp_mem[3] __read_mostly;
--int sysctl_udp_rmem_min __read_mostly;
--int sysctl_udp_wmem_min __read_mostly;
--
--EXPORT_SYMBOL(sysctl_udp_mem);
--EXPORT_SYMBOL(sysctl_udp_rmem_min);
--EXPORT_SYMBOL(sysctl_udp_wmem_min);
--
--atomic_t udp_memory_allocated;
--EXPORT_SYMBOL(udp_memory_allocated);
--
--static inline int __udp_lib_lport_inuse(struct net *net, __u16 num,
-- const struct hlist_head udptable[])
--{
-- struct sock *sk;
-- struct hlist_node *node;
--
-- sk_for_each(sk, node, &udptable[udp_hashfn(net, num)])
-- if (net_eq(sock_net(sk), net) && sk->sk_hash == num)
-- return 1;
-- return 0;
--}
--
--/**
-- * udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6
-- *
-- * @sk: socket struct in question
-- * @snum: port number to look up
-- * @saddr_comp: AF-dependent comparison of bound local IP addresses
-- */
--int udp_lib_get_port(struct sock *sk, unsigned short snum,
-- int (*saddr_comp)(const struct sock *sk1,
-- const struct sock *sk2 ) )
--{
-- struct hlist_head *udptable = sk->sk_prot->h.udp_hash;
-- struct hlist_node *node;
-- struct hlist_head *head;
-- struct sock *sk2;
-- int error = 1;
-- struct net *net = sock_net(sk);
--
-- write_lock_bh(&udp_hash_lock);
--
-- if (!snum) {
-- int i, low, high, remaining;
-- unsigned rover, best, best_size_so_far;
--
-- inet_get_local_port_range(&low, &high);
-- remaining = (high - low) + 1;
--
-- best_size_so_far = UINT_MAX;
-- best = rover = net_random() % remaining + low;
--
-- /* 1st pass: look for empty (or shortest) hash chain */
-- for (i = 0; i < UDP_HTABLE_SIZE; i++) {
-- int size = 0;
--
-- head = &udptable[udp_hashfn(net, rover)];
-- if (hlist_empty(head))
-- goto gotit;
--
-- sk_for_each(sk2, node, head) {
-- if (++size >= best_size_so_far)
-- goto next;
-- }
-- best_size_so_far = size;
-- best = rover;
-- next:
-- /* fold back if end of range */
-- if (++rover > high)
-- rover = low + ((rover - low)
-- & (UDP_HTABLE_SIZE - 1));
--
--
-- }
--
-- /* 2nd pass: find hole in shortest hash chain */
-- rover = best;
-- for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) {
-- if (! __udp_lib_lport_inuse(net, rover, udptable))
-- goto gotit;
-- rover += UDP_HTABLE_SIZE;
-- if (rover > high)
-- rover = low + ((rover - low)
-- & (UDP_HTABLE_SIZE - 1));
-- }
--
--
-- /* All ports in use! */
-- goto fail;
--
--gotit:
-- snum = rover;
-- } else {
-- head = &udptable[udp_hashfn(net, snum)];
--
-- sk_for_each(sk2, node, head)
-- if (sk2->sk_hash == snum &&
-- sk2 != sk &&
-- net_eq(sock_net(sk2), net) &&
-- (!sk2->sk_reuse || !sk->sk_reuse) &&
-- (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
-- || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
-- (*saddr_comp)(sk, sk2) )
-- goto fail;
-- }
--
-- inet_sk(sk)->num = snum;
-- sk->sk_hash = snum;
-- if (sk_unhashed(sk)) {
-- head = &udptable[udp_hashfn(net, snum)];
-- sk_add_node(sk, head);
-- sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
-- }
-- error = 0;
--fail:
-- write_unlock_bh(&udp_hash_lock);
-- return error;
--}
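An illustrative standalone model of the two-pass search above (not the kernel code; the chain lengths and the in-use test are faked so the sketch compiles on its own):

#include <limits.h>
#include <stdbool.h>

#define HTABLE_SIZE 128                    /* stands in for UDP_HTABLE_SIZE */

static int chain_len[HTABLE_SIZE];         /* fake per-chain socket counts */

static bool port_in_use(int port)          /* fake lookup */
{
	return port % 7 == 0;
}

static int pick_port(int low, int high)
{
	int rover = low, best = low, best_size = INT_MAX;
	int i;

	/* 1st pass: find an empty (or the shortest) hash chain */
	for (i = 0; i < HTABLE_SIZE; i++) {
		int size = chain_len[rover % HTABLE_SIZE];

		if (size == 0)
			return rover;               /* empty chain: take it */
		if (size < best_size) {
			best_size = size;
			best = rover;
		}
		if (++rover > high)                 /* fold back into range */
			rover = low + ((rover - low) & (HTABLE_SIZE - 1));
	}

	/* 2nd pass: probe ports hashing onto the best chain for a hole */
	for (rover = best, i = 0; i < (1 << 16) / HTABLE_SIZE; i++) {
		if (!port_in_use(rover))
			return rover;
		rover += HTABLE_SIZE;               /* same chain, next port */
		if (rover > high)
			rover = low + ((rover - low) & (HTABLE_SIZE - 1));
	}
	return -1;                                  /* all ports in use */
}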
--
--extern int ipv4_rcv_saddr_equal(const struct sock *, const struct sock *);
--
--int udp_v4_get_port(struct sock *sk, unsigned short snum)
--{
-- return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal);
--}
--
--
--/* UDP is nearly always wildcards out the wazoo, so it makes no sense to
-- * try harder than this. -DaveM
-- */
--static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
-- __be16 sport, __be32 daddr, __be16 dport,
-- int dif, struct hlist_head udptable[])
--{
-- struct sock *sk, *result = NULL;
-- struct hlist_node *node;
-- unsigned short hnum = ntohs(dport);
-- int badness = -1;
--
-- read_lock(&udp_hash_lock);
-- sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) {
-- struct inet_sock *inet = inet_sk(sk);
--
-- if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
-- !ipv6_only_sock(sk)) {
-- int score = (sk->sk_family == PF_INET ? 1 : 0);
--
-- if (inet->rcv_saddr) {
-- if (inet->rcv_saddr != daddr)
-- continue;
-- score+=2;
-- } else {
-- /* block non nx_info ips */
-- if (!v4_addr_in_nx_info(sk->sk_nx_info,
-- daddr, NXA_MASK_BIND))
-- continue;
-- }
-- if (inet->daddr) {
-- if (inet->daddr != saddr)
-- continue;
-- score+=2;
-- }
-- if (inet->dport) {
-- if (inet->dport != sport)
-- continue;
-- score+=2;
-- }
-- if (sk->sk_bound_dev_if) {
-- if (sk->sk_bound_dev_if != dif)
-- continue;
-- score+=2;
-- }
-- if (score == 9) {
-- result = sk;
-- break;
-- } else if (score > badness) {
-- result = sk;
-- badness = score;
-- }
-- }
-- }
--
-- if (result)
-- sock_hold(result);
-- read_unlock(&udp_hash_lock);
-- return result;
--}
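A standalone model of the best-match scoring used in the lookup above: a bound-but-wildcard field matches anything, each exact field match adds 2, and the PF_INET base contributes 1, so 9 is a perfect (fully connected) match. Types and names here are illustrative:

struct bound {
	unsigned int   rcv_saddr, daddr;   /* 0 means wildcard */
	unsigned short dport;
	int            dif;
};

static int lookup_score(const struct bound *b, unsigned int saddr,
			unsigned int daddr, unsigned short sport, int dif)
{
	int score = 1;                     /* PF_INET base score */

	if (b->rcv_saddr) {
		if (b->rcv_saddr != daddr)
			return -1;         /* bound elsewhere: no match */
		score += 2;
	}
	if (b->daddr) {
		if (b->daddr != saddr)
			return -1;
		score += 2;
	}
	if (b->dport) {
		if (b->dport != sport)
			return -1;
		score += 2;
	}
	if (b->dif) {
		if (b->dif != dif)
			return -1;
		score += 2;
	}
	return score;                      /* 9 == fully-connected socket */
}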
--
--static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk,
-- __be16 loc_port, __be32 loc_addr,
-- __be16 rmt_port, __be32 rmt_addr,
-- int dif)
--{
-- struct hlist_node *node;
-- struct sock *s = sk;
-- unsigned short hnum = ntohs(loc_port);
--
-- sk_for_each_from(s, node) {
-- struct inet_sock *inet = inet_sk(s);
--
-- if (!net_eq(sock_net(s), net) ||
-- s->sk_hash != hnum ||
-- (inet->daddr && inet->daddr != rmt_addr) ||
-- (inet->dport != rmt_port && inet->dport) ||
-- !v4_sock_addr_match(sk->sk_nx_info, inet, loc_addr) ||
-- ipv6_only_sock(s) ||
-- (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
-- continue;
-- if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif))
-- continue;
-- goto found;
-- }
-- s = NULL;
--found:
-- return s;
--}
--
--/*
-- * This routine is called by the ICMP module when it gets some
-- * sort of error condition. If err < 0 then the socket should
-- * be closed and the error returned to the user. If err > 0
-- * it's just the icmp type << 8 | icmp code.
-- * Header points to the ip header of the error packet. We move
-- * on past this. Then (as it used to claim before adjustment)
-- * header points to the first 8 bytes of the udp header. We need
-- * to find the appropriate port.
-- */
--
--void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
--{
-- struct inet_sock *inet;
-- struct iphdr *iph = (struct iphdr*)skb->data;
-- struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
-- const int type = icmp_hdr(skb)->type;
-- const int code = icmp_hdr(skb)->code;
-- struct sock *sk;
-- int harderr;
-- int err;
-- struct net *net = dev_net(skb->dev);
--
-- sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
-- iph->saddr, uh->source, skb->dev->ifindex, udptable);
-- if (sk == NULL) {
-- ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
-- return; /* No socket for error */
-- }
--
-- err = 0;
-- harderr = 0;
-- inet = inet_sk(sk);
--
-- switch (type) {
-- default:
-- case ICMP_TIME_EXCEEDED:
-- err = EHOSTUNREACH;
-- break;
-- case ICMP_SOURCE_QUENCH:
-- goto out;
-- case ICMP_PARAMETERPROB:
-- err = EPROTO;
-- harderr = 1;
-- break;
-- case ICMP_DEST_UNREACH:
-- if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
-- if (inet->pmtudisc != IP_PMTUDISC_DONT) {
-- err = EMSGSIZE;
-- harderr = 1;
-- break;
-- }
-- goto out;
-- }
-- err = EHOSTUNREACH;
-- if (code <= NR_ICMP_UNREACH) {
-- harderr = icmp_err_convert[code].fatal;
-- err = icmp_err_convert[code].errno;
-- }
-- break;
-- }
--
-- /*
-- * RFC1122: OK. Passes ICMP errors back to application, as per
-- * 4.1.3.3.
-- */
-- if (!inet->recverr) {
-- if (!harderr || sk->sk_state != TCP_ESTABLISHED)
-- goto out;
-- } else {
-- ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1));
-- }
-- sk->sk_err = err;
-- sk->sk_error_report(sk);
--out:
-- sock_put(sk);
--}
--
--void udp_err(struct sk_buff *skb, u32 info)
--{
-- __udp4_lib_err(skb, info, udp_hash);
--}
--
--/*
-- * Throw away all pending data and cancel the corking. Socket is locked.
-- */
--void udp_flush_pending_frames(struct sock *sk)
--{
-- struct udp_sock *up = udp_sk(sk);
--
-- if (up->pending) {
-- up->len = 0;
-- up->pending = 0;
-- ip_flush_pending_frames(sk);
-- }
--}
--EXPORT_SYMBOL(udp_flush_pending_frames);
--
--/**
-- * udp4_hwcsum_outgoing - handle outgoing HW checksumming
-- * @sk: socket we are sending on
-- * @skb: sk_buff containing the filled-in UDP header
-- * (checksum field must be zeroed out)
-- */
--static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
-- __be32 src, __be32 dst, int len )
--{
-- unsigned int offset;
-- struct udphdr *uh = udp_hdr(skb);
-- __wsum csum = 0;
--
-- if (skb_queue_len(&sk->sk_write_queue) == 1) {
-- /*
-- * Only one fragment on the socket.
-- */
-- skb->csum_start = skb_transport_header(skb) - skb->head;
-- skb->csum_offset = offsetof(struct udphdr, check);
-- uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0);
-- } else {
-- /*
--		 * HW checksumming won't work, as there are two or more
--		 * fragments on the socket, so the checksums of all the
--		 * sk_buffs must be folded together.
-- */
-- offset = skb_transport_offset(skb);
-- skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
--
-- skb->ip_summed = CHECKSUM_NONE;
--
-- skb_queue_walk(&sk->sk_write_queue, skb) {
-- csum = csum_add(csum, skb->csum);
-- }
--
-- uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
-- if (uh->check == 0)
-- uh->check = CSUM_MANGLED_0;
-- }
--}
--
--/*
-- * Push out all pending data as one UDP datagram. Socket is locked.
-- */
--static int udp_push_pending_frames(struct sock *sk)
--{
-- struct udp_sock *up = udp_sk(sk);
-- struct inet_sock *inet = inet_sk(sk);
-- struct flowi *fl = &inet->cork.fl;
-- struct sk_buff *skb;
-- struct udphdr *uh;
-- int err = 0;
-- int is_udplite = IS_UDPLITE(sk);
-- __wsum csum = 0;
--
-- /* Grab the skbuff where UDP header space exists. */
-- if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
-- goto out;
--
-- /*
-- * Create a UDP header
-- */
-- uh = udp_hdr(skb);
-- uh->source = fl->fl_ip_sport;
-- uh->dest = fl->fl_ip_dport;
-- uh->len = htons(up->len);
-- uh->check = 0;
--
-- if (is_udplite) /* UDP-Lite */
-- csum = udplite_csum_outgoing(sk, skb);
--
-- else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */
--
-- skb->ip_summed = CHECKSUM_NONE;
-- goto send;
--
-- } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
--
-- udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len);
-- goto send;
--
-- } else /* `normal' UDP */
-- csum = udp_csum_outgoing(sk, skb);
--
-- /* add protocol-dependent pseudo-header */
-- uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len,
-- sk->sk_protocol, csum );
-- if (uh->check == 0)
-- uh->check = CSUM_MANGLED_0;
--
--send:
-- err = ip_push_pending_frames(sk);
--out:
-- up->len = 0;
-- up->pending = 0;
-- if (!err)
-- UDP_INC_STATS_USER(sock_net(sk),
-- UDP_MIB_OUTDATAGRAMS, is_udplite);
-- return err;
--}
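The pending-frames machinery above is what the UDP_CORK socket option drives. A minimal userspace sketch, assuming fd is a connected UDP socket (UDP_CORK is defined defensively in case older headers lack it):

#include <sys/socket.h>
#include <netinet/in.h>

#ifndef UDP_CORK
#define UDP_CORK 1
#endif

static void send_corked(int fd)
{
	int on = 1, off = 0;

	setsockopt(fd, IPPROTO_UDP, UDP_CORK, &on, sizeof(on));
	send(fd, "hello ", 6, 0);
	send(fd, "world", 5, 0);
	/* uncorking pushes the pending frames as a single datagram */
	setsockopt(fd, IPPROTO_UDP, UDP_CORK, &off, sizeof(off));
}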
--
--int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-- size_t len)
--{
-- struct inet_sock *inet = inet_sk(sk);
-- struct udp_sock *up = udp_sk(sk);
-- int ulen = len;
-- struct ipcm_cookie ipc;
-- struct rtable *rt = NULL;
-- int free = 0;
-- int connected = 0;
-- __be32 daddr, faddr, saddr;
-- __be16 dport;
-- u8 tos;
-- int err, is_udplite = IS_UDPLITE(sk);
-- int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
-- int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
--
-- if (len > 0xFFFF)
-- return -EMSGSIZE;
--
-- /*
-- * Check the flags.
-- */
--
-- if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */
-- return -EOPNOTSUPP;
--
-- ipc.opt = NULL;
--
-- if (up->pending) {
-- /*
-- * There are pending frames.
-- * The socket lock must be held while it's corked.
-- */
-- lock_sock(sk);
-- if (likely(up->pending)) {
-- if (unlikely(up->pending != AF_INET)) {
-- release_sock(sk);
-- return -EINVAL;
-- }
-- goto do_append_data;
-- }
-- release_sock(sk);
-- }
-- ulen += sizeof(struct udphdr);
--
-- /*
-- * Get and verify the address.
-- */
-- if (msg->msg_name) {
-- struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
-- if (msg->msg_namelen < sizeof(*usin))
-- return -EINVAL;
-- if (usin->sin_family != AF_INET) {
-- if (usin->sin_family != AF_UNSPEC)
-- return -EAFNOSUPPORT;
-- }
--
-- daddr = usin->sin_addr.s_addr;
-- dport = usin->sin_port;
-- if (dport == 0)
-- return -EINVAL;
-- } else {
-- if (sk->sk_state != TCP_ESTABLISHED)
-- return -EDESTADDRREQ;
-- daddr = inet->daddr;
-- dport = inet->dport;
-- /* Open fast path for connected socket.
--		   Route will not be used if at least one option is set.
-- */
-- connected = 1;
-- }
-- ipc.addr = inet->saddr;
--
-- ipc.oif = sk->sk_bound_dev_if;
-- if (msg->msg_controllen) {
-- err = ip_cmsg_send(sock_net(sk), msg, &ipc);
-- if (err)
-- return err;
-- if (ipc.opt)
-- free = 1;
-- connected = 0;
-- }
-- if (!ipc.opt)
-- ipc.opt = inet->opt;
--
-- saddr = ipc.addr;
-- ipc.addr = faddr = daddr;
--
-- if (ipc.opt && ipc.opt->srr) {
-- if (!daddr)
-- return -EINVAL;
-- faddr = ipc.opt->faddr;
-- connected = 0;
-- }
-- tos = RT_TOS(inet->tos);
-- if (sock_flag(sk, SOCK_LOCALROUTE) ||
-- (msg->msg_flags & MSG_DONTROUTE) ||
-- (ipc.opt && ipc.opt->is_strictroute)) {
-- tos |= RTO_ONLINK;
-- connected = 0;
-- }
--
-- if (ipv4_is_multicast(daddr)) {
-- if (!ipc.oif)
-- ipc.oif = inet->mc_index;
-- if (!saddr)
-- saddr = inet->mc_addr;
-- connected = 0;
-- }
--
-- if (connected)
-- rt = (struct rtable*)sk_dst_check(sk, 0);
--
-- if (rt == NULL) {
-- struct flowi fl = { .oif = ipc.oif,
-- .nl_u = { .ip4_u =
-- { .daddr = faddr,
-- .saddr = saddr,
-- .tos = tos } },
-- .proto = sk->sk_protocol,
-- .uli_u = { .ports =
-- { .sport = inet->sport,
-- .dport = dport } } };
-- struct net *net = sock_net(sk);
-- struct nx_info *nxi = sk->sk_nx_info;
--
-- security_sk_classify_flow(sk, &fl);
-- err = ip_v4_find_src(net, nxi, &rt, &fl);
-- if (err)
-- goto out;
--
-- err = ip_route_output_flow(net, &rt, &fl, sk, 1);
-- if (err) {
-- if (err == -ENETUNREACH)
-- IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
-- goto out;
-- }
--
-- err = -EACCES;
-- if ((rt->rt_flags & RTCF_BROADCAST) &&
-- !sock_flag(sk, SOCK_BROADCAST))
-- goto out;
-- if (connected)
-- sk_dst_set(sk, dst_clone(&rt->u.dst));
-- }
--
-- if (msg->msg_flags&MSG_CONFIRM)
-- goto do_confirm;
--back_from_confirm:
--
-- saddr = rt->rt_src;
-- if (!ipc.addr)
-- daddr = ipc.addr = rt->rt_dst;
--
-- lock_sock(sk);
-- if (unlikely(up->pending)) {
-- /* The socket is already corked while preparing it. */
-- /* ... which is an evident application bug. --ANK */
-- release_sock(sk);
--
-- LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");
-- err = -EINVAL;
-- goto out;
-- }
-- /*
-- * Now cork the socket to pend data.
-- */
-- inet->cork.fl.fl4_dst = daddr;
-- inet->cork.fl.fl_ip_dport = dport;
-- inet->cork.fl.fl4_src = saddr;
-- inet->cork.fl.fl_ip_sport = inet->sport;
-- up->pending = AF_INET;
--
--do_append_data:
-- up->len += ulen;
-- getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
-- err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
-- sizeof(struct udphdr), &ipc, rt,
-- corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
-- if (err)
-- udp_flush_pending_frames(sk);
-- else if (!corkreq)
-- err = udp_push_pending_frames(sk);
-- else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
-- up->pending = 0;
-- release_sock(sk);
--
--out:
-- ip_rt_put(rt);
-- if (free)
-- kfree(ipc.opt);
-- if (!err)
-- return len;
-- /*
-- * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting
-- * ENOBUFS might not be good (it's not tunable per se), but otherwise
-- * we don't have a good statistic (IpOutDiscards but it can be too many
-- * things). We could add another new stat but at least for now that
-- * seems like overkill.
-- */
-- if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
-- UDP_INC_STATS_USER(sock_net(sk),
-- UDP_MIB_SNDBUFERRORS, is_udplite);
-- }
-- return err;
--
--do_confirm:
-- dst_confirm(&rt->u.dst);
-- if (!(msg->msg_flags&MSG_PROBE) || len)
-- goto back_from_confirm;
-- err = 0;
-- goto out;
--}
--
--int udp_sendpage(struct sock *sk, struct page *page, int offset,
-- size_t size, int flags)
--{
-- struct udp_sock *up = udp_sk(sk);
-- int ret;
--
-- if (!up->pending) {
-- struct msghdr msg = { .msg_flags = flags|MSG_MORE };
--
--		/* Call udp_sendmsg to specify the destination address, which
--		 * the sendpage interface can't pass.
-- * This will succeed only when the socket is connected.
-- */
-- ret = udp_sendmsg(NULL, sk, &msg, 0);
-- if (ret < 0)
-- return ret;
-- }
--
-- lock_sock(sk);
--
-- if (unlikely(!up->pending)) {
-- release_sock(sk);
--
-- LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n");
-- return -EINVAL;
-- }
--
-- ret = ip_append_page(sk, page, offset, size, flags);
-- if (ret == -EOPNOTSUPP) {
-- release_sock(sk);
-- return sock_no_sendpage(sk->sk_socket, page, offset,
-- size, flags);
-- }
-- if (ret < 0) {
-- udp_flush_pending_frames(sk);
-- goto out;
-- }
--
-- up->len += size;
-- if (!(up->corkflag || (flags&MSG_MORE)))
-- ret = udp_push_pending_frames(sk);
-- if (!ret)
-- ret = size;
--out:
-- release_sock(sk);
-- return ret;
--}
--
--/*
-- * IOCTL requests applicable to the UDP protocol
-- */
--
--int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
--{
-- switch (cmd) {
-- case SIOCOUTQ:
-- {
-- int amount = atomic_read(&sk->sk_wmem_alloc);
-- return put_user(amount, (int __user *)arg);
-- }
--
-- case SIOCINQ:
-- {
-- struct sk_buff *skb;
-- unsigned long amount;
--
-- amount = 0;
-- spin_lock_bh(&sk->sk_receive_queue.lock);
-- skb = skb_peek(&sk->sk_receive_queue);
-- if (skb != NULL) {
-- /*
-- * We will only return the amount
-- * of this packet since that is all
-- * that will be read.
-- */
-- amount = skb->len - sizeof(struct udphdr);
-- }
-- spin_unlock_bh(&sk->sk_receive_queue.lock);
-- return put_user(amount, (int __user *)arg);
-- }
--
-- default:
-- return -ENOIOCTLCMD;
-- }
--
-- return 0;
--}
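A minimal userspace sketch of these two ioctls, assuming fd is a UDP socket; note that SIOCINQ reports only the next datagram's payload, as the code above explains:

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/sockios.h>   /* SIOCINQ, SIOCOUTQ */

static void show_queues(int fd)
{
	int inq = 0, outq = 0;

	if (ioctl(fd, SIOCINQ, &inq) == 0 &&
	    ioctl(fd, SIOCOUTQ, &outq) == 0)
		printf("next rx datagram: %d bytes, unsent: %d bytes\n",
		       inq, outq);
}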
--
--/*
-- * This should be easy: if there is something there we
-- * return it; otherwise we block.
-- */
--
--int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-- size_t len, int noblock, int flags, int *addr_len)
--{
-- struct inet_sock *inet = inet_sk(sk);
-- struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
-- struct sk_buff *skb;
-- unsigned int ulen, copied;
-- int peeked;
-- int err;
-- int is_udplite = IS_UDPLITE(sk);
--
-- /*
-- * Check any passed addresses
-- */
-- if (addr_len)
-- *addr_len=sizeof(*sin);
--
-- if (flags & MSG_ERRQUEUE)
-- return ip_recv_error(sk, msg, len);
--
--try_again:
-- skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
-- &peeked, &err);
-- if (!skb)
-- goto out;
--
-- ulen = skb->len - sizeof(struct udphdr);
-- copied = len;
-- if (copied > ulen)
-- copied = ulen;
-- else if (copied < ulen)
-- msg->msg_flags |= MSG_TRUNC;
--
-- /*
-- * If checksum is needed at all, try to do it while copying the
-- * data. If the data is truncated, or if we only want a partial
-- * coverage checksum (UDP-Lite), do it before the copy.
-- */
--
-- if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
-- if (udp_lib_checksum_complete(skb))
-- goto csum_copy_err;
-- }
--
-- if (skb_csum_unnecessary(skb))
-- err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
-- msg->msg_iov, copied );
-- else {
-- err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
--
-- if (err == -EINVAL)
-- goto csum_copy_err;
-- }
--
-- if (err)
-- goto out_free;
--
-- if (!peeked)
-- UDP_INC_STATS_USER(sock_net(sk),
-- UDP_MIB_INDATAGRAMS, is_udplite);
--
-- sock_recv_timestamp(msg, sk, skb);
--
-- /* Copy the address. */
-- if (sin)
-- {
-- sin->sin_family = AF_INET;
-- sin->sin_port = udp_hdr(skb)->source;
-- sin->sin_addr.s_addr = nx_map_sock_lback(
-- skb->sk->sk_nx_info, ip_hdr(skb)->saddr);
-- memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
-- }
-- if (inet->cmsg_flags)
-- ip_cmsg_recv(msg, skb);
--
-- err = copied;
-- if (flags & MSG_TRUNC)
-- err = ulen;
--
--out_free:
-- lock_sock(sk);
-- skb_free_datagram(sk, skb);
-- release_sock(sk);
--out:
-- return err;
--
--csum_copy_err:
-- lock_sock(sk);
-- if (!skb_kill_datagram(sk, skb, flags))
-- UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
-- release_sock(sk);
--
-- if (noblock)
-- return -EAGAIN;
-- goto try_again;
--}
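From userspace, the MSG_TRUNC handling above can be observed directly: on Linux, passing MSG_TRUNC in the recv() flags makes the call return the real datagram length even when the buffer was smaller. A minimal sketch, assuming fd is a UDP socket:

#include <stdio.h>
#include <sys/socket.h>

static void read_one(int fd)
{
	char buf[512];
	ssize_t n = recv(fd, buf, sizeof(buf), MSG_TRUNC);

	if (n > (ssize_t)sizeof(buf))
		fprintf(stderr, "datagram truncated: %zd > %zu bytes\n",
			n, sizeof(buf));
}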
--
--
--int udp_disconnect(struct sock *sk, int flags)
--{
-- struct inet_sock *inet = inet_sk(sk);
-- /*
-- * 1003.1g - break association.
-- */
--
-- sk->sk_state = TCP_CLOSE;
-- inet->daddr = 0;
-- inet->dport = 0;
-- sk->sk_bound_dev_if = 0;
-- if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
-- inet_reset_saddr(sk);
--
-- if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) {
-- sk->sk_prot->unhash(sk);
-- inet->sport = 0;
-- }
-- sk_dst_reset(sk);
-- return 0;
--}
--
--static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
--{
-- int is_udplite = IS_UDPLITE(sk);
-- int rc;
--
-- if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) {
-- /* Note that an ENOMEM error is charged twice */
-- if (rc == -ENOMEM) {
-- UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
-- is_udplite);
-- atomic_inc(&sk->sk_drops);
-- }
-- goto drop;
-- }
--
-- return 0;
--
--drop:
-- UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
-- kfree_skb(skb);
-- return -1;
--}
--
--/* returns:
-- * -1: error
-- * 0: success
-- * >0: "udp encap" protocol resubmission
-- *
-- * Note that in the success and error cases, the skb is assumed to
-- * have either been requeued or freed.
-- */
--int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
--{
-- struct udp_sock *up = udp_sk(sk);
-- int rc;
-- int is_udplite = IS_UDPLITE(sk);
--
-- /*
-- * Charge it to the socket, dropping if the queue is full.
-- */
-- if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
-- goto drop;
-- nf_reset(skb);
--
-- if (up->encap_type) {
-- /*
-- * This is an encapsulation socket so pass the skb to
-- * the socket's udp_encap_rcv() hook. Otherwise, just
-- * fall through and pass this up the UDP socket.
-- * up->encap_rcv() returns the following value:
-- * =0 if skb was successfully passed to the encap
-- * handler or was discarded by it.
-- * >0 if skb should be passed on to UDP.
-- * <0 if skb should be resubmitted as proto -N
-- */
--
-- /* if we're overly short, let UDP handle it */
-- if (skb->len > sizeof(struct udphdr) &&
-- up->encap_rcv != NULL) {
-- int ret;
--
-- ret = (*up->encap_rcv)(sk, skb);
-- if (ret <= 0) {
-- UDP_INC_STATS_BH(sock_net(sk),
-- UDP_MIB_INDATAGRAMS,
-- is_udplite);
-- return -ret;
-- }
-- }
--
-- /* FALLTHROUGH -- it's a UDP Packet */
-- }
--
-- /*
-- * UDP-Lite specific tests, ignored on UDP sockets
-- */
-- if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
--
-- /*
-- * MIB statistics other than incrementing the error count are
-- * disabled for the following two types of errors: these depend
-- * on the application settings, not on the functioning of the
-- * protocol stack as such.
-- *
-- * RFC 3828 here recommends (sec 3.3): "There should also be a
-- * way ... to ... at least let the receiving application block
-- * delivery of packets with coverage values less than a value
-- * provided by the application."
-- */
-- if (up->pcrlen == 0) { /* full coverage was set */
-- LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage "
-- "%d while full coverage %d requested\n",
-- UDP_SKB_CB(skb)->cscov, skb->len);
-- goto drop;
-- }
-- /* The next case involves violating the min. coverage requested
--		 * by the receiver. This is subtle: if the receiver wants x and
--		 * x is greater than the buffer size/MTU, then the receiver will
--		 * complain that it wants x while the sender emits packets of
--		 * smaller size y.
-- * Therefore the above ...()->partial_cov statement is essential.
-- */
-- if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
-- LIMIT_NETDEBUG(KERN_WARNING
-- "UDPLITE: coverage %d too small, need min %d\n",
-- UDP_SKB_CB(skb)->cscov, up->pcrlen);
-- goto drop;
-- }
-- }
--
-- if (sk->sk_filter) {
-- if (udp_lib_checksum_complete(skb))
-- goto drop;
-- }
--
-- rc = 0;
--
-- bh_lock_sock(sk);
-- if (!sock_owned_by_user(sk))
-- rc = __udp_queue_rcv_skb(sk, skb);
-- else
-- sk_add_backlog(sk, skb);
-- bh_unlock_sock(sk);
--
-- return rc;
--
--drop:
-- UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
-- kfree_skb(skb);
-- return -1;
--}
--
--/*
-- * Multicasts and broadcasts go to each listener.
-- *
-- * Note: called only from the BH handler context,
-- * so we don't need to lock the hashes.
-- */
--static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
-- struct udphdr *uh,
-- __be32 saddr, __be32 daddr,
-- struct hlist_head udptable[])
--{
-- struct sock *sk;
-- int dif;
--
-- read_lock(&udp_hash_lock);
-- sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]);
-- dif = skb->dev->ifindex;
-- sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
-- if (sk) {
-- struct sock *sknext = NULL;
--
-- do {
-- struct sk_buff *skb1 = skb;
--
-- sknext = udp_v4_mcast_next(net, sk_next(sk), uh->dest,
-- daddr, uh->source, saddr,
-- dif);
-- if (sknext)
-- skb1 = skb_clone(skb, GFP_ATOMIC);
--
-- if (skb1) {
-- int ret = udp_queue_rcv_skb(sk, skb1);
-- if (ret > 0)
-- /* we should probably re-process instead
-- * of dropping packets here. */
-- kfree_skb(skb1);
-- }
-- sk = sknext;
-- } while (sknext);
-- } else
-- kfree_skb(skb);
-- read_unlock(&udp_hash_lock);
-- return 0;
--}
--
--/* Initialize the UDP checksum. If it returns zero (success),
-- * CHECKSUM_UNNECESSARY means that no more checks are required.
-- * Otherwise, checksum completion requires checksumming the packet body,
-- * including the UDP header, and folding it into skb->csum.
-- */
--static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
-- int proto)
--{
-- const struct iphdr *iph;
-- int err;
--
-- UDP_SKB_CB(skb)->partial_cov = 0;
-- UDP_SKB_CB(skb)->cscov = skb->len;
--
-- if (proto == IPPROTO_UDPLITE) {
-- err = udplite_checksum_init(skb, uh);
-- if (err)
-- return err;
-- }
--
-- iph = ip_hdr(skb);
-- if (uh->check == 0) {
-- skb->ip_summed = CHECKSUM_UNNECESSARY;
-- } else if (skb->ip_summed == CHECKSUM_COMPLETE) {
-- if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
-- proto, skb->csum))
-- skb->ip_summed = CHECKSUM_UNNECESSARY;
-- }
-- if (!skb_csum_unnecessary(skb))
-- skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
-- skb->len, proto, 0);
-- /* Probably, we should checksum udp header (it should be in cache
-- * in any case) and data in tiny packets (< rx copybreak).
-- */
--
-- return 0;
--}
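For reference, a standalone model of the pseudo-header checksum that csum_tcpudp_magic()/csum_tcpudp_nofold() compute (RFC 768/1071 style; byte order is handled explicitly here rather than via the kernel's folded-carry helpers):

#include <stdint.h>
#include <stddef.h>

static uint16_t udp_checksum(uint32_t saddr, uint32_t daddr,
			     const uint8_t *udp, size_t len)
{
	uint32_t sum = 0;
	size_t i;

	/* pseudo-header: source, destination, protocol, UDP length */
	sum += (saddr >> 16) + (saddr & 0xffff);
	sum += (daddr >> 16) + (daddr & 0xffff);
	sum += 17;                          /* IPPROTO_UDP */
	sum += (uint32_t)len;

	/* UDP header plus payload, as big-endian 16-bit words */
	for (i = 0; i + 1 < len; i += 2)
		sum += (udp[i] << 8) | udp[i + 1];
	if (len & 1)
		sum += udp[len - 1] << 8;   /* pad the odd trailing byte */

	while (sum >> 16)                   /* fold the carries back in */
		sum = (sum & 0xffff) + (sum >> 16);
	return ~sum & 0xffff;               /* 0 is sent as 0xffff on UDP */
}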
--
--/*
-- * All we need to do is get the socket, and then do a checksum.
-- */
--
--int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
-- int proto)
--{
-- struct sock *sk;
-- struct udphdr *uh;
-- unsigned short ulen;
-- struct rtable *rt = (struct rtable*)skb->dst;
-- __be32 saddr = ip_hdr(skb)->saddr;
-- __be32 daddr = ip_hdr(skb)->daddr;
-- struct net *net = dev_net(skb->dev);
--
-- /*
-- * Validate the packet.
-- */
-- if (!pskb_may_pull(skb, sizeof(struct udphdr)))
-- goto drop; /* No space for header. */
--
-- uh = udp_hdr(skb);
-- ulen = ntohs(uh->len);
-- if (ulen > skb->len)
-- goto short_packet;
--
-- if (proto == IPPROTO_UDP) {
-- /* UDP validates ulen. */
-- if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))
-- goto short_packet;
-- uh = udp_hdr(skb);
-- }
--
-- if (udp4_csum_init(skb, uh, proto))
-- goto csum_error;
--
-- if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
-- return __udp4_lib_mcast_deliver(net, skb, uh,
-- saddr, daddr, udptable);
--
-- sk = __udp4_lib_lookup(net, saddr, uh->source, daddr,
-- uh->dest, inet_iif(skb), udptable);
--
-- if (sk != NULL) {
-- int ret = udp_queue_rcv_skb(sk, skb);
-- sock_put(sk);
--
-- /* a return value > 0 means to resubmit the input, but
-- * it wants the return to be -protocol, or 0
-- */
-- if (ret > 0)
-- return -ret;
-- return 0;
-- }
--
-- if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
-- goto drop;
-- nf_reset(skb);
--
--	/* No socket. Drop the packet silently if the checksum is wrong. */
-- if (udp_lib_checksum_complete(skb))
-- goto csum_error;
--
-- UDP_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
-- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
--
-- /*
--	 * Hmm. We got a UDP packet to a port on which we
--	 * don't want to listen. Ignore it.
-- */
-- kfree_skb(skb);
-- return 0;
--
--short_packet:
-- LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From " NIPQUAD_FMT ":%u %d/%d to " NIPQUAD_FMT ":%u\n",
-- proto == IPPROTO_UDPLITE ? "-Lite" : "",
-- NIPQUAD(saddr),
-- ntohs(uh->source),
-- ulen,
-- skb->len,
-- NIPQUAD(daddr),
-- ntohs(uh->dest));
-- goto drop;
--
--csum_error:
-- /*
-- * RFC1122: OK. Discards the bad packet silently (as far as
-- * the network is concerned, anyway) as per 4.1.3.4 (MUST).
-- */
-- LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From " NIPQUAD_FMT ":%u to " NIPQUAD_FMT ":%u ulen %d\n",
-- proto == IPPROTO_UDPLITE ? "-Lite" : "",
-- NIPQUAD(saddr),
-- ntohs(uh->source),
-- NIPQUAD(daddr),
-- ntohs(uh->dest),
-- ulen);
--drop:
-- UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
-- kfree_skb(skb);
-- return 0;
--}
--
--int udp_rcv(struct sk_buff *skb)
--{
-- return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP);
--}
--
--void udp_destroy_sock(struct sock *sk)
--{
-- lock_sock(sk);
-- udp_flush_pending_frames(sk);
-- release_sock(sk);
--}
--
--/*
-- * Socket option code for UDP
-- */
--int udp_lib_setsockopt(struct sock *sk, int level, int optname,
-- char __user *optval, int optlen,
-- int (*push_pending_frames)(struct sock *))
--{
-- struct udp_sock *up = udp_sk(sk);
-- int val;
-- int err = 0;
-- int is_udplite = IS_UDPLITE(sk);
--
-- if (optlen<sizeof(int))
-- return -EINVAL;
--
-- if (get_user(val, (int __user *)optval))
-- return -EFAULT;
--
-- switch (optname) {
-- case UDP_CORK:
-- if (val != 0) {
-- up->corkflag = 1;
-- } else {
-- up->corkflag = 0;
-- lock_sock(sk);
-- (*push_pending_frames)(sk);
-- release_sock(sk);
-- }
-- break;
--
-- case UDP_ENCAP:
-- switch (val) {
-- case 0:
-- case UDP_ENCAP_ESPINUDP:
-- case UDP_ENCAP_ESPINUDP_NON_IKE:
-- up->encap_rcv = xfrm4_udp_encap_rcv;
-- /* FALLTHROUGH */
-- case UDP_ENCAP_L2TPINUDP:
-- up->encap_type = val;
-- break;
-- default:
-- err = -ENOPROTOOPT;
-- break;
-- }
-- break;
--
-- /*
-- * UDP-Lite's partial checksum coverage (RFC 3828).
-- */
-- /* The sender sets actual checksum coverage length via this option.
-- * The case coverage > packet length is handled by send module. */
-- case UDPLITE_SEND_CSCOV:
-- if (!is_udplite) /* Disable the option on UDP sockets */
-- return -ENOPROTOOPT;
-- if (val != 0 && val < 8) /* Illegal coverage: use default (8) */
-- val = 8;
-- else if (val > USHORT_MAX)
-- val = USHORT_MAX;
-- up->pcslen = val;
-- up->pcflag |= UDPLITE_SEND_CC;
-- break;
--
-- /* The receiver specifies a minimum checksum coverage value. To make
-- * sense, this should be set to at least 8 (as done below). If zero is
-- * used, this again means full checksum coverage. */
-- case UDPLITE_RECV_CSCOV:
-- if (!is_udplite) /* Disable the option on UDP sockets */
-- return -ENOPROTOOPT;
-- if (val != 0 && val < 8) /* Avoid silly minimal values. */
-- val = 8;
-- else if (val > USHORT_MAX)
-- val = USHORT_MAX;
-- up->pcrlen = val;
-- up->pcflag |= UDPLITE_RECV_CC;
-- break;
--
-- default:
-- err = -ENOPROTOOPT;
-- break;
-- }
--
-- return err;
--}
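A minimal userspace sketch of driving the UDP-Lite coverage options above (RFC 3828); the constants are defined defensively in case older libc headers lack them:

#include <sys/socket.h>
#include <netinet/in.h>

#ifndef IPPROTO_UDPLITE
#define IPPROTO_UDPLITE 136
#endif
#ifndef SOL_UDPLITE
#define SOL_UDPLITE 136
#endif
#ifndef UDPLITE_SEND_CSCOV
#define UDPLITE_SEND_CSCOV 10
#endif
#ifndef UDPLITE_RECV_CSCOV
#define UDPLITE_RECV_CSCOV 11
#endif

static int make_udplite_socket(void)
{
	int cov = 20;    /* checksum only the first 20 bytes (min is 8) */
	int fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDPLITE);

	if (fd >= 0) {
		setsockopt(fd, SOL_UDPLITE, UDPLITE_SEND_CSCOV,
			   &cov, sizeof(cov));
		setsockopt(fd, SOL_UDPLITE, UDPLITE_RECV_CSCOV,
			   &cov, sizeof(cov));
	}
	return fd;
}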
--
--int udp_setsockopt(struct sock *sk, int level, int optname,
-- char __user *optval, int optlen)
--{
-- if (level == SOL_UDP || level == SOL_UDPLITE)
-- return udp_lib_setsockopt(sk, level, optname, optval, optlen,
-- udp_push_pending_frames);
-- return ip_setsockopt(sk, level, optname, optval, optlen);
--}
--
--#ifdef CONFIG_COMPAT
--int compat_udp_setsockopt(struct sock *sk, int level, int optname,
-- char __user *optval, int optlen)
--{
-- if (level == SOL_UDP || level == SOL_UDPLITE)
-- return udp_lib_setsockopt(sk, level, optname, optval, optlen,
-- udp_push_pending_frames);
-- return compat_ip_setsockopt(sk, level, optname, optval, optlen);
--}
--#endif
--
--int udp_lib_getsockopt(struct sock *sk, int level, int optname,
-- char __user *optval, int __user *optlen)
--{
-- struct udp_sock *up = udp_sk(sk);
-- int val, len;
--
-- if (get_user(len,optlen))
-- return -EFAULT;
--
-- len = min_t(unsigned int, len, sizeof(int));
--
-- if (len < 0)
-- return -EINVAL;
--
-- switch (optname) {
-- case UDP_CORK:
-- val = up->corkflag;
-- break;
--
-- case UDP_ENCAP:
-- val = up->encap_type;
-- break;
--
--	/* The following two cannot be changed on UDP sockets; the return is
--	 * always 0 (which corresponds to the full checksum coverage of UDP). */
-- case UDPLITE_SEND_CSCOV:
-- val = up->pcslen;
-- break;
--
-- case UDPLITE_RECV_CSCOV:
-- val = up->pcrlen;
-- break;
--
-- default:
-- return -ENOPROTOOPT;
-- }
--
-- if (put_user(len, optlen))
-- return -EFAULT;
-- if (copy_to_user(optval, &val,len))
-- return -EFAULT;
-- return 0;
--}
--
--int udp_getsockopt(struct sock *sk, int level, int optname,
-- char __user *optval, int __user *optlen)
--{
-- if (level == SOL_UDP || level == SOL_UDPLITE)
-- return udp_lib_getsockopt(sk, level, optname, optval, optlen);
-- return ip_getsockopt(sk, level, optname, optval, optlen);
--}
--
--#ifdef CONFIG_COMPAT
--int compat_udp_getsockopt(struct sock *sk, int level, int optname,
-- char __user *optval, int __user *optlen)
--{
-- if (level == SOL_UDP || level == SOL_UDPLITE)
-- return udp_lib_getsockopt(sk, level, optname, optval, optlen);
-- return compat_ip_getsockopt(sk, level, optname, optval, optlen);
--}
--#endif
--/**
-- * udp_poll - wait for a UDP event.
-- * @file: file struct
-- * @sock: socket
-- * @wait: poll table
-- *
-- * This is the same as datagram_poll(), except for the special case of
-- * blocking sockets. If an application is using a blocking fd and a
-- * packet with a checksum error is in the queue, it could get a return
-- * from select() indicating data available, but then block when reading
-- * it. Add special-case code to work around these arguably broken
-- * applications.
-- */
--unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
--{
-- unsigned int mask = datagram_poll(file, sock, wait);
-- struct sock *sk = sock->sk;
-- int is_lite = IS_UDPLITE(sk);
--
-- /* Check for false positives due to checksum errors */
-- if ((mask & POLLRDNORM) &&
-- !(file->f_flags & O_NONBLOCK) &&
-- !(sk->sk_shutdown & RCV_SHUTDOWN)) {
-- struct sk_buff_head *rcvq = &sk->sk_receive_queue;
-- struct sk_buff *skb;
--
-- spin_lock_bh(&rcvq->lock);
-- while ((skb = skb_peek(rcvq)) != NULL &&
-- udp_lib_checksum_complete(skb)) {
-- UDP_INC_STATS_BH(sock_net(sk),
-- UDP_MIB_INERRORS, is_lite);
-- __skb_unlink(skb, rcvq);
-- kfree_skb(skb);
-- }
-- spin_unlock_bh(&rcvq->lock);
--
-- /* nothing to see, move along */
-- if (skb == NULL)
-- mask &= ~(POLLIN | POLLRDNORM);
-- }
--
-- return mask;
--}
--
--struct proto udp_prot = {
-- .name = "UDP",
-- .owner = THIS_MODULE,
-- .close = udp_lib_close,
-- .connect = ip4_datagram_connect,
-- .disconnect = udp_disconnect,
-- .ioctl = udp_ioctl,
-- .destroy = udp_destroy_sock,
-- .setsockopt = udp_setsockopt,
-- .getsockopt = udp_getsockopt,
-- .sendmsg = udp_sendmsg,
-- .recvmsg = udp_recvmsg,
-- .sendpage = udp_sendpage,
-- .backlog_rcv = __udp_queue_rcv_skb,
-- .hash = udp_lib_hash,
-- .unhash = udp_lib_unhash,
-- .get_port = udp_v4_get_port,
-- .memory_allocated = &udp_memory_allocated,
-- .sysctl_mem = sysctl_udp_mem,
-- .sysctl_wmem = &sysctl_udp_wmem_min,
-- .sysctl_rmem = &sysctl_udp_rmem_min,
-- .obj_size = sizeof(struct udp_sock),
-- .h.udp_hash = udp_hash,
--#ifdef CONFIG_COMPAT
-- .compat_setsockopt = compat_udp_setsockopt,
-- .compat_getsockopt = compat_udp_getsockopt,
--#endif
--};
--
--/* ------------------------------------------------------------------------ */
--#ifdef CONFIG_PROC_FS
--
--static struct sock *udp_get_first(struct seq_file *seq)
--{
-- struct sock *sk;
-- struct udp_iter_state *state = seq->private;
-- struct net *net = seq_file_net(seq);
--
-- for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
-- struct hlist_node *node;
-- sk_for_each(sk, node, state->hashtable + state->bucket) {
-- if (!net_eq(sock_net(sk), net))
-- continue;
-- if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
-- continue;
-- if (sk->sk_family == state->family)
-- goto found;
-- }
-- }
-- sk = NULL;
--found:
-- return sk;
--}
--
--static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
--{
-- struct udp_iter_state *state = seq->private;
-- struct net *net = seq_file_net(seq);
--
-- do {
-- sk = sk_next(sk);
--try_again:
-- ;
-- } while (sk && (!net_eq(sock_net(sk), net) ||
-- sk->sk_family != state->family ||
-- !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)));
--
-- if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
-- sk = sk_head(state->hashtable + state->bucket);
-- goto try_again;
-- }
-- return sk;
--}
--
--static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
--{
-- struct sock *sk = udp_get_first(seq);
--
-- if (sk)
-- while (pos && (sk = udp_get_next(seq, sk)) != NULL)
-- --pos;
-- return pos ? NULL : sk;
--}
--
--static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
-- __acquires(udp_hash_lock)
--{
-- read_lock(&udp_hash_lock);
-- return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
--}
--
--static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
--{
-- struct sock *sk;
--
-- if (v == SEQ_START_TOKEN)
-- sk = udp_get_idx(seq, 0);
-- else
-- sk = udp_get_next(seq, v);
--
-- ++*pos;
-- return sk;
--}
--
--static void udp_seq_stop(struct seq_file *seq, void *v)
-- __releases(udp_hash_lock)
--{
-- read_unlock(&udp_hash_lock);
--}
--
--static int udp_seq_open(struct inode *inode, struct file *file)
--{
-- struct udp_seq_afinfo *afinfo = PDE(inode)->data;
-- struct udp_iter_state *s;
-- int err;
--
-- err = seq_open_net(inode, file, &afinfo->seq_ops,
-- sizeof(struct udp_iter_state));
-- if (err < 0)
-- return err;
--
-- s = ((struct seq_file *)file->private_data)->private;
-- s->family = afinfo->family;
-- s->hashtable = afinfo->hashtable;
-- return err;
--}
--
--/* ------------------------------------------------------------------------ */
--int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo)
--{
-- struct proc_dir_entry *p;
-- int rc = 0;
--
-- afinfo->seq_fops.open = udp_seq_open;
-- afinfo->seq_fops.read = seq_read;
-- afinfo->seq_fops.llseek = seq_lseek;
-- afinfo->seq_fops.release = seq_release_net;
--
-- afinfo->seq_ops.start = udp_seq_start;
-- afinfo->seq_ops.next = udp_seq_next;
-- afinfo->seq_ops.stop = udp_seq_stop;
--
-- p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
-- &afinfo->seq_fops, afinfo);
-- if (!p)
-- rc = -ENOMEM;
-- return rc;
--}
--
--void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo)
--{
-- proc_net_remove(net, afinfo->name);
--}
--
--/* ------------------------------------------------------------------------ */
--static void udp4_format_sock(struct sock *sp, struct seq_file *f,
-- int bucket, int *len)
--{
-- struct inet_sock *inet = inet_sk(sp);
-- __be32 dest = inet->daddr;
-- __be32 src = inet->rcv_saddr;
-- __u16 destp = ntohs(inet->dport);
-- __u16 srcp = ntohs(inet->sport);
--
-- seq_printf(f, "%4d: %08X:%04X %08X:%04X"
-- " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n",
-- bucket,
-- nx_map_sock_lback(current_nx_info(), src), srcp,
-- nx_map_sock_lback(current_nx_info(), dest), destp,
-- sp->sk_state,
-- atomic_read(&sp->sk_wmem_alloc),
-- atomic_read(&sp->sk_rmem_alloc),
-- 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
-- atomic_read(&sp->sk_refcnt), sp,
-- atomic_read(&sp->sk_drops), len);
--}
--
--int udp4_seq_show(struct seq_file *seq, void *v)
--{
-- if (v == SEQ_START_TOKEN)
-- seq_printf(seq, "%-127s\n",
-- " sl local_address rem_address st tx_queue "
-- "rx_queue tr tm->when retrnsmt uid timeout "
-- "inode ref pointer drops");
-- else {
-- struct udp_iter_state *state = seq->private;
-- int len;
--
-- udp4_format_sock(v, seq, state->bucket, &len);
-- seq_printf(seq, "%*s\n", 127 - len, "");
-- }
-- return 0;
--}
--
--/* ------------------------------------------------------------------------ */
--static struct udp_seq_afinfo udp4_seq_afinfo = {
-- .name = "udp",
-- .family = AF_INET,
-- .hashtable = udp_hash,
-- .seq_fops = {
-- .owner = THIS_MODULE,
-- },
-- .seq_ops = {
-- .show = udp4_seq_show,
-- },
--};
--
--static int udp4_proc_init_net(struct net *net)
--{
-- return udp_proc_register(net, &udp4_seq_afinfo);
--}
--
--static void udp4_proc_exit_net(struct net *net)
--{
-- udp_proc_unregister(net, &udp4_seq_afinfo);
--}
--
--static struct pernet_operations udp4_net_ops = {
-- .init = udp4_proc_init_net,
-- .exit = udp4_proc_exit_net,
--};
--
--int __init udp4_proc_init(void)
--{
-- return register_pernet_subsys(&udp4_net_ops);
--}
--
--void udp4_proc_exit(void)
--{
-- unregister_pernet_subsys(&udp4_net_ops);
--}
--#endif /* CONFIG_PROC_FS */
--
--void __init udp_init(void)
--{
-- unsigned long limit;
--
-- /* Set the pressure threshold up using the same strategy as TCP. It is
-- * a fraction of global memory, up to 1/2 at 256 MB, decreasing toward
-- * zero with the amount of memory, with a floor of 128 pages.
-- */
-- limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
-- limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
-- limit = max(limit, 128UL);
-- sysctl_udp_mem[0] = limit / 4 * 3;
-- sysctl_udp_mem[1] = limit;
-- sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
--
-- sysctl_udp_rmem_min = SK_MEM_QUANTUM;
-- sysctl_udp_wmem_min = SK_MEM_QUANTUM;
--}
--
--EXPORT_SYMBOL(udp_disconnect);
--EXPORT_SYMBOL(udp_hash);
--EXPORT_SYMBOL(udp_hash_lock);
--EXPORT_SYMBOL(udp_ioctl);
--EXPORT_SYMBOL(udp_prot);
--EXPORT_SYMBOL(udp_sendmsg);
--EXPORT_SYMBOL(udp_lib_getsockopt);
--EXPORT_SYMBOL(udp_lib_setsockopt);
--EXPORT_SYMBOL(udp_poll);
--EXPORT_SYMBOL(udp_lib_get_port);
--
--#ifdef CONFIG_PROC_FS
--EXPORT_SYMBOL(udp_proc_register);
--EXPORT_SYMBOL(udp_proc_unregister);
--#endif
-diff -Nurb linux-2.6.27-524/net/packet/af_packet.c linux-2.6.27-525/net/packet/af_packet.c
---- linux-2.6.27-524/net/packet/af_packet.c 2009-12-04 16:03:47.000000000 -0500
-+++ linux-2.6.27-525/net/packet/af_packet.c 2009-12-04 16:09:31.000000000 -0500
+diff -NurpP --exclude '*.orig' --exclude '*.rej' linux-2.6.27.10-vs2.3.x-PS-522-523-524/net/packet/af_packet.c linux-2.6.27.10-vs2.3.x-PS-522-523-524-525/net/packet/af_packet.c
+--- linux-2.6.27.10-vs2.3.x-PS-522-523-524/net/packet/af_packet.c 2008-10-13 14:52:09.000000000 +0200
++++ linux-2.6.27.10-vs2.3.x-PS-522-523-524-525/net/packet/af_packet.c 2009-01-21 03:38:41.000000000 +0100
@@ -77,6 +77,7 @@
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/vs_network.h>
- #include <linux/mutex.h>
#ifdef CONFIG_INET
-@@ -278,10 +279,53 @@
+ #include <net/inet_common.h>
+@@ -276,10 +277,53 @@ static const struct proto_ops packet_ops
static const struct proto_ops packet_ops_spkt;
/*
* When we registered the protocol we saved the socket in the data
-@@ -301,6 +345,16 @@
+@@ -299,6 +343,16 @@ static int packet_rcv_spkt(struct sk_buf
* so that this procedure is noop.
*/
if (skb->pkt_type == PACKET_LOOPBACK)
goto out;
-@@ -359,6 +413,9 @@
+@@ -357,6 +411,9 @@ static int packet_sendmsg_spkt(struct ki
__be16 proto=0;
int err;
/*
* Get and verify the address.
*/
-@@ -451,11 +508,16 @@
+@@ -449,11 +506,16 @@ out_unlock:
return err;
}
rcu_read_lock_bh();
filter = rcu_dereference(sk->sk_filter);
if (filter != NULL)
-@@ -775,6 +837,9 @@
+@@ -773,6 +835,9 @@ static int packet_sendmsg(struct kiocb *
unsigned char *addr;
int ifindex, err, reserve = 0;
/*
* Get and verify the address.
*/
-@@ -941,6 +1006,7 @@
+@@ -939,6 +1004,7 @@ static int packet_do_bind(struct sock *s
po->num = protocol;
po->prot_hook.type = protocol;
po->prot_hook.dev = dev;
po->ifindex = dev ? dev->ifindex : 0;
-@@ -1039,8 +1105,9 @@
+@@ -1037,8 +1103,9 @@ static int packet_create(struct net *net
__be16 proto = (__force __be16)protocol; /* weird, but documented */
int err;
if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
sock->type != SOCK_PACKET)
return -ESOCKTNOSUPPORT;
-@@ -1072,6 +1139,7 @@
+@@ -1069,6 +1136,7 @@ static int packet_create(struct net *net
+
spin_lock_init(&po->bind_lock);
- mutex_init(&po->pg_vec_lock);
po->prot_hook.func = packet_rcv;
-+ po->prot_hook.sknid_elevator = 1;
++ po->prot_hook.sknid_elevator = 1;
if (sock->type == SOCK_PACKET)
po->prot_hook.func = packet_rcv_spkt;