* The functions in this file will not compile correctly with gcc 2.4.x
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/cache.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
-#include <linux/highmem.h>
#include <net/protocol.h>
#include <net/dst.h>
#include <asm/uaccess.h>
#include <asm/system.h>
-static kmem_cache_t *skbuff_head_cache;
+#include "kmap_skb.h"
+
+static struct kmem_cache *skbuff_head_cache __read_mostly;
+static struct kmem_cache *skbuff_fclone_cache __read_mostly;
/*
* Keep out-of-line to prevent kernel bloat.
*/
void skb_over_panic(struct sk_buff *skb, int sz, void *here)
{
- printk(KERN_INFO "skput:over: %p:%d put:%d dev:%s",
- here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
+ printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
+ "data:%p tail:%p end:%p dev:%s\n",
+ here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end,
+ skb->dev ? skb->dev->name : "<NULL>");
BUG();
}
void skb_under_panic(struct sk_buff *skb, int sz, void *here)
{
- printk(KERN_INFO "skput:under: %p:%d put:%d dev:%s",
- here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
+ printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
+ "data:%p tail:%p end:%p dev:%s\n",
+ here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end,
+ skb->dev ? skb->dev->name : "<NULL>");
BUG();
}
+void skb_truesize_bug(struct sk_buff *skb)
+{
+ printk(KERN_ERR "SKB BUG: Invalid truesize (%u) "
+ "len=%u, sizeof(sk_buff)=%Zd\n",
+ skb->truesize, skb->len, sizeof(struct sk_buff));
+}
+EXPORT_SYMBOL(skb_truesize_bug);
+
/* Allocate a new skbuff. We do this ourselves so we can fill in a few
* 'private' fields and also do memory statistics to find all the
* [BEEP] leaks.
*/
/**
- * alloc_skb - allocate a network buffer
+ * __alloc_skb - allocate a network buffer
* @size: size to allocate
* @gfp_mask: allocation mask
+ * @fclone: allocate from fclone cache instead of head cache
+ * and allocate a cloned (child) skb
+ * @node: numa node to allocate memory on
*
* Allocate a new &sk_buff. The returned buffer has no headroom and a
* tail room of size bytes. The object has a reference count of one.
* Buffers may only be allocated from interrupts using a @gfp_mask of
* %GFP_ATOMIC.
*/
-struct sk_buff *alloc_skb(unsigned int size, int gfp_mask)
+#ifndef CONFIG_HAVE_ARCH_ALLOC_SKB
+struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
+ int fclone, int node)
{
+ struct kmem_cache *cache;
+ struct skb_shared_info *shinfo;
struct sk_buff *skb;
u8 *data;
+ cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
+
/* Get the HEAD */
- skb = kmem_cache_alloc(skbuff_head_cache,
- gfp_mask & ~__GFP_DMA);
+ skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
if (!skb)
goto out;
/* Get the DATA. Size must match skb_add_mtu(). */
size = SKB_DATA_ALIGN(size);
- data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+ data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
+ gfp_mask, node);
+ if (!data)
+ goto nodata;
+
+ memset(skb, 0, offsetof(struct sk_buff, truesize));
+ skb->truesize = size + sizeof(struct sk_buff);
+ atomic_set(&skb->users, 1);
+ skb->head = data;
+ skb->data = data;
+ skb->tail = data;
+ skb->end = data + size;
+ /* make sure we initialize shinfo sequentially */
+ shinfo = skb_shinfo(skb);
+ atomic_set(&shinfo->dataref, 1);
+ shinfo->nr_frags = 0;
+ shinfo->gso_size = 0;
+ shinfo->gso_segs = 0;
+ shinfo->gso_type = 0;
+ shinfo->ip6_frag_id = 0;
+ shinfo->frag_list = NULL;
+
+ if (fclone) {
+ struct sk_buff *child = skb + 1;
+ atomic_t *fclone_ref = (atomic_t *) (child + 1);
+
+ skb->fclone = SKB_FCLONE_ORIG;
+ atomic_set(fclone_ref, 1);
+
+ child->fclone = SKB_FCLONE_UNAVAILABLE;
+ }
+out:
+ return skb;
+nodata:
+ kmem_cache_free(cache, skb);
+ skb = NULL;
+ goto out;
+}
+#endif /* !CONFIG_HAVE_ARCH_ALLOC_SKB */
+
+/**
+ * alloc_skb_from_cache - allocate a network buffer
+ * @cp: kmem_cache from which to allocate the data area
+ * (object size must be big enough for @size bytes + skb overheads)
+ * @size: size to allocate
+ * @gfp_mask: allocation mask
+ *
+ * Allocate a new &sk_buff. The returned buffer has no headroom and
+ * tail room of size bytes. The object has a reference count of one.
+ * The return is the buffer. On a failure the return is %NULL.
+ *
+ * Buffers may only be allocated from interrupts using a @gfp_mask of
+ * %GFP_ATOMIC.
+ */
+struct sk_buff *alloc_skb_from_cache(struct kmem_cache *cp,
+ unsigned int size,
+ gfp_t gfp_mask,
+ int fclone)
+{
+ struct kmem_cache *cache;
+ struct sk_buff *skb;
+ u8 *data;
+
+ cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
+
+ /* Get the HEAD */
+ skb = kmem_cache_alloc(cache, gfp_mask & ~__GFP_DMA);
+ if (!skb)
+ goto out;
+
+ /* Get the DATA. */
+ size = SKB_DATA_ALIGN(size);
+ data = kmem_cache_alloc(cp, gfp_mask);
if (!data)
goto nodata;
atomic_set(&(skb_shinfo(skb)->dataref), 1);
skb_shinfo(skb)->nr_frags = 0;
- skb_shinfo(skb)->tso_size = 0;
- skb_shinfo(skb)->tso_segs = 0;
+ skb_shinfo(skb)->gso_size = 0;
+ skb_shinfo(skb)->gso_segs = 0;
+ skb_shinfo(skb)->gso_type = 0;
skb_shinfo(skb)->frag_list = NULL;
+
+ if (fclone) {
+ struct sk_buff *child = skb + 1;
+ atomic_t *fclone_ref = (atomic_t *) (child + 1);
+
+ skb->fclone = SKB_FCLONE_ORIG;
+ atomic_set(fclone_ref, 1);
+
+ child->fclone = SKB_FCLONE_UNAVAILABLE;
+ }
out:
return skb;
nodata:
- kmem_cache_free(skbuff_head_cache, skb);
+ kmem_cache_free(cache, skb);
skb = NULL;
goto out;
}
+/**
+ * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
+ * @dev: network device to receive on
+ * @length: length to allocate
+ * @gfp_mask: get_free_pages mask, passed to alloc_skb
+ *
+ * Allocate a new &sk_buff and assign it a usage count of one. The
+ * buffer has unspecified headroom built in. Users should allocate
+ * the headroom they think they need without accounting for the
+ * built in space. The built in space is used for optimisations.
+ *
+ * %NULL is returned if there is no free memory.
+ */
+struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
+ unsigned int length, gfp_t gfp_mask)
+{
+ int node = dev->class_dev.dev ? dev_to_node(dev->class_dev.dev) : -1;
+ struct sk_buff *skb;
+
+ skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node);
+ if (likely(skb)) {
+ skb_reserve(skb, NET_SKB_PAD);
+ skb->dev = dev;
+ }
+ return skb;
+}
-static void skb_drop_fraglist(struct sk_buff *skb)
+static void skb_drop_list(struct sk_buff **listp)
{
- struct sk_buff *list = skb_shinfo(skb)->frag_list;
+ struct sk_buff *list = *listp;
- skb_shinfo(skb)->frag_list = NULL;
+ *listp = NULL;
do {
struct sk_buff *this = list;
} while (list);
}
+static inline void skb_drop_fraglist(struct sk_buff *skb)
+{
+ skb_drop_list(&skb_shinfo(skb)->frag_list);
+}
+
static void skb_clone_fraglist(struct sk_buff *skb)
{
struct sk_buff *list;
skb_get(list);
}
-void skb_release_data(struct sk_buff *skb)
+static void skb_release_data(struct sk_buff *skb)
{
if (!skb->cloned ||
- atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
+ !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
+ &skb_shinfo(skb)->dataref)) {
if (skb_shinfo(skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
*/
void kfree_skbmem(struct sk_buff *skb)
{
+ struct sk_buff *other;
+ atomic_t *fclone_ref;
+
skb_release_data(skb);
- kmem_cache_free(skbuff_head_cache, skb);
+ switch (skb->fclone) {
+ case SKB_FCLONE_UNAVAILABLE:
+ kmem_cache_free(skbuff_head_cache, skb);
+ break;
+
+ case SKB_FCLONE_ORIG:
+ fclone_ref = (atomic_t *) (skb + 2);
+ if (atomic_dec_and_test(fclone_ref))
+ kmem_cache_free(skbuff_fclone_cache, skb);
+ break;
+
+ case SKB_FCLONE_CLONE:
+ fclone_ref = (atomic_t *) (skb + 1);
+ other = skb - 1;
+
+ /* The clone portion is available for
+ * fast-cloning again.
+ */
+ skb->fclone = SKB_FCLONE_UNAVAILABLE;
+
+ if (atomic_dec_and_test(fclone_ref))
+ kmem_cache_free(skbuff_fclone_cache, other);
+ break;
+ };
}
/**
void __kfree_skb(struct sk_buff *skb)
{
- if (skb->list) {
- printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
- "on a list (from %p).\n", NET_CALLER(skb));
- BUG();
- }
-
dst_release(skb->dst);
#ifdef CONFIG_XFRM
secpath_put(skb->sp);
#endif
- if(skb->destructor) {
- if (in_irq())
- printk(KERN_WARNING "Warning: kfree_skb on "
- "hard IRQ %p\n", NET_CALLER(skb));
+ if (skb->destructor) {
+ WARN_ON(in_irq());
skb->destructor(skb);
}
#ifdef CONFIG_NETFILTER
nf_conntrack_put(skb->nfct);
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+ nf_conntrack_put_reasm(skb->nfct_reasm);
+#endif
#ifdef CONFIG_BRIDGE_NETFILTER
nf_bridge_put(skb->nf_bridge);
#endif
skb->tc_index = 0;
#ifdef CONFIG_NET_CLS_ACT
skb->tc_verd = 0;
- skb->tc_classid = 0;
#endif
#endif
kfree_skbmem(skb);
}
+/**
+ * kfree_skb - free an sk_buff
+ * @skb: buffer to free
+ *
+ * Drop a reference to the buffer and free it if the usage count has
+ * hit zero.
+ */
+void kfree_skb(struct sk_buff *skb)
+{
+ if (unlikely(!skb))
+ return;
+ if (likely(atomic_read(&skb->users) == 1))
+ smp_rmb();
+ else if (likely(!atomic_dec_and_test(&skb->users)))
+ return;
+ __kfree_skb(skb);
+}
+
/**
* skb_clone - duplicate an sk_buff
* @skb: buffer to clone
* %GFP_ATOMIC.
*/
-struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
+struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
{
- struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
-
- if (!n)
- return NULL;
+ struct sk_buff *n;
+
+ n = skb + 1;
+ if (skb->fclone == SKB_FCLONE_ORIG &&
+ n->fclone == SKB_FCLONE_UNAVAILABLE) {
+ atomic_t *fclone_ref = (atomic_t *) (n + 1);
+ n->fclone = SKB_FCLONE_CLONE;
+ atomic_inc(fclone_ref);
+ } else {
+ n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+ if (!n)
+ return NULL;
+ n->fclone = SKB_FCLONE_UNAVAILABLE;
+ }
#define C(x) n->x = skb->x
n->next = n->prev = NULL;
- n->list = NULL;
n->sk = NULL;
- C(stamp);
+ C(tstamp);
C(dev);
- C(real_dev);
C(h);
C(nh);
C(mac);
memcpy(n->cb, skb->cb, sizeof(skb->cb));
C(len);
C(data_len);
+ C(mac_len);
C(csum);
C(local_df);
n->cloned = 1;
+ n->nohdr = 0;
+#ifdef CONFIG_XEN
+ C(proto_data_valid);
+ C(proto_csum_blank);
+#endif
C(pkt_type);
C(ip_summed);
C(priority);
+#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
+ C(ipvs_property);
+#endif
C(protocol);
- C(security);
n->destructor = NULL;
+ C(mark);
#ifdef CONFIG_NETFILTER
- C(nfmark);
- C(nfcache);
C(nfct);
nf_conntrack_get(skb->nfct);
-#ifdef CONFIG_NETFILTER_DEBUG
- C(nf_debug);
+ C(nfctinfo);
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+ C(nfct_reasm);
+ nf_conntrack_get_reasm(skb->nfct_reasm);
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
C(nf_bridge);
nf_bridge_get(skb->nf_bridge);
#endif
#endif /*CONFIG_NETFILTER*/
-#if defined(CONFIG_HIPPI)
- C(private);
-#endif
#ifdef CONFIG_NET_SCHED
C(tc_index);
#ifdef CONFIG_NET_CLS_ACT
n->tc_verd = SET_TC_VERD(skb->tc_verd,0);
- n->tc_verd = CLR_TC_OK2MUNGE(skb->tc_verd);
- n->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
- C(input_dev);
- C(tc_classid);
+ n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
+ n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
+ C(iif);
#endif
-
+ skb_copy_secmark(n, skb);
#endif
- C(xid);
C(truesize);
atomic_set(&n->users, 1);
C(head);
*/
unsigned long offset = new->data - old->data;
- new->list = NULL;
new->sk = NULL;
new->dev = old->dev;
- new->real_dev = old->real_dev;
new->priority = old->priority;
new->protocol = old->protocol;
new->dst = dst_clone(old->dst);
new->mac.raw = old->mac.raw + offset;
memcpy(new->cb, old->cb, sizeof(old->cb));
new->local_df = old->local_df;
+ new->fclone = SKB_FCLONE_UNAVAILABLE;
new->pkt_type = old->pkt_type;
- new->stamp = old->stamp;
+ new->tstamp = old->tstamp;
new->destructor = NULL;
- new->security = old->security;
+ new->mark = old->mark;
#ifdef CONFIG_NETFILTER
- new->nfmark = old->nfmark;
- new->nfcache = old->nfcache;
new->nfct = old->nfct;
nf_conntrack_get(old->nfct);
-#ifdef CONFIG_NETFILTER_DEBUG
- new->nf_debug = old->nf_debug;
+ new->nfctinfo = old->nfctinfo;
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+ new->nfct_reasm = old->nfct_reasm;
+ nf_conntrack_get_reasm(old->nfct_reasm);
+#endif
+#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
+ new->ipvs_property = old->ipvs_property;
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
new->nf_bridge = old->nf_bridge;
#endif
new->tc_index = old->tc_index;
#endif
- new->xid = old->xid;
+ skb_copy_secmark(new, old);
atomic_set(&new->users, 1);
+ skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
+ skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
+ skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
}
/**
* header is going to be modified. Use pskb_copy() instead.
*/
-struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
+struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
{
int headerlen = skb->data - skb->head;
/*
* The returned buffer has a reference count of 1.
*/
-struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
+struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
{
/*
* Allocate the copy buffer
n->csum = skb->csum;
n->ip_summed = skb->ip_summed;
+ n->truesize += skb->data_len;
n->data_len = skb->data_len;
n->len = skb->len;
}
skb_shinfo(n)->nr_frags = i;
}
- skb_shinfo(n)->tso_size = skb_shinfo(skb)->tso_size;
- skb_shinfo(n)->tso_segs = skb_shinfo(skb)->tso_segs;
if (skb_shinfo(skb)->frag_list) {
skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
* reloaded after call to this function.
*/
-int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
+int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
+ gfp_t gfp_mask)
{
int i;
u8 *data;
skb->h.raw += off;
skb->nh.raw += off;
skb->cloned = 0;
+ skb->nohdr = 0;
atomic_set(&skb_shinfo(skb)->dataref, 1);
return 0;
* only by netfilter in the cases when checksum is recalculated? --ANK
*/
struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
- int newheadroom, int newtailroom, int gfp_mask)
+ int newheadroom, int newtailroom,
+ gfp_t gfp_mask)
{
/*
* Allocate the copy buffer
BUG();
copy_skb_header(n, skb);
- skb_shinfo(n)->tso_size = skb_shinfo(skb)->tso_size;
- skb_shinfo(n)->tso_segs = skb_shinfo(skb)->tso_segs;
return n;
}
* filled. Used by network drivers which may DMA or transfer data
* beyond the buffer end onto the wire.
*
- * May return NULL in out of memory cases.
+ * May return error in out of memory cases. The skb is freed on error.
*/
-struct sk_buff *skb_pad(struct sk_buff *skb, int pad)
+int skb_pad(struct sk_buff *skb, int pad)
{
- struct sk_buff *nskb;
+ int err;
+ int ntail;
/* If the skbuff is non linear tailroom is always zero.. */
- if (skb_tailroom(skb) >= pad) {
+ if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
memset(skb->data+skb->len, 0, pad);
- return skb;
+ return 0;
}
-
- nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC);
+
+ ntail = skb->data_len + pad - (skb->end - skb->tail);
+ if (likely(skb_cloned(skb) || ntail > 0)) {
+ err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC);
+ if (unlikely(err))
+ goto free_skb;
+ }
+
+ /* FIXME: The use of this function with non-linear skb's really needs
+ * to be audited.
+ */
+ err = skb_linearize(skb);
+ if (unlikely(err))
+ goto free_skb;
+
+ memset(skb->data + skb->len, 0, pad);
+ return 0;
+
+free_skb:
kfree_skb(skb);
- if (nskb)
- memset(nskb->data+nskb->len, 0, pad);
- return nskb;
+ return err;
}
-/* Trims skb to length len. It can change skb pointers, if "realloc" is 1.
- * If realloc==0 and trimming is impossible without change of data,
- * it is BUG().
+/* Trims skb to length len. It can change skb pointers.
*/
-int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
+int ___pskb_trim(struct sk_buff *skb, unsigned int len)
{
+ struct sk_buff **fragp;
+ struct sk_buff *frag;
int offset = skb_headlen(skb);
int nfrags = skb_shinfo(skb)->nr_frags;
int i;
+ int err;
- for (i = 0; i < nfrags; i++) {
+ if (skb_cloned(skb) &&
+ unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))))
+ return err;
+
+ i = 0;
+ if (offset >= len)
+ goto drop_pages;
+
+ for (; i < nfrags; i++) {
int end = offset + skb_shinfo(skb)->frags[i].size;
- if (end > len) {
- if (skb_cloned(skb)) {
- if (!realloc)
- BUG();
- if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
- return -ENOMEM;
- }
- if (len <= offset) {
- put_page(skb_shinfo(skb)->frags[i].page);
- skb_shinfo(skb)->nr_frags--;
- } else {
- skb_shinfo(skb)->frags[i].size = len - offset;
- }
+
+ if (end < len) {
+ offset = end;
+ continue;
}
- offset = end;
+
+ skb_shinfo(skb)->frags[i++].size = len - offset;
+
+drop_pages:
+ skb_shinfo(skb)->nr_frags = i;
+
+ for (; i < nfrags; i++)
+ put_page(skb_shinfo(skb)->frags[i].page);
+
+ if (skb_shinfo(skb)->frag_list)
+ skb_drop_fraglist(skb);
+ goto done;
}
- if (offset < len) {
+ for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp);
+ fragp = &frag->next) {
+ int end = offset + frag->len;
+
+ if (skb_shared(frag)) {
+ struct sk_buff *nfrag;
+
+ nfrag = skb_clone(frag, GFP_ATOMIC);
+ if (unlikely(!nfrag))
+ return -ENOMEM;
+
+ nfrag->next = frag->next;
+ kfree_skb(frag);
+ frag = nfrag;
+ *fragp = frag;
+ }
+
+ if (end < len) {
+ offset = end;
+ continue;
+ }
+
+ if (end > len &&
+ unlikely((err = pskb_trim(frag, len - offset))))
+ return err;
+
+ if (frag->next)
+ skb_drop_list(&frag->next);
+ break;
+ }
+
+done:
+ if (len > skb_headlen(skb)) {
skb->data_len -= skb->len - len;
skb->len = len;
} else {
- if (len <= skb_headlen(skb)) {
- skb->len = len;
- skb->data_len = 0;
- skb->tail = skb->data + len;
- if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
- skb_drop_fraglist(skb);
- } else {
- skb->data_len -= skb->len - len;
- skb->len = len;
- }
+ skb->len = len;
+ skb->data_len = 0;
+ skb->tail = skb->data + len;
}
return 0;
struct sk_buff *insp = NULL;
do {
- if (!list)
- BUG();
+ BUG_ON(!list);
if (list->len <= eat) {
/* Eaten as whole. */
return -EFAULT;
}
+/**
+ * skb_store_bits - store bits from kernel buffer to skb
+ * @skb: destination buffer
+ * @offset: offset in destination
+ * @from: source buffer
+ * @len: number of bytes to copy
+ *
+ * Copy the specified number of bytes from the source buffer to the
+ * destination skb. This function handles all the messy bits of
+ * traversing fragment lists and such.
+ */
+
+int skb_store_bits(const struct sk_buff *skb, int offset, void *from, int len)
+{
+ int i, copy;
+ int start = skb_headlen(skb);
+
+ if (offset > (int)skb->len - len)
+ goto fault;
+
+ if ((copy = start - offset) > 0) {
+ if (copy > len)
+ copy = len;
+ memcpy(skb->data + offset, from, copy);
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
+ from += copy;
+ }
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ int end;
+
+ BUG_TRAP(start <= offset + len);
+
+ end = start + frag->size;
+ if ((copy = end - offset) > 0) {
+ u8 *vaddr;
+
+ if (copy > len)
+ copy = len;
+
+ vaddr = kmap_skb_frag(frag);
+ memcpy(vaddr + frag->page_offset + offset - start,
+ from, copy);
+ kunmap_skb_frag(vaddr);
+
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
+ from += copy;
+ }
+ start = end;
+ }
+
+ if (skb_shinfo(skb)->frag_list) {
+ struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+ for (; list; list = list->next) {
+ int end;
+
+ BUG_TRAP(start <= offset + len);
+
+ end = start + list->len;
+ if ((copy = end - offset) > 0) {
+ if (copy > len)
+ copy = len;
+ if (skb_store_bits(list, offset - start,
+ from, copy))
+ goto fault;
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
+ from += copy;
+ }
+ start = end;
+ }
+ }
+ if (!len)
+ return 0;
+
+fault:
+ return -EFAULT;
+}
+
+EXPORT_SYMBOL(skb_store_bits);
+
/* Checksum skb data. */
-unsigned int skb_checksum(const struct sk_buff *skb, int offset,
- int len, unsigned int csum)
+__wsum skb_checksum(const struct sk_buff *skb, int offset,
+ int len, __wsum csum)
{
int start = skb_headlen(skb);
int i, copy = start - offset;
end = start + skb_shinfo(skb)->frags[i].size;
if ((copy = end - offset) > 0) {
- unsigned int csum2;
+ __wsum csum2;
u8 *vaddr;
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
end = start + list->len;
if ((copy = end - offset) > 0) {
- unsigned int csum2;
+ __wsum csum2;
if (copy > len)
copy = len;
csum2 = skb_checksum(list, offset - start,
start = end;
}
}
- if (len)
- BUG();
+ BUG_ON(len);
return csum;
}
/* Both of above in one bottle. */
-unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
- u8 *to, int len, unsigned int csum)
+__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
+ u8 *to, int len, __wsum csum)
{
int start = skb_headlen(skb);
int i, copy = start - offset;
end = start + skb_shinfo(skb)->frags[i].size;
if ((copy = end - offset) > 0) {
- unsigned int csum2;
+ __wsum csum2;
u8 *vaddr;
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
struct sk_buff *list = skb_shinfo(skb)->frag_list;
for (; list; list = list->next) {
- unsigned int csum2;
+ __wsum csum2;
int end;
BUG_TRAP(start <= offset + len);
start = end;
}
}
- if (len)
- BUG();
+ BUG_ON(len);
return csum;
}
void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
{
- unsigned int csum;
+ __wsum csum;
long csstart;
- if (skb->ip_summed == CHECKSUM_HW)
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
csstart = skb->h.raw - skb->data;
else
csstart = skb_headlen(skb);
- if (csstart > skb_headlen(skb))
- BUG();
+ BUG_ON(csstart > skb_headlen(skb));
memcpy(to, skb->data, csstart);
csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
skb->len - csstart, 0);
- if (skb->ip_summed == CHECKSUM_HW) {
- long csstuff = csstart + skb->csum;
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ long csstuff = csstart + skb->csum_offset;
- *((unsigned short *)(to + csstuff)) = csum_fold(csum);
+ *((__sum16 *)(to + csstuff)) = csum_fold(csum);
}
}
__skb_queue_tail(list, newsk);
spin_unlock_irqrestore(&list->lock, flags);
}
+
/**
* skb_unlink - remove a buffer from a list
* @skb: buffer to remove
+ * @list: list to use
*
- * Place a packet after a given packet in a list. The list locks are taken
- * and this function is atomic with respect to other list locked calls
+ * Remove a packet from a list. The list locks are taken and this
+ * function is atomic with respect to other list locked calls
*
- * Works even without knowing the list it is sitting on, which can be
- * handy at times. It also means that THE LIST MUST EXIST when you
- * unlink. Thus a list must have its contents unlinked before it is
- * destroyed.
+ * You must know what list the SKB is on.
*/
-void skb_unlink(struct sk_buff *skb)
+void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
{
- struct sk_buff_head *list = skb->list;
-
- if (list) {
- unsigned long flags;
+ unsigned long flags;
- spin_lock_irqsave(&list->lock, flags);
- if (skb->list == list)
- __skb_unlink(skb, skb->list);
- spin_unlock_irqrestore(&list->lock, flags);
- }
+ spin_lock_irqsave(&list->lock, flags);
+ __skb_unlink(skb, list);
+ spin_unlock_irqrestore(&list->lock, flags);
}
-
/**
* skb_append - append a buffer
* @old: buffer to insert after
* @newsk: buffer to insert
+ * @list: list to use
*
* Place a packet after a given packet in a list. The list locks are taken
* and this function is atomic with respect to other list locked calls.
* A buffer cannot be placed on two lists at the same time.
*/
-
-void skb_append(struct sk_buff *old, struct sk_buff *newsk)
+void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
{
unsigned long flags;
- spin_lock_irqsave(&old->list->lock, flags);
- __skb_append(old, newsk);
- spin_unlock_irqrestore(&old->list->lock, flags);
+ spin_lock_irqsave(&list->lock, flags);
+ __skb_append(old, newsk, list);
+ spin_unlock_irqrestore(&list->lock, flags);
}
* skb_insert - insert a buffer
* @old: buffer to insert before
* @newsk: buffer to insert
+ * @list: list to use
+ *
+ * Place a packet before a given packet in a list. The list locks are
+ * taken and this function is atomic with respect to other list locked
+ * calls.
*
- * Place a packet before a given packet in a list. The list locks are taken
- * and this function is atomic with respect to other list locked calls
* A buffer cannot be placed on two lists at the same time.
*/
-
-void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
+void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
{
unsigned long flags;
- spin_lock_irqsave(&old->list->lock, flags);
- __skb_insert(newsk, old->prev, old, old->list);
- spin_unlock_irqrestore(&old->list->lock, flags);
+ spin_lock_irqsave(&list->lock, flags);
+ __skb_insert(newsk, old->prev, old, list);
+ spin_unlock_irqrestore(&list->lock, flags);
}
#if 0
}
#endif
-static void inline skb_split_inside_header(struct sk_buff *skb,
+static inline void skb_split_inside_header(struct sk_buff *skb,
struct sk_buff* skb1,
const u32 len, const int pos)
{
skb->tail = skb->data + len;
}
-static void inline skb_split_no_header(struct sk_buff *skb,
+static inline void skb_split_no_header(struct sk_buff *skb,
struct sk_buff* skb1,
const u32 len, int pos)
{
if (pos < len) {
/* Split frag.
- * We have to variants in this case:
+ * We have two variants in this case:
* 1. Move all the frag to the second
* part, if it is possible. F.e.
* this approach is mandatory for TUX,
/**
* skb_split - Split fragmented skb to two parts at length len.
+ * @skb: the buffer to split
+ * @skb1: the buffer to receive the second part
+ * @len: new length for skb
*/
void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
{
skb_split_no_header(skb, skb1, len, pos);
}
+/**
+ * skb_prepare_seq_read - Prepare a sequential read of skb data
+ * @skb: the buffer to read
+ * @from: lower offset of data to be read
+ * @to: upper offset of data to be read
+ * @st: state variable
+ *
+ * Initializes the specified state variable. Must be called before
+ * invoking skb_seq_read() for the first time.
+ */
+void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
+ unsigned int to, struct skb_seq_state *st)
+{
+ st->lower_offset = from;
+ st->upper_offset = to;
+ st->root_skb = st->cur_skb = skb;
+ st->frag_idx = st->stepped_offset = 0;
+ st->frag_data = NULL;
+}
+
+/**
+ * skb_seq_read - Sequentially read skb data
+ * @consumed: number of bytes consumed by the caller so far
+ * @data: destination pointer for data to be returned
+ * @st: state variable
+ *
+ * Reads a block of skb data at &consumed relative to the
+ * lower offset specified to skb_prepare_seq_read(). Assigns
+ * the head of the data block to &data and returns the length
+ * of the block or 0 if the end of the skb data or the upper
+ * offset has been reached.
+ *
+ * The caller is not required to consume all of the data
+ * returned, i.e. &consumed is typically set to the number
+ * of bytes already consumed and the next call to
+ * skb_seq_read() will return the remaining part of the block.
+ *
+ * Note: The size of each block of data returned can be arbitary,
+ * this limitation is the cost for zerocopy seqeuental
+ * reads of potentially non linear data.
+ *
+ * Note: Fragment lists within fragments are not implemented
+ * at the moment, state->root_skb could be replaced with
+ * a stack for this purpose.
+ */
+unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
+ struct skb_seq_state *st)
+{
+ unsigned int block_limit, abs_offset = consumed + st->lower_offset;
+ skb_frag_t *frag;
+
+ if (unlikely(abs_offset >= st->upper_offset))
+ return 0;
+
+next_skb:
+ block_limit = skb_headlen(st->cur_skb);
+
+ if (abs_offset < block_limit) {
+ *data = st->cur_skb->data + abs_offset;
+ return block_limit - abs_offset;
+ }
+
+ if (st->frag_idx == 0 && !st->frag_data)
+ st->stepped_offset += skb_headlen(st->cur_skb);
+
+ while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {
+ frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx];
+ block_limit = frag->size + st->stepped_offset;
+
+ if (abs_offset < block_limit) {
+ if (!st->frag_data)
+ st->frag_data = kmap_skb_frag(frag);
+
+ *data = (u8 *) st->frag_data + frag->page_offset +
+ (abs_offset - st->stepped_offset);
+
+ return block_limit - abs_offset;
+ }
+
+ if (st->frag_data) {
+ kunmap_skb_frag(st->frag_data);
+ st->frag_data = NULL;
+ }
+
+ st->frag_idx++;
+ st->stepped_offset += frag->size;
+ }
+
+ if (st->cur_skb->next) {
+ st->cur_skb = st->cur_skb->next;
+ st->frag_idx = 0;
+ goto next_skb;
+ } else if (st->root_skb == st->cur_skb &&
+ skb_shinfo(st->root_skb)->frag_list) {
+ st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
+ goto next_skb;
+ }
+
+ return 0;
+}
+
+/**
+ * skb_abort_seq_read - Abort a sequential read of skb data
+ * @st: state variable
+ *
+ * Must be called if skb_seq_read() was not called until it
+ * returned 0.
+ */
+void skb_abort_seq_read(struct skb_seq_state *st)
+{
+ if (st->frag_data)
+ kunmap_skb_frag(st->frag_data);
+}
+
+#define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb))
+
+static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text,
+ struct ts_config *conf,
+ struct ts_state *state)
+{
+ return skb_seq_read(offset, text, TS_SKB_CB(state));
+}
+
+static void skb_ts_finish(struct ts_config *conf, struct ts_state *state)
+{
+ skb_abort_seq_read(TS_SKB_CB(state));
+}
+
+/**
+ * skb_find_text - Find a text pattern in skb data
+ * @skb: the buffer to look in
+ * @from: search offset
+ * @to: search limit
+ * @config: textsearch configuration
+ * @state: uninitialized textsearch state variable
+ *
+ * Finds a pattern in the skb data according to the specified
+ * textsearch configuration. Use textsearch_next() to retrieve
+ * subsequent occurrences of the pattern. Returns the offset
+ * to the first occurrence or UINT_MAX if no match was found.
+ */
+unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
+ unsigned int to, struct ts_config *config,
+ struct ts_state *state)
+{
+ unsigned int ret;
+
+ config->get_next_block = skb_ts_get_next_block;
+ config->finish = skb_ts_finish;
+
+ skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state));
+
+ ret = textsearch_find(config, state);
+ return (ret <= to - from ? ret : UINT_MAX);
+}
+
+/**
+ * skb_append_datato_frags: - append the user data to a skb
+ * @sk: sock structure
+ * @skb: skb structure to be appened with user data.
+ * @getfrag: call back function to be used for getting the user data
+ * @from: pointer to user message iov
+ * @length: length of the iov message
+ *
+ * Description: This procedure append the user data in the fragment part
+ * of the skb if any page alloc fails user this procedure returns -ENOMEM
+ */
+int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
+ int (*getfrag)(void *from, char *to, int offset,
+ int len, int odd, struct sk_buff *skb),
+ void *from, int length)
+{
+ int frg_cnt = 0;
+ skb_frag_t *frag = NULL;
+ struct page *page = NULL;
+ int copy, left;
+ int offset = 0;
+ int ret;
+
+ do {
+ /* Return error if we don't have space for new frag */
+ frg_cnt = skb_shinfo(skb)->nr_frags;
+ if (frg_cnt >= MAX_SKB_FRAGS)
+ return -EFAULT;
+
+ /* allocate a new page for next frag */
+ page = alloc_pages(sk->sk_allocation, 0);
+
+ /* If alloc_page fails just return failure and caller will
+ * free previous allocated pages by doing kfree_skb()
+ */
+ if (page == NULL)
+ return -ENOMEM;
+
+ /* initialize the next frag */
+ sk->sk_sndmsg_page = page;
+ sk->sk_sndmsg_off = 0;
+ skb_fill_page_desc(skb, frg_cnt, page, 0, 0);
+ skb->truesize += PAGE_SIZE;
+ atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
+
+ /* get the new initialized frag */
+ frg_cnt = skb_shinfo(skb)->nr_frags;
+ frag = &skb_shinfo(skb)->frags[frg_cnt - 1];
+
+ /* copy the user data to page */
+ left = PAGE_SIZE - frag->page_offset;
+ copy = (length > left)? left : length;
+
+ ret = getfrag(from, (page_address(frag->page) +
+ frag->page_offset + frag->size),
+ offset, copy, 0, skb);
+ if (ret < 0)
+ return -EFAULT;
+
+ /* copy was successful so update the size parameters */
+ sk->sk_sndmsg_off += copy;
+ frag->size += copy;
+ skb->len += copy;
+ skb->data_len += copy;
+ offset += copy;
+ length -= copy;
+
+ } while (length > 0);
+
+ return 0;
+}
+
+/**
+ * skb_pull_rcsum - pull skb and update receive checksum
+ * @skb: buffer to update
+ * @start: start of data before pull
+ * @len: length of data pulled
+ *
+ * This function performs an skb_pull on the packet and updates
+ * update the CHECKSUM_COMPLETE checksum. It should be used on
+ * receive path processing instead of skb_pull unless you know
+ * that the checksum difference is zero (e.g., a valid IP header)
+ * or you are setting ip_summed to CHECKSUM_NONE.
+ */
+unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
+{
+ BUG_ON(len > skb->len);
+ skb->len -= len;
+ BUG_ON(skb->len < skb->data_len);
+ skb_postpull_rcsum(skb, skb->data, len);
+ return skb->data += len;
+}
+
+EXPORT_SYMBOL_GPL(skb_pull_rcsum);
+
+/**
+ * skb_segment - Perform protocol segmentation on skb.
+ * @skb: buffer to segment
+ * @features: features for the output path (see dev->features)
+ *
+ * This function performs segmentation on the given skb. It returns
+ * the segment at the given position. It returns NULL if there are
+ * no more segments to generate, or when an error is encountered.
+ */
+struct sk_buff *skb_segment(struct sk_buff *skb, int features)
+{
+ struct sk_buff *segs = NULL;
+ struct sk_buff *tail = NULL;
+ unsigned int mss = skb_shinfo(skb)->gso_size;
+ unsigned int doffset = skb->data - skb->mac.raw;
+ unsigned int offset = doffset;
+ unsigned int headroom;
+ unsigned int len;
+ int sg = features & NETIF_F_SG;
+ int nfrags = skb_shinfo(skb)->nr_frags;
+ int err = -ENOMEM;
+ int i = 0;
+ int pos;
+
+ __skb_push(skb, doffset);
+ headroom = skb_headroom(skb);
+ pos = skb_headlen(skb);
+
+ do {
+ struct sk_buff *nskb;
+ skb_frag_t *frag;
+ int hsize;
+ int k;
+ int size;
+
+ len = skb->len - offset;
+ if (len > mss)
+ len = mss;
+
+ hsize = skb_headlen(skb) - offset;
+ if (hsize < 0)
+ hsize = 0;
+ if (hsize > len || !sg)
+ hsize = len;
+
+ nskb = alloc_skb(hsize + doffset + headroom, GFP_ATOMIC);
+ if (unlikely(!nskb))
+ goto err;
+
+ if (segs)
+ tail->next = nskb;
+ else
+ segs = nskb;
+ tail = nskb;
+
+ nskb->dev = skb->dev;
+ nskb->priority = skb->priority;
+ nskb->protocol = skb->protocol;
+ nskb->dst = dst_clone(skb->dst);
+ memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
+ nskb->pkt_type = skb->pkt_type;
+ nskb->mac_len = skb->mac_len;
+
+ skb_reserve(nskb, headroom);
+ nskb->mac.raw = nskb->data;
+ nskb->nh.raw = nskb->data + skb->mac_len;
+ nskb->h.raw = nskb->nh.raw + (skb->h.raw - skb->nh.raw);
+ memcpy(skb_put(nskb, doffset), skb->data, doffset);
+
+ if (!sg) {
+ nskb->csum = skb_copy_and_csum_bits(skb, offset,
+ skb_put(nskb, len),
+ len, 0);
+ continue;
+ }
+
+ frag = skb_shinfo(nskb)->frags;
+ k = 0;
+
+ nskb->ip_summed = CHECKSUM_PARTIAL;
+ nskb->csum = skb->csum;
+ memcpy(skb_put(nskb, hsize), skb->data + offset, hsize);
+
+ while (pos < offset + len) {
+ BUG_ON(i >= nfrags);
+
+ *frag = skb_shinfo(skb)->frags[i];
+ get_page(frag->page);
+ size = frag->size;
+
+ if (pos < offset) {
+ frag->page_offset += offset - pos;
+ frag->size -= offset - pos;
+ }
+
+ k++;
+
+ if (pos + size <= offset + len) {
+ i++;
+ pos += size;
+ } else {
+ frag->size -= pos + size - (offset + len);
+ break;
+ }
+
+ frag++;
+ }
+
+ skb_shinfo(nskb)->nr_frags = k;
+ nskb->data_len = len - hsize;
+ nskb->len += nskb->data_len;
+ nskb->truesize += nskb->data_len;
+ } while ((offset += len) < skb->len);
+
+ return segs;
+
+err:
+ while ((skb = segs)) {
+ segs = skb->next;
+ kfree(skb);
+ }
+ return ERR_PTR(err);
+}
+
+EXPORT_SYMBOL_GPL(skb_segment);
+
void __init skb_init(void)
{
skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
sizeof(struct sk_buff),
0,
- SLAB_HWCACHE_ALIGN,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL, NULL);
- if (!skbuff_head_cache)
- panic("cannot create skbuff cache");
+ skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
+ (2*sizeof(struct sk_buff)) +
+ sizeof(atomic_t),
+ 0,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+ NULL, NULL);
}
EXPORT_SYMBOL(___pskb_trim);
EXPORT_SYMBOL(__kfree_skb);
+EXPORT_SYMBOL(kfree_skb);
EXPORT_SYMBOL(__pskb_pull_tail);
-EXPORT_SYMBOL(alloc_skb);
+EXPORT_SYMBOL(__alloc_skb);
+EXPORT_SYMBOL(__netdev_alloc_skb);
EXPORT_SYMBOL(pskb_copy);
EXPORT_SYMBOL(pskb_expand_head);
EXPORT_SYMBOL(skb_checksum);
EXPORT_SYMBOL(skb_unlink);
EXPORT_SYMBOL(skb_append);
EXPORT_SYMBOL(skb_split);
+EXPORT_SYMBOL(skb_prepare_seq_read);
+EXPORT_SYMBOL(skb_seq_read);
+EXPORT_SYMBOL(skb_abort_seq_read);
+EXPORT_SYMBOL(skb_find_text);
+EXPORT_SYMBOL(skb_append_datato_frags);