diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2df876a10..8e3e58be2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -68,7 +68,8 @@
 #include <asm/uaccess.h>
 #include <asm/system.h>
 
-static kmem_cache_t *skbuff_head_cache;
+static kmem_cache_t *skbuff_head_cache __read_mostly;
+static kmem_cache_t *skbuff_fclone_cache __read_mostly;
 
 /*
  * Keep out-of-line to prevent kernel bloat.
@@ -86,8 +87,10 @@ static kmem_cache_t *skbuff_head_cache;
  */
 void skb_over_panic(struct sk_buff *skb, int sz, void *here)
 {
-	printk(KERN_INFO "skput:over: %p:%d put:%d dev:%s",
-	       here, skb->len, sz, skb->dev ? skb->dev->name : "");
+	printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
+	       "data:%p tail:%p end:%p dev:%s\n",
+	       here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end,
+	       skb->dev ? skb->dev->name : "");
 	BUG();
 }
 
@@ -102,8 +105,10 @@ void skb_over_panic(struct sk_buff *skb, int sz, void *here)
 
 void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 {
-	printk(KERN_INFO "skput:under: %p:%d put:%d dev:%s",
-	       here, skb->len, sz, skb->dev ? skb->dev->name : "");
+	printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
+	       "data:%p tail:%p end:%p dev:%s\n",
+	       here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end,
+	       skb->dev ? skb->dev->name : "");
 	BUG();
 }
 
@@ -114,9 +119,11 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  */
 
 /**
- *	alloc_skb	-	allocate a network buffer
+ *	__alloc_skb	-	allocate a network buffer
  *	@size: size to allocate
  *	@gfp_mask: allocation mask
+ *	@fclone: allocate from fclone cache instead of head cache
+ *		and allocate a cloned (child) skb
  *
  *	Allocate a new &sk_buff. The returned buffer has no headroom and a
  *	tail room of size bytes. The object has a reference count of one.
@@ -125,14 +132,18 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  *	Buffers may only be allocated from interrupts using a @gfp_mask of
  *	%GFP_ATOMIC.
  */
-struct sk_buff *alloc_skb(unsigned int size, int gfp_mask)
+struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
+			    int fclone)
 {
+	kmem_cache_t *cache;
+	struct skb_shared_info *shinfo;
 	struct sk_buff *skb;
 	u8 *data;
 
+	cache = fclone ?
skbuff_fclone_cache : skbuff_head_cache; + /* Get the HEAD */ - skb = kmem_cache_alloc(skbuff_head_cache, - gfp_mask & ~__GFP_DMA); + skb = kmem_cache_alloc(cache, gfp_mask & ~__GFP_DMA); if (!skb) goto out; @@ -142,6 +153,73 @@ struct sk_buff *alloc_skb(unsigned int size, int gfp_mask) if (!data) goto nodata; + memset(skb, 0, offsetof(struct sk_buff, truesize)); + skb->truesize = size + sizeof(struct sk_buff); + atomic_set(&skb->users, 1); + skb->head = data; + skb->data = data; + skb->tail = data; + skb->end = data + size; + /* make sure we initialize shinfo sequentially */ + shinfo = skb_shinfo(skb); + atomic_set(&shinfo->dataref, 1); + shinfo->nr_frags = 0; + shinfo->tso_size = 0; + shinfo->tso_segs = 0; + shinfo->ufo_size = 0; + shinfo->ip6_frag_id = 0; + shinfo->frag_list = NULL; + + if (fclone) { + struct sk_buff *child = skb + 1; + atomic_t *fclone_ref = (atomic_t *) (child + 1); + + skb->fclone = SKB_FCLONE_ORIG; + atomic_set(fclone_ref, 1); + + child->fclone = SKB_FCLONE_UNAVAILABLE; + } +out: + return skb; +nodata: + kmem_cache_free(cache, skb); + skb = NULL; + goto out; +} + +/** + * alloc_skb_from_cache - allocate a network buffer + * @cp: kmem_cache from which to allocate the data area + * (object size must be big enough for @size bytes + skb overheads) + * @size: size to allocate + * @gfp_mask: allocation mask + * + * Allocate a new &sk_buff. The returned buffer has no headroom and + * tail room of size bytes. The object has a reference count of one. + * The return is the buffer. On a failure the return is %NULL. + * + * Buffers may only be allocated from interrupts using a @gfp_mask of + * %GFP_ATOMIC. + */ +struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, + unsigned int size, + gfp_t gfp_mask) +{ + struct sk_buff *skb; + u8 *data; + + /* Get the HEAD */ + skb = kmem_cache_alloc(skbuff_head_cache, + gfp_mask & ~__GFP_DMA); + if (!skb) + goto out; + + /* Get the DATA. */ + size = SKB_DATA_ALIGN(size); + data = kmem_cache_alloc(cp, gfp_mask); + if (!data) + goto nodata; + memset(skb, 0, offsetof(struct sk_buff, truesize)); skb->truesize = size + sizeof(struct sk_buff); atomic_set(&skb->users, 1); @@ -154,6 +232,7 @@ struct sk_buff *alloc_skb(unsigned int size, int gfp_mask) skb_shinfo(skb)->nr_frags = 0; skb_shinfo(skb)->tso_size = 0; skb_shinfo(skb)->tso_segs = 0; + skb_shinfo(skb)->ufo_size = 0; skb_shinfo(skb)->frag_list = NULL; out: return skb; @@ -164,11 +243,11 @@ nodata: } -static void skb_drop_fraglist(struct sk_buff *skb) +static void skb_drop_list(struct sk_buff **listp) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; + struct sk_buff *list = *listp; - skb_shinfo(skb)->frag_list = NULL; + *listp = NULL; do { struct sk_buff *this = list; @@ -177,6 +256,11 @@ static void skb_drop_fraglist(struct sk_buff *skb) } while (list); } +static inline void skb_drop_fraglist(struct sk_buff *skb) +{ + skb_drop_list(&skb_shinfo(skb)->frag_list); +} + static void skb_clone_fraglist(struct sk_buff *skb) { struct sk_buff *list; @@ -188,7 +272,8 @@ static void skb_clone_fraglist(struct sk_buff *skb) void skb_release_data(struct sk_buff *skb) { if (!skb->cloned || - atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) { + !atomic_sub_return(skb->nohdr ? 
(1 << SKB_DATAREF_SHIFT) + 1 : 1, + &skb_shinfo(skb)->dataref)) { if (skb_shinfo(skb)->nr_frags) { int i; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) @@ -207,8 +292,34 @@ void skb_release_data(struct sk_buff *skb) */ void kfree_skbmem(struct sk_buff *skb) { + struct sk_buff *other; + atomic_t *fclone_ref; + skb_release_data(skb); - kmem_cache_free(skbuff_head_cache, skb); + switch (skb->fclone) { + case SKB_FCLONE_UNAVAILABLE: + kmem_cache_free(skbuff_head_cache, skb); + break; + + case SKB_FCLONE_ORIG: + fclone_ref = (atomic_t *) (skb + 2); + if (atomic_dec_and_test(fclone_ref)) + kmem_cache_free(skbuff_fclone_cache, skb); + break; + + case SKB_FCLONE_CLONE: + fclone_ref = (atomic_t *) (skb + 1); + other = skb - 1; + + /* The clone portion is available for + * fast-cloning again. + */ + skb->fclone = SKB_FCLONE_UNAVAILABLE; + + if (atomic_dec_and_test(fclone_ref)) + kmem_cache_free(skbuff_fclone_cache, other); + break; + }; } /** @@ -222,24 +333,19 @@ void kfree_skbmem(struct sk_buff *skb) void __kfree_skb(struct sk_buff *skb) { - if (skb->list) { - printk(KERN_WARNING "Warning: kfree_skb passed an skb still " - "on a list (from %p).\n", NET_CALLER(skb)); - BUG(); - } - dst_release(skb->dst); #ifdef CONFIG_XFRM secpath_put(skb->sp); #endif - if(skb->destructor) { - if (in_irq()) - printk(KERN_WARNING "Warning: kfree_skb on " - "hard IRQ %p\n", NET_CALLER(skb)); + if (skb->destructor) { + WARN_ON(in_irq()); skb->destructor(skb); } #ifdef CONFIG_NETFILTER nf_conntrack_put(skb->nfct); +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + nf_conntrack_put_reasm(skb->nfct_reasm); +#endif #ifdef CONFIG_BRIDGE_NETFILTER nf_bridge_put(skb->nf_bridge); #endif @@ -249,7 +355,6 @@ void __kfree_skb(struct sk_buff *skb) skb->tc_index = 0; #ifdef CONFIG_NET_CLS_ACT skb->tc_verd = 0; - skb->tc_classid = 0; #endif #endif @@ -270,21 +375,29 @@ void __kfree_skb(struct sk_buff *skb) * %GFP_ATOMIC. 
 */
-struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
+struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 {
-	struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
-
-	if (!n)
-		return NULL;
+	struct sk_buff *n;
+
+	n = skb + 1;
+	if (skb->fclone == SKB_FCLONE_ORIG &&
+	    n->fclone == SKB_FCLONE_UNAVAILABLE) {
+		atomic_t *fclone_ref = (atomic_t *) (n + 1);
+		n->fclone = SKB_FCLONE_CLONE;
+		atomic_inc(fclone_ref);
+	} else {
+		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+		if (!n)
+			return NULL;
+		n->fclone = SKB_FCLONE_UNAVAILABLE;
+	}
 
 #define C(x) n->x = skb->x
 
 	n->next = n->prev = NULL;
-	n->list = NULL;
 	n->sk = NULL;
-	C(stamp);
+	C(tstamp);
 	C(dev);
-	C(real_dev);
 	C(h);
 	C(nh);
 	C(mac);
@@ -300,40 +413,45 @@
 	C(csum);
 	C(local_df);
 	n->cloned = 1;
+	n->nohdr = 0;
 	C(pkt_type);
 	C(ip_summed);
 	C(priority);
+#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
+	C(ipvs_property);
+#endif
 	C(protocol);
-	C(security);
 	n->destructor = NULL;
 #ifdef CONFIG_NETFILTER
 	C(nfmark);
-	C(nfcache);
 	C(nfct);
 	nf_conntrack_get(skb->nfct);
-#ifdef CONFIG_NETFILTER_DEBUG
-	C(nf_debug);
+	C(nfctinfo);
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+	C(nfct_reasm);
+	nf_conntrack_get_reasm(skb->nfct_reasm);
 #endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	C(nf_bridge);
 	nf_bridge_get(skb->nf_bridge);
 #endif
 #endif /*CONFIG_NETFILTER*/
-#if defined(CONFIG_HIPPI)
-	C(private);
-#endif
 #ifdef CONFIG_NET_SCHED
 	C(tc_index);
 #ifdef CONFIG_NET_CLS_ACT
 	n->tc_verd = SET_TC_VERD(skb->tc_verd,0);
-	n->tc_verd = CLR_TC_OK2MUNGE(skb->tc_verd);
-	n->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
+	n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
+	n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
 	C(input_dev);
-	C(tc_classid);
 #endif
 #endif
+#if defined(CONFIG_VNET) || defined(CONFIG_VNET_MODULE)
 	C(xid);
+#endif
 	C(truesize);
 	atomic_set(&n->users, 1);
 	C(head);
@@ -354,10 +472,8 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	 */
 	unsigned long offset = new->data - old->data;
 
-	new->list	= NULL;
 	new->sk		= NULL;
 	new->dev	= old->dev;
-	new->real_dev	= old->real_dev;
 	new->priority	= old->priority;
 	new->protocol	= old->protocol;
 	new->dst	= dst_clone(old->dst);
@@ -369,17 +485,21 @@
 	new->mac.raw	= old->mac.raw + offset;
 	memcpy(new->cb, old->cb, sizeof(old->cb));
 	new->local_df	= old->local_df;
+	new->fclone	= SKB_FCLONE_UNAVAILABLE;
 	new->pkt_type	= old->pkt_type;
-	new->stamp	= old->stamp;
+	new->tstamp	= old->tstamp;
 	new->destructor = NULL;
-	new->security	= old->security;
 #ifdef CONFIG_NETFILTER
 	new->nfmark	= old->nfmark;
-	new->nfcache	= old->nfcache;
 	new->nfct	= old->nfct;
 	nf_conntrack_get(old->nfct);
-#ifdef CONFIG_NETFILTER_DEBUG
-	new->nf_debug	= old->nf_debug;
+	new->nfctinfo	= old->nfctinfo;
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+	new->nfct_reasm = old->nfct_reasm;
+	nf_conntrack_get_reasm(old->nfct_reasm);
+#endif
+#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
+	new->ipvs_property = old->ipvs_property;
 #endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	new->nf_bridge	= old->nf_bridge;
@@ -392,8 +512,13 @@
 #endif
 	new->tc_index	= old->tc_index;
 #endif
+#if defined(CONFIG_VNET) || defined(CONFIG_VNET_MODULE)
 	new->xid	= old->xid;
+#endif
atomic_set(&new->users, 1); + skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size; + skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs; + skb_shinfo(new)->ufo_size = skb_shinfo(old)->ufo_size; } /** @@ -413,7 +538,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) * header is going to be modified. Use pskb_copy() instead. */ -struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask) +struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) { int headerlen = skb->data - skb->head; /* @@ -452,7 +577,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask) * The returned buffer has a reference count of 1. */ -struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask) +struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) { /* * Allocate the copy buffer @@ -471,6 +596,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask) n->csum = skb->csum; n->ip_summed = skb->ip_summed; + n->truesize += skb->data_len; n->data_len = skb->data_len; n->len = skb->len; @@ -483,8 +609,6 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask) } skb_shinfo(n)->nr_frags = i; } - skb_shinfo(n)->tso_size = skb_shinfo(skb)->tso_size; - skb_shinfo(n)->tso_segs = skb_shinfo(skb)->tso_segs; if (skb_shinfo(skb)->frag_list) { skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; @@ -512,7 +636,8 @@ out: * reloaded after call to this function. */ -int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask) +int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, + gfp_t gfp_mask) { int i; u8 *data; @@ -551,6 +676,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask) skb->h.raw += off; skb->nh.raw += off; skb->cloned = 0; + skb->nohdr = 0; atomic_set(&skb_shinfo(skb)->dataref, 1); return 0; @@ -601,7 +727,8 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) * only by netfilter in the cases when checksum is recalculated? --ANK */ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, - int newheadroom, int newtailroom, int gfp_mask) + int newheadroom, int newtailroom, + gfp_t gfp_mask) { /* * Allocate the copy buffer @@ -631,8 +758,6 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, BUG(); copy_skb_header(n, skb); - skb_shinfo(n)->tso_size = skb_shinfo(skb)->tso_size; - skb_shinfo(n)->tso_segs = skb_shinfo(skb)->tso_segs; return n; } @@ -666,50 +791,86 @@ struct sk_buff *skb_pad(struct sk_buff *skb, int pad) return nskb; } -/* Trims skb to length len. It can change skb pointers, if "realloc" is 1. - * If realloc==0 and trimming is impossible without change of data, - * it is BUG(). +/* Trims skb to length len. It can change skb pointers. 
*/ -int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc) +int ___pskb_trim(struct sk_buff *skb, unsigned int len) { + struct sk_buff **fragp; + struct sk_buff *frag; int offset = skb_headlen(skb); int nfrags = skb_shinfo(skb)->nr_frags; int i; + int err; - for (i = 0; i < nfrags; i++) { + if (skb_cloned(skb) && + unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))) + return err; + + i = 0; + if (offset >= len) + goto drop_pages; + + for (; i < nfrags; i++) { int end = offset + skb_shinfo(skb)->frags[i].size; - if (end > len) { - if (skb_cloned(skb)) { - if (!realloc) - BUG(); - if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) - return -ENOMEM; - } - if (len <= offset) { - put_page(skb_shinfo(skb)->frags[i].page); - skb_shinfo(skb)->nr_frags--; - } else { - skb_shinfo(skb)->frags[i].size = len - offset; - } + + if (end < len) { + offset = end; + continue; } - offset = end; + + skb_shinfo(skb)->frags[i++].size = len - offset; + +drop_pages: + skb_shinfo(skb)->nr_frags = i; + + for (; i < nfrags; i++) + put_page(skb_shinfo(skb)->frags[i].page); + + if (skb_shinfo(skb)->frag_list) + skb_drop_fraglist(skb); + goto done; } - if (offset < len) { + for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp); + fragp = &frag->next) { + int end = offset + frag->len; + + if (skb_shared(frag)) { + struct sk_buff *nfrag; + + nfrag = skb_clone(frag, GFP_ATOMIC); + if (unlikely(!nfrag)) + return -ENOMEM; + + nfrag->next = frag->next; + kfree_skb(frag); + frag = nfrag; + *fragp = frag; + } + + if (end < len) { + offset = end; + continue; + } + + if (end > len && + unlikely((err = pskb_trim(frag, len - offset)))) + return err; + + if (frag->next) + skb_drop_list(&frag->next); + break; + } + +done: + if (len > skb_headlen(skb)) { skb->data_len -= skb->len - len; skb->len = len; } else { - if (len <= skb_headlen(skb)) { - skb->len = len; - skb->data_len = 0; - skb->tail = skb->data + len; - if (skb_shinfo(skb)->frag_list && !skb_cloned(skb)) - skb_drop_fraglist(skb); - } else { - skb->data_len -= skb->len - len; - skb->len = len; - } + skb->len = len; + skb->data_len = 0; + skb->tail = skb->data + len; } return 0; @@ -784,8 +945,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta) struct sk_buff *insp = NULL; do { - if (!list) - BUG(); + BUG_ON(!list); if (list->len <= eat) { /* Eaten as whole. */ @@ -931,6 +1091,94 @@ fault: return -EFAULT; } +/** + * skb_store_bits - store bits from kernel buffer to skb + * @skb: destination buffer + * @offset: offset in destination + * @from: source buffer + * @len: number of bytes to copy + * + * Copy the specified number of bytes from the source buffer to the + * destination skb. This function handles all the messy bits of + * traversing fragment lists and such. 
+ */ + +int skb_store_bits(const struct sk_buff *skb, int offset, void *from, int len) +{ + int i, copy; + int start = skb_headlen(skb); + + if (offset > (int)skb->len - len) + goto fault; + + if ((copy = start - offset) > 0) { + if (copy > len) + copy = len; + memcpy(skb->data + offset, from, copy); + if ((len -= copy) == 0) + return 0; + offset += copy; + from += copy; + } + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + int end; + + BUG_TRAP(start <= offset + len); + + end = start + frag->size; + if ((copy = end - offset) > 0) { + u8 *vaddr; + + if (copy > len) + copy = len; + + vaddr = kmap_skb_frag(frag); + memcpy(vaddr + frag->page_offset + offset - start, + from, copy); + kunmap_skb_frag(vaddr); + + if ((len -= copy) == 0) + return 0; + offset += copy; + from += copy; + } + start = end; + } + + if (skb_shinfo(skb)->frag_list) { + struct sk_buff *list = skb_shinfo(skb)->frag_list; + + for (; list; list = list->next) { + int end; + + BUG_TRAP(start <= offset + len); + + end = start + list->len; + if ((copy = end - offset) > 0) { + if (copy > len) + copy = len; + if (skb_store_bits(list, offset - start, + from, copy)) + goto fault; + if ((len -= copy) == 0) + return 0; + offset += copy; + from += copy; + } + start = end; + } + } + if (!len) + return 0; + +fault: + return -EFAULT; +} + +EXPORT_SYMBOL(skb_store_bits); + /* Checksum skb data. */ unsigned int skb_checksum(const struct sk_buff *skb, int offset, @@ -1001,8 +1249,7 @@ unsigned int skb_checksum(const struct sk_buff *skb, int offset, start = end; } } - if (len) - BUG(); + BUG_ON(len); return csum; } @@ -1084,8 +1331,7 @@ unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, start = end; } } - if (len) - BUG(); + BUG_ON(len); return csum; } @@ -1099,8 +1345,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) else csstart = skb_headlen(skb); - if (csstart > skb_headlen(skb)) - BUG(); + BUG_ON(csstart > skb_headlen(skb)); memcpy(to, skb->data, csstart); @@ -1209,50 +1454,43 @@ void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) __skb_queue_tail(list, newsk); spin_unlock_irqrestore(&list->lock, flags); } + /** * skb_unlink - remove a buffer from a list * @skb: buffer to remove + * @list: list to use * - * Place a packet after a given packet in a list. The list locks are taken - * and this function is atomic with respect to other list locked calls + * Remove a packet from a list. The list locks are taken and this + * function is atomic with respect to other list locked calls * - * Works even without knowing the list it is sitting on, which can be - * handy at times. It also means that THE LIST MUST EXIST when you - * unlink. Thus a list must have its contents unlinked before it is - * destroyed. + * You must know what list the SKB is on. */ -void skb_unlink(struct sk_buff *skb) +void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) { - struct sk_buff_head *list = skb->list; - - if (list) { - unsigned long flags; + unsigned long flags; - spin_lock_irqsave(&list->lock, flags); - if (skb->list == list) - __skb_unlink(skb, skb->list); - spin_unlock_irqrestore(&list->lock, flags); - } + spin_lock_irqsave(&list->lock, flags); + __skb_unlink(skb, list); + spin_unlock_irqrestore(&list->lock, flags); } - /** * skb_append - append a buffer * @old: buffer to insert after * @newsk: buffer to insert + * @list: list to use * * Place a packet after a given packet in a list. 
The list locks are taken * and this function is atomic with respect to other list locked calls. * A buffer cannot be placed on two lists at the same time. */ - -void skb_append(struct sk_buff *old, struct sk_buff *newsk) +void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) { unsigned long flags; - spin_lock_irqsave(&old->list->lock, flags); - __skb_append(old, newsk); - spin_unlock_irqrestore(&old->list->lock, flags); + spin_lock_irqsave(&list->lock, flags); + __skb_append(old, newsk, list); + spin_unlock_irqrestore(&list->lock, flags); } @@ -1260,19 +1498,21 @@ void skb_append(struct sk_buff *old, struct sk_buff *newsk) * skb_insert - insert a buffer * @old: buffer to insert before * @newsk: buffer to insert + * @list: list to use + * + * Place a packet before a given packet in a list. The list locks are + * taken and this function is atomic with respect to other list locked + * calls. * - * Place a packet before a given packet in a list. The list locks are taken - * and this function is atomic with respect to other list locked calls * A buffer cannot be placed on two lists at the same time. */ - -void skb_insert(struct sk_buff *old, struct sk_buff *newsk) +void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) { unsigned long flags; - spin_lock_irqsave(&old->list->lock, flags); - __skb_insert(newsk, old->prev, old, old->list); - spin_unlock_irqrestore(&old->list->lock, flags); + spin_lock_irqsave(&list->lock, flags); + __skb_insert(newsk, old->prev, old, list); + spin_unlock_irqrestore(&list->lock, flags); } #if 0 @@ -1288,7 +1528,7 @@ void skb_add_mtu(int mtu) } #endif -static void inline skb_split_inside_header(struct sk_buff *skb, +static inline void skb_split_inside_header(struct sk_buff *skb, struct sk_buff* skb1, const u32 len, const int pos) { @@ -1309,7 +1549,7 @@ static void inline skb_split_inside_header(struct sk_buff *skb, skb->tail = skb->data + len; } -static void inline skb_split_no_header(struct sk_buff *skb, +static inline void skb_split_no_header(struct sk_buff *skb, struct sk_buff* skb1, const u32 len, int pos) { @@ -1329,7 +1569,7 @@ static void inline skb_split_no_header(struct sk_buff *skb, if (pos < len) { /* Split frag. - * We have to variants in this case: + * We have two variants in this case: * 1. Move all the frag to the second * part, if it is possible. F.e. * this approach is mandatory for TUX, @@ -1352,6 +1592,9 @@ static void inline skb_split_no_header(struct sk_buff *skb, /** * skb_split - Split fragmented skb to two parts at length len. + * @skb: the buffer to split + * @skb1: the buffer to receive the second part + * @len: new length for skb */ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) { @@ -1363,6 +1606,231 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) skb_split_no_header(skb, skb1, len, pos); } +/** + * skb_prepare_seq_read - Prepare a sequential read of skb data + * @skb: the buffer to read + * @from: lower offset of data to be read + * @to: upper offset of data to be read + * @st: state variable + * + * Initializes the specified state variable. Must be called before + * invoking skb_seq_read() for the first time. 
+ */
+void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
+			  unsigned int to, struct skb_seq_state *st)
+{
+	st->lower_offset = from;
+	st->upper_offset = to;
+	st->root_skb = st->cur_skb = skb;
+	st->frag_idx = st->stepped_offset = 0;
+	st->frag_data = NULL;
+}
+
+/**
+ * skb_seq_read - Sequentially read skb data
+ * @consumed: number of bytes consumed by the caller so far
+ * @data: destination pointer for data to be returned
+ * @st: state variable
+ *
+ * Reads a block of skb data at &consumed relative to the
+ * lower offset specified to skb_prepare_seq_read(). Assigns
+ * the head of the data block to &data and returns the length
+ * of the block or 0 if the end of the skb data or the upper
+ * offset has been reached.
+ *
+ * The caller is not required to consume all of the data
+ * returned, i.e. &consumed is typically set to the number
+ * of bytes already consumed and the next call to
+ * skb_seq_read() will return the remaining part of the block.
+ *
+ * Note: The size of each block of data returned can be arbitrary,
+ *       this limitation is the cost for zerocopy sequential
+ *       reads of potentially non-linear data.
+ *
+ * Note: Fragment lists within fragments are not implemented
+ *       at the moment, state->root_skb could be replaced with
+ *       a stack for this purpose.
+ */
+unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
+			  struct skb_seq_state *st)
+{
+	unsigned int block_limit, abs_offset = consumed + st->lower_offset;
+	skb_frag_t *frag;
+
+	if (unlikely(abs_offset >= st->upper_offset))
+		return 0;
+
+next_skb:
+	block_limit = skb_headlen(st->cur_skb);
+
+	if (abs_offset < block_limit) {
+		*data = st->cur_skb->data + abs_offset;
+		return block_limit - abs_offset;
+	}
+
+	if (st->frag_idx == 0 && !st->frag_data)
+		st->stepped_offset += skb_headlen(st->cur_skb);
+
+	while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {
+		frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx];
+		block_limit = frag->size + st->stepped_offset;
+
+		if (abs_offset < block_limit) {
+			if (!st->frag_data)
+				st->frag_data = kmap_skb_frag(frag);
+
+			*data = (u8 *) st->frag_data + frag->page_offset +
+				(abs_offset - st->stepped_offset);
+
+			return block_limit - abs_offset;
+		}
+
+		if (st->frag_data) {
+			kunmap_skb_frag(st->frag_data);
+			st->frag_data = NULL;
+		}
+
+		st->frag_idx++;
+		st->stepped_offset += frag->size;
+	}
+
+	if (st->cur_skb->next) {
+		st->cur_skb = st->cur_skb->next;
+		st->frag_idx = 0;
+		goto next_skb;
+	} else if (st->root_skb == st->cur_skb &&
+		   skb_shinfo(st->root_skb)->frag_list) {
+		st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
+		goto next_skb;
+	}
+
+	return 0;
+}
+
+/**
+ * skb_abort_seq_read - Abort a sequential read of skb data
+ * @st: state variable
+ *
+ * Must be called if skb_seq_read() was not called until it
+ * returned 0.
+ */
+void skb_abort_seq_read(struct skb_seq_state *st)
+{
+	if (st->frag_data)
+		kunmap_skb_frag(st->frag_data);
+}
+
+#define TS_SKB_CB(state)	((struct skb_seq_state *) &((state)->cb))
+
+static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text,
+					  struct ts_config *conf,
+					  struct ts_state *state)
+{
+	return skb_seq_read(offset, text, TS_SKB_CB(state));
+}
+
+static void skb_ts_finish(struct ts_config *conf, struct ts_state *state)
+{
+	skb_abort_seq_read(TS_SKB_CB(state));
+}
+
+/**
+ * skb_find_text - Find a text pattern in skb data
+ * @skb: the buffer to look in
+ * @from: search offset
+ * @to: search limit
+ * @config: textsearch configuration
+ * @state: uninitialized textsearch state variable
+ *
+ * Finds a pattern in the skb data according to the specified
+ * textsearch configuration. Use textsearch_next() to retrieve
+ * subsequent occurrences of the pattern. Returns the offset
+ * to the first occurrence or UINT_MAX if no match was found.
+ */
+unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
+			   unsigned int to, struct ts_config *config,
+			   struct ts_state *state)
+{
+	config->get_next_block = skb_ts_get_next_block;
+	config->finish = skb_ts_finish;
+
+	skb_prepare_seq_read(skb, from, to, TS_SKB_CB(state));
+
+	return textsearch_find(config, state);
+}
+
+/**
+ * skb_append_datato_frags - append the user data to a skb
+ * @sk: sock structure
+ * @skb: skb structure to be appended with user data.
+ * @getfrag: callback function to be used for getting the user data
+ * @from: pointer to user message iov
+ * @length: length of the iov message
+ *
+ * Description: This procedure appends the user data in the fragment part
+ * of the skb. If any page alloc fails, this procedure returns -ENOMEM.
+ */
+int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
+			    int (*getfrag)(void *from, char *to, int offset,
+					   int len, int odd, struct sk_buff *skb),
+			    void *from, int length)
+{
+	int frg_cnt = 0;
+	skb_frag_t *frag = NULL;
+	struct page *page = NULL;
+	int copy, left;
+	int offset = 0;
+	int ret;
+
+	do {
+		/* Return error if we don't have space for new frag */
+		frg_cnt = skb_shinfo(skb)->nr_frags;
+		if (frg_cnt >= MAX_SKB_FRAGS)
+			return -EFAULT;
+
+		/* allocate a new page for next frag */
+		page = alloc_pages(sk->sk_allocation, 0);
+
+		/* If alloc_page fails just return failure and caller will
+		 * free previous allocated pages by doing kfree_skb()
+		 */
+		if (page == NULL)
+			return -ENOMEM;
+
+		/* initialize the next frag */
+		sk->sk_sndmsg_page = page;
+		sk->sk_sndmsg_off = 0;
+		skb_fill_page_desc(skb, frg_cnt, page, 0, 0);
+		skb->truesize += PAGE_SIZE;
+		atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
+
+		/* get the new initialized frag */
+		frg_cnt = skb_shinfo(skb)->nr_frags;
+		frag = &skb_shinfo(skb)->frags[frg_cnt - 1];
+
+		/* copy the user data to page */
+		left = PAGE_SIZE - frag->page_offset;
+		copy = (length > left)?
left : length; + + ret = getfrag(from, (page_address(frag->page) + + frag->page_offset + frag->size), + offset, copy, 0, skb); + if (ret < 0) + return -EFAULT; + + /* copy was successful so update the size parameters */ + sk->sk_sndmsg_off += copy; + frag->size += copy; + skb->len += copy; + skb->data_len += copy; + offset += copy; + length -= copy; + + } while (length > 0); + + return 0; +} + void __init skb_init(void) { skbuff_head_cache = kmem_cache_create("skbuff_head_cache", @@ -1372,12 +1840,21 @@ void __init skb_init(void) NULL, NULL); if (!skbuff_head_cache) panic("cannot create skbuff cache"); + + skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", + (2*sizeof(struct sk_buff)) + + sizeof(atomic_t), + 0, + SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!skbuff_fclone_cache) + panic("cannot create skbuff cache"); } EXPORT_SYMBOL(___pskb_trim); EXPORT_SYMBOL(__kfree_skb); EXPORT_SYMBOL(__pskb_pull_tail); -EXPORT_SYMBOL(alloc_skb); +EXPORT_SYMBOL(__alloc_skb); EXPORT_SYMBOL(pskb_copy); EXPORT_SYMBOL(pskb_expand_head); EXPORT_SYMBOL(skb_checksum); @@ -1401,3 +1878,8 @@ EXPORT_SYMBOL(skb_queue_tail); EXPORT_SYMBOL(skb_unlink); EXPORT_SYMBOL(skb_append); EXPORT_SYMBOL(skb_split); +EXPORT_SYMBOL(skb_prepare_seq_read); +EXPORT_SYMBOL(skb_seq_read); +EXPORT_SYMBOL(skb_abort_seq_read); +EXPORT_SYMBOL(skb_find_text); +EXPORT_SYMBOL(skb_append_datato_frags);
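
A note on the fclone ("fast clone") scheme this patch introduces: each object in skbuff_fclone_cache is laid out as a parent sk_buff, a child sk_buff, and one shared atomic reference count, which is why __alloc_skb(), skb_clone() and kfree_skbmem() above navigate with skb + 1, skb + 2 and skb - 1. What follows is a minimal userspace C sketch of that layout and its lifetime rules, not kernel code; struct fbuf and the fbuf_* helpers are invented stand-ins for sk_buff and the slab cache, and C11 atomics stand in for atomic_t.

/*
 * Userspace model of the fclone scheme: one allocation holds
 * [parent][child][shared refcount], mirroring skbuff_fclone_cache
 * objects of size 2*sizeof(struct sk_buff) + sizeof(atomic_t).
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

enum fclone_state { FCLONE_UNAVAILABLE, FCLONE_ORIG, FCLONE_CLONE };

struct fbuf {
	enum fclone_state fclone;
	int len;		/* stands in for the rest of sk_buff */
};

struct fbuf *fbuf_alloc(void)
{
	struct fbuf *b = malloc(2 * sizeof(*b) + sizeof(atomic_int));
	atomic_int *ref = (atomic_int *)(b + 2);

	b->fclone = FCLONE_ORIG;
	(b + 1)->fclone = FCLONE_UNAVAILABLE;	/* sibling slot free */
	atomic_store(ref, 1);
	return b;
}

/* Clone: reuse the sibling slot when it is free, as skb_clone() does. */
struct fbuf *fbuf_clone(struct fbuf *b)
{
	struct fbuf *n = b + 1;

	if (b->fclone == FCLONE_ORIG && n->fclone == FCLONE_UNAVAILABLE) {
		atomic_int *ref = (atomic_int *)(n + 1);

		n->fclone = FCLONE_CLONE;
		atomic_fetch_add(ref, 1);
		return n;
	}
	n = malloc(sizeof(*n));		/* slow path: separate head */
	n->fclone = FCLONE_UNAVAILABLE;
	return n;
}

/* Free: only the last reference releases the combined object. */
void fbuf_free(struct fbuf *b)
{
	atomic_int *ref;

	switch (b->fclone) {
	case FCLONE_UNAVAILABLE:
		free(b);
		break;
	case FCLONE_ORIG:
		ref = (atomic_int *)(b + 2);
		if (atomic_fetch_sub(ref, 1) == 1)
			free(b);
		break;
	case FCLONE_CLONE:
		ref = (atomic_int *)(b + 1);
		b->fclone = FCLONE_UNAVAILABLE;	/* slot reusable again */
		if (atomic_fetch_sub(ref, 1) == 1)
			free(b - 1);
		break;
	}
}

int main(void)
{
	struct fbuf *orig = fbuf_alloc();
	struct fbuf *clone = fbuf_clone(orig);

	fbuf_free(clone);	/* refcount 2 -> 1, object survives */
	fbuf_free(orig);	/* refcount 1 -> 0, object freed */
	puts("fclone model ok");
	return 0;
}

The point of the layout is that the common case of exactly one clone per skb (as TCP produces for retransmission) costs no extra allocation: the sibling slot and the shared count already exist in the same cache line neighbourhood.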
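skb_release_data() above subtracts either 1 or (1 << SKB_DATAREF_SHIFT) + 1 from skb_shinfo(skb)->dataref because the counter is split into two halves: the low bits count every reference to the data buffer, and the high bits count payload-only references, i.e. holders that set skb->nohdr and promise not to touch the headers; such a holder gives back one unit in each half on release. Below is a hedged userspace model of that packing, assuming the mainline value SKB_DATAREF_SHIFT == 16; all names are invented for the sketch.

/* Userspace model of the split dataref counter. */
#include <stdatomic.h>
#include <stdio.h>

#define DATAREF_SHIFT 16			/* SKB_DATAREF_SHIFT */
#define DATAREF_MASK  ((1 << DATAREF_SHIFT) - 1)

static atomic_int dataref;

/* skb_clone() takes a whole (header + payload) reference... */
static void clone_ref(void) { atomic_fetch_add(&dataref, 1); }

/* ...and a later header release marks it payload-only (nohdr). */
static void header_release(void)
{
	atomic_fetch_add(&dataref, 1 << DATAREF_SHIFT);
}

/* Model of skb_release_data(): a nohdr holder gives back both halves
 * at once; the buffer is freeable when the whole word reaches zero. */
static int release_data(int nohdr)
{
	int sub = nohdr ? (1 << DATAREF_SHIFT) + 1 : 1;

	return atomic_fetch_sub(&dataref, sub) == sub;
}

int main(void)
{
	atomic_store(&dataref, 1);	/* the original skb's reference */

	clone_ref();		/* e.g. a clone kept for retransmit */
	header_release();	/* the clone will rebuild its headers */

	printf("total refs: %d, payload-only: %d\n",
	       atomic_load(&dataref) & DATAREF_MASK,
	       atomic_load(&dataref) >> DATAREF_SHIFT);	/* 2 and 1 */

	printf("free on plain release? %d\n", release_data(0));	/* 0 */
	printf("free on nohdr release? %d\n", release_data(1));	/* 1 */
	return 0;
}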
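The skb_unlink() change above is an API break worth calling out: skb->list is gone, so a caller must now name the queue and guarantee the skb is actually on it. Under that assumption, a conversion of a typical call site looks like this (sk_receive_queue is just an example queue):

/* Before this patch: the queue was recovered from the removed
 * skb->list field, and an skb on no list was tolerated. */
skb_unlink(skb);

/* After this patch: the caller names the queue explicitly and is
 * responsible for knowing the skb sits on it (e.g. by holding
 * whatever serializes additions and removals on that queue). */
skb_unlink(skb, &sk->sk_receive_queue);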
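The sequential-read machinery added above is meant to be driven as prepare, then read until zero. Here is a sketch of the intended caller pattern; it assumes kernel context, and checksum_of_skb and its use of csum_partial() are illustrative assumptions, not part of this patch.

static unsigned int checksum_of_skb(struct sk_buff *skb,
				    unsigned int from, unsigned int to)
{
	struct skb_seq_state st;
	const u8 *data;
	unsigned int len, consumed = 0, csum = 0;

	skb_prepare_seq_read(skb, from, to, &st);

	/* Each call returns one contiguous block: first the linear head,
	 * then each page fragment (kmap'ed on demand), then the frag_list
	 * members.  A return of 0 means the upper offset was reached. */
	while ((len = skb_seq_read(consumed, &data, &st)) != 0) {
		csum = csum_partial(data, len, csum);
		consumed += len;
	}

	/* skb_seq_read() ran to completion, so any fragment mapping has
	 * been dropped already; skb_abort_seq_read() is only required
	 * when the loop is left before that happens. */
	return csum;
}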
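skb_find_text() plugs this walker into the textsearch core from the same kernel series: a caller compiles a pattern once with textsearch_prepare() and can then search any skb, fragments and frag_list included. A hedged usage sketch follows; skb_contains_pattern, the "kmp" algorithm choice and the "HTTP" pattern are assumptions for illustration.

static int skb_contains_pattern(struct sk_buff *skb)
{
	struct ts_config *conf;
	struct ts_state state;
	unsigned int pos;

	/* Compile the pattern once; TS_AUTOLOAD lets the textsearch
	 * core load the named algorithm module on demand. */
	conf = textsearch_prepare("kmp", "HTTP", 4, GFP_KERNEL,
				  TS_AUTOLOAD);
	if (IS_ERR(conf))
		return PTR_ERR(conf);

	/* Search the whole skb; UINT_MAX means no match. */
	pos = skb_find_text(skb, 0, skb->len, conf, &state);

	textsearch_destroy(conf);
	return pos != UINT_MAX;
}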