X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=include%2Flinux%2Fskbuff.h;h=976285a2818002451d5d7e14fee591ce561a4d0e;hb=refs%2Fheads%2Fvserver;hp=7e631a542577deac5830f72f223ff45825b08931;hpb=76828883507a47dae78837ab5dec5a5b4513c667;p=linux-2.6.git diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7e631a542..976285a28 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -14,7 +14,6 @@ #ifndef _LINUX_SKBUFF_H #define _LINUX_SKBUFF_H -#include #include #include #include @@ -23,19 +22,19 @@ #include #include #include -#include -#include -#include #include #include #include +#include +#include #define HAVE_ALLOC_SKB /* For the drivers to know */ #define HAVE_ALIGNABLE_SKB /* Ditto 8) */ #define CHECKSUM_NONE 0 -#define CHECKSUM_HW 1 +#define CHECKSUM_PARTIAL 1 #define CHECKSUM_UNNECESSARY 2 +#define CHECKSUM_COMPLETE 3 #define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \ ~(SMP_CACHE_BYTES - 1)) @@ -56,17 +55,17 @@ * Apparently with secret goal to sell you new device, when you * will add new protocol to your host. F.e. IPv6. 8) * - * HW: the most generic way. Device supplied checksum of _all_ + * COMPLETE: the most generic way. Device supplied checksum of _all_ * the packet as seen by netif_rx in skb->csum. * NOTE: Even if device supports only some protocols, but - * is able to produce some skb->csum, it MUST use HW, + * is able to produce some skb->csum, it MUST use COMPLETE, * not UNNECESSARY. * * B. Checksumming on output. * * NONE: skb is checksummed by protocol or csum is not required. * - * HW: device is required to csum packet as seen by hard_start_xmit + * PARTIAL: device is required to csum packet as seen by hard_start_xmit * from skb->h.raw to the end and to record the checksum * at skb->h.raw+skb->csum. * @@ -134,10 +133,11 @@ struct skb_frag_struct { struct skb_shared_info { atomic_t dataref; unsigned short nr_frags; - unsigned short tso_size; - unsigned short tso_segs; - unsigned short ufo_size; - unsigned int ip6_frag_id; + unsigned short gso_size; + /* Warning: this field is not always filled in (UFO)! */ + unsigned short gso_segs; + unsigned short gso_type; + __be32 ip6_frag_id; struct sk_buff *frag_list; skb_frag_t frags[MAX_SKB_FRAGS]; }; @@ -168,6 +168,19 @@ enum { SKB_FCLONE_CLONE, }; +enum { + SKB_GSO_TCPV4 = 1 << 0, + SKB_GSO_UDP = 1 << 1, + + /* This indicates the skb is from an untrusted source. */ + SKB_GSO_DODGY = 1 << 2, + + /* This indicates the tcp segment has CWR set. */ + SKB_GSO_TCP_ECN = 1 << 3, + + SKB_GSO_TCPV6 = 1 << 4, +}; + /** * struct sk_buff - socket buffer * @next: Next buffer in list @@ -175,7 +188,7 @@ enum { * @sk: Socket we are owned by * @tstamp: Time we arrived * @dev: Device we arrived on/are leaving by - * @input_dev: Device we arrived on + * @iif: ifindex of device we arrived on * @h: Transport layer header * @nh: Network layer header * @mac: Link layer header @@ -189,6 +202,8 @@ enum { * @local_df: allow local fragmentation * @cloned: Head may be cloned (check refcnt to be sure) * @nohdr: Payload reference only, must not modify header + * @proto_data_valid: Protocol data validated since arriving at localhost + * @proto_csum_blank: Protocol csum must be added before leaving localhost * @pkt_type: Packet class * @fclone: skbuff clone status * @ip_summed: Driver fed us an IP checksum @@ -201,7 +216,7 @@ enum { * @tail: Tail pointer * @end: End pointer * @destructor: Destruct function - * @nfmark: Can be used for communication between hooks + * @mark: Generic packet mark * @nfct: Associated connection, if any * @ipvs_property: skbuff is owned by ipvs * @nfctinfo: Relationship of this skb to the connection @@ -209,6 +224,9 @@ enum { * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @tc_index: Traffic control index * @tc_verd: traffic control verdict + * @dma_cookie: a cookie to one of several possible DMA operations + * done by skb DMA functions + * @secmark: security marking */ struct sk_buff { @@ -219,7 +237,8 @@ struct sk_buff { struct sock *sk; struct skb_timeval tstamp; struct net_device *dev; - struct net_device *input_dev; + int iif; + /* 4 byte hole on 64 bit*/ union { struct tcphdr *th; @@ -255,8 +274,11 @@ struct sk_buff { unsigned int len, data_len, - mac_len, - csum; + mac_len; + union { + __wsum csum; + __u32 csum_offset; + }; __u32 priority; __u8 local_df:1, cloned:1, @@ -265,12 +287,17 @@ struct sk_buff { nfctinfo:3; __u8 pkt_type:3, fclone:2, +#ifndef CONFIG_XEN ipvs_property:1; +#else + ipvs_property:1, + proto_data_valid:1, + proto_csum_blank:1; +#endif __be16 protocol; void (*destructor)(struct sk_buff *skb); #ifdef CONFIG_NETFILTER - __u32 nfmark; struct nf_conntrack *nfct; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct sk_buff *nfct_reasm; @@ -284,8 +311,15 @@ struct sk_buff { #ifdef CONFIG_NET_CLS_ACT __u16 tc_verd; /* traffic control verdict */ #endif +#endif +#ifdef CONFIG_NET_DMA + dma_cookie_t dma_cookie; +#endif +#ifdef CONFIG_NETWORK_SECMARK + __u32 secmark; #endif + __u32 mark; /* These elements must be at the end, see alloc_skb() for details. */ unsigned int truesize; @@ -304,24 +338,26 @@ struct sk_buff { #include +extern void kfree_skb(struct sk_buff *skb); extern void __kfree_skb(struct sk_buff *skb); extern struct sk_buff *__alloc_skb(unsigned int size, - gfp_t priority, int fclone); + gfp_t priority, int fclone, int node); static inline struct sk_buff *alloc_skb(unsigned int size, gfp_t priority) { - return __alloc_skb(size, priority, 0); + return __alloc_skb(size, priority, 0, -1); } static inline struct sk_buff *alloc_skb_fclone(unsigned int size, gfp_t priority) { - return __alloc_skb(size, priority, 1); + return __alloc_skb(size, priority, 1, -1); } -extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, +extern struct sk_buff *alloc_skb_from_cache(struct kmem_cache *cp, unsigned int size, - gfp_t priority); + gfp_t priority, + int fclone); extern void kfree_skbmem(struct sk_buff *skb); extern struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); @@ -337,12 +373,19 @@ extern struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, extern struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, gfp_t priority); -extern struct sk_buff * skb_pad(struct sk_buff *skb, int pad); +extern int skb_pad(struct sk_buff *skb, int pad); #define dev_kfree_skb(a) kfree_skb(a) extern void skb_over_panic(struct sk_buff *skb, int len, void *here); extern void skb_under_panic(struct sk_buff *skb, int len, void *here); +extern void skb_truesize_bug(struct sk_buff *skb); + +static inline void skb_truesize_check(struct sk_buff *skb) +{ + if (unlikely((int)skb->truesize < sizeof(struct sk_buff) + skb->len)) + skb_truesize_bug(skb); +} extern int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, int getfrag(void *from, char *to, int offset, @@ -403,22 +446,6 @@ static inline struct sk_buff *skb_get(struct sk_buff *skb) * atomic change. */ -/** - * kfree_skb - free an sk_buff - * @skb: buffer to free - * - * Drop a reference to the buffer and free it if the usage count has - * hit zero. - */ -static inline void kfree_skb(struct sk_buff *skb) -{ - if (likely(atomic_read(&skb->users) == 1)) - smp_rmb(); - else if (likely(!atomic_dec_and_test(&skb->users))) - return; - __kfree_skb(skb); -} - /** * skb_cloned - is the buffer a clone * @skb: buffer to check @@ -589,6 +616,14 @@ static inline __u32 skb_queue_len(const struct sk_buff_head *list_) return list_->qlen; } +/* + * This function creates a split out lock class for each invocation; + * this is needed for now since a whole lot of users of the skb-queue + * infrastructure in drivers have different locking usage (in hardirq) + * than the networking core (in softirq only). In the long run either the + * network layer or drivers should need annotation to consolidate the + * main types of usage into 3 classes. + */ static inline void skb_queue_head_init(struct sk_buff_head *list) { spin_lock_init(&list->lock); @@ -956,6 +991,25 @@ static inline void skb_reserve(struct sk_buff *skb, int len) #define NET_IP_ALIGN 2 #endif +/* + * The networking layer reserves some headroom in skb data (via + * dev_alloc_skb). This is used to avoid having to reallocate skb data when + * the header has to grow. In the default case, if the header has to grow + * 16 bytes or less we avoid the reallocation. + * + * Unfortunately this headroom changes the DMA alignment of the resulting + * network packet. As for NET_IP_ALIGN, this unaligned DMA is expensive + * on some architectures. An architecture can override this value, + * perhaps setting it to a cacheline in size (since that will maintain + * cacheline alignment of the DMA). It must be a power of 2. + * + * Various parts of the networking layer expect at least 16 bytes of + * headroom, you should not reduce this. + */ +#ifndef NET_SKB_PAD +#define NET_SKB_PAD 16 +#endif + extern int ___pskb_trim(struct sk_buff *skb, unsigned int len); static inline void __skb_trim(struct sk_buff *skb, unsigned int len) @@ -997,6 +1051,21 @@ static inline int pskb_trim(struct sk_buff *skb, unsigned int len) return (len < skb->len) ? __pskb_trim(skb, len) : 0; } +/** + * pskb_trim_unique - remove end from a paged unique (not cloned) buffer + * @skb: buffer to alter + * @len: new length + * + * This is identical to pskb_trim except that the caller knows that + * the skb is not cloned so we should never get an error due to out- + * of-memory. + */ +static inline void pskb_trim_unique(struct sk_buff *skb, unsigned int len) +{ + int err = pskb_trim(skb, len); + BUG_ON(err); +} + /** * skb_orphan - orphan a buffer * @skb: buffer to orphan @@ -1031,7 +1100,7 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) #ifndef CONFIG_HAVE_ARCH_DEV_ALLOC_SKB /** - * __dev_alloc_skb - allocate an skbuff for sending + * __dev_alloc_skb - allocate an skbuff for receiving * @length: length to allocate * @gfp_mask: get_free_pages mask, passed to alloc_skb * @@ -1040,22 +1109,22 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) * the headroom they think they need without accounting for the * built in space. The built in space is used for optimisations. * - * %NULL is returned in there is no free memory. + * %NULL is returned if there is no free memory. */ static inline struct sk_buff *__dev_alloc_skb(unsigned int length, gfp_t gfp_mask) { - struct sk_buff *skb = alloc_skb(length + 16, gfp_mask); + struct sk_buff *skb = alloc_skb(length + NET_SKB_PAD, gfp_mask); if (likely(skb)) - skb_reserve(skb, 16); + skb_reserve(skb, NET_SKB_PAD); return skb; } #else -extern struct sk_buff *__dev_alloc_skb(unsigned int length, int gfp_mask); +extern struct sk_buff *__dev_alloc_skb(unsigned int length, gfp_t gfp_mask); #endif /** - * dev_alloc_skb - allocate an skbuff for sending + * dev_alloc_skb - allocate an skbuff for receiving * @length: length to allocate * * Allocate a new &sk_buff and assign it a usage count of one. The @@ -1063,7 +1132,7 @@ extern struct sk_buff *__dev_alloc_skb(unsigned int length, int gfp_mask); * the headroom they think they need without accounting for the * built in space. The built in space is used for optimisations. * - * %NULL is returned in there is no free memory. Although this function + * %NULL is returned if there is no free memory. Although this function * allocates memory it can be called from an interrupt. */ static inline struct sk_buff *dev_alloc_skb(unsigned int length) @@ -1071,6 +1140,28 @@ static inline struct sk_buff *dev_alloc_skb(unsigned int length) return __dev_alloc_skb(length, GFP_ATOMIC); } +extern struct sk_buff *__netdev_alloc_skb(struct net_device *dev, + unsigned int length, gfp_t gfp_mask); + +/** + * netdev_alloc_skb - allocate an skbuff for rx on a specific device + * @dev: network device to receive on + * @length: length to allocate + * + * Allocate a new &sk_buff and assign it a usage count of one. The + * buffer has unspecified headroom built in. Users should allocate + * the headroom they think they need without accounting for the + * built in space. The built in space is used for optimisations. + * + * %NULL is returned if there is no free memory. Although this function + * allocates memory it can be called from an interrupt. + */ +static inline struct sk_buff *netdev_alloc_skb(struct net_device *dev, + unsigned int length) +{ + return __netdev_alloc_skb(dev, length, GFP_ATOMIC); +} + /** * skb_cow - copy header of skb when it is required * @skb: buffer to cow @@ -1085,13 +1176,15 @@ static inline struct sk_buff *dev_alloc_skb(unsigned int length) */ static inline int skb_cow(struct sk_buff *skb, unsigned int headroom) { - int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb); + int delta = (headroom > NET_SKB_PAD ? headroom : NET_SKB_PAD) - + skb_headroom(skb); if (delta < 0) delta = 0; if (delta || skb_cloned(skb)) - return pskb_expand_head(skb, (delta + 15) & ~15, 0, GFP_ATOMIC); + return pskb_expand_head(skb, (delta + (NET_SKB_PAD-1)) & + ~(NET_SKB_PAD-1), 0, GFP_ATOMIC); return 0; } @@ -1102,16 +1195,15 @@ static inline int skb_cow(struct sk_buff *skb, unsigned int headroom) * * Pads up a buffer to ensure the trailing bytes exist and are * blanked. If the buffer already contains sufficient data it - * is untouched. Returns the buffer, which may be a replacement - * for the original, or NULL for out of memory - in which case - * the original buffer is still freed. + * is untouched. Otherwise it is extended. Returns zero on + * success. The skb is freed on error. */ -static inline struct sk_buff *skb_padto(struct sk_buff *skb, unsigned int len) +static inline int skb_padto(struct sk_buff *skb, unsigned int len) { unsigned int size = skb->len; if (likely(size >= len)) - return skb; + return 0; return skb_pad(skb, len-size); } @@ -1122,8 +1214,7 @@ static inline int skb_add_data(struct sk_buff *skb, if (skb->ip_summed == CHECKSUM_NONE) { int err = 0; - unsigned int csum = csum_and_copy_from_user(from, - skb_put(skb, copy), + __wsum csum = csum_and_copy_from_user(from, skb_put(skb, copy), copy, 0, &err); if (!err) { skb->csum = csum_block_add(skb->csum, csum, off); @@ -1148,18 +1239,34 @@ static inline int skb_can_coalesce(struct sk_buff *skb, int i, return 0; } +static inline int __skb_linearize(struct sk_buff *skb) +{ + return __pskb_pull_tail(skb, skb->data_len) ? 0 : -ENOMEM; +} + /** * skb_linearize - convert paged skb to linear one * @skb: buffer to linarize - * @gfp: allocation mode * * If there is no free memory -ENOMEM is returned, otherwise zero * is returned and the old skb data released. */ -extern int __skb_linearize(struct sk_buff *skb, gfp_t gfp); -static inline int skb_linearize(struct sk_buff *skb, gfp_t gfp) +static inline int skb_linearize(struct sk_buff *skb) +{ + return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0; +} + +/** + * skb_linearize_cow - make sure skb is linear and writable + * @skb: buffer to process + * + * If there is no free memory -ENOMEM is returned, otherwise zero + * is returned and the old skb data released. + */ +static inline int skb_linearize_cow(struct sk_buff *skb) { - return __skb_linearize(skb, gfp); + return skb_is_nonlinear(skb) || skb_cloned(skb) ? + __skb_linearize(skb) : 0; } /** @@ -1169,17 +1276,19 @@ static inline int skb_linearize(struct sk_buff *skb, gfp_t gfp) * @len: length of data pulled * * After doing a pull on a received packet, you need to call this to - * update the CHECKSUM_HW checksum, or set ip_summed to CHECKSUM_NONE - * so that it can be recomputed from scratch. + * update the CHECKSUM_COMPLETE checksum, or set ip_summed to + * CHECKSUM_NONE so that it can be recomputed from scratch. */ static inline void skb_postpull_rcsum(struct sk_buff *skb, - const void *start, int len) + const void *start, unsigned int len) { - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); } +unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); + /** * pskb_trim_rcsum - trim received skb and update checksum * @skb: buffer to trim @@ -1193,29 +1302,11 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) { if (likely(len >= skb->len)) return 0; - if (skb->ip_summed == CHECKSUM_HW) + if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; return __pskb_trim(skb, len); } -static inline void *kmap_skb_frag(const skb_frag_t *frag) -{ -#ifdef CONFIG_HIGHMEM - BUG_ON(in_irq()); - - local_bh_disable(); -#endif - return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ); -} - -static inline void kunmap_skb_frag(void *vaddr) -{ - kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ); -#ifdef CONFIG_HIGHMEM - local_bh_enable(); -#endif -} - #define skb_queue_walk(queue, skb) \ for (skb = (queue)->next; \ prefetch(skb->next), (skb != (struct sk_buff *)(queue)); \ @@ -1240,20 +1331,20 @@ extern int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, extern void skb_free_datagram(struct sock *sk, struct sk_buff *skb); extern void skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); -extern unsigned int skb_checksum(const struct sk_buff *skb, int offset, - int len, unsigned int csum); +extern __wsum skb_checksum(const struct sk_buff *skb, int offset, + int len, __wsum csum); extern int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); extern int skb_store_bits(const struct sk_buff *skb, int offset, void *from, int len); -extern unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, +extern __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, - unsigned int csum); + __wsum csum); extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); extern void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); -extern void skb_release_data(struct sk_buff *skb); +extern struct sk_buff *skb_segment(struct sk_buff *skb, int features); static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) @@ -1304,7 +1395,7 @@ static inline void skb_set_timestamp(struct sk_buff *skb, const struct timeval * extern void __net_timestamp(struct sk_buff *skb); -extern unsigned int __skb_checksum_complete(struct sk_buff *skb); +extern __sum16 __skb_checksum_complete(struct sk_buff *skb); /** * skb_checksum_complete - Calculate checksum of an entire packet @@ -1351,16 +1442,6 @@ static inline void nf_conntrack_put_reasm(struct sk_buff *skb) kfree_skb(skb); } #endif -static inline void nf_reset(struct sk_buff *skb) -{ - nf_conntrack_put(skb->nfct); - skb->nfct = NULL; -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - nf_conntrack_put_reasm(skb->nfct_reasm); - skb->nfct_reasm = NULL; -#endif -} - #ifdef CONFIG_BRIDGE_NETFILTER static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge) { @@ -1373,9 +1454,46 @@ static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge) atomic_inc(&nf_bridge->use); } #endif /* CONFIG_BRIDGE_NETFILTER */ +static inline void nf_reset(struct sk_buff *skb) +{ + nf_conntrack_put(skb->nfct); + skb->nfct = NULL; +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + nf_conntrack_put_reasm(skb->nfct_reasm); + skb->nfct_reasm = NULL; +#endif +#ifdef CONFIG_BRIDGE_NETFILTER + nf_bridge_put(skb->nf_bridge); + skb->nf_bridge = NULL; +#endif +} + #else /* CONFIG_NETFILTER */ static inline void nf_reset(struct sk_buff *skb) {} #endif /* CONFIG_NETFILTER */ +#ifdef CONFIG_NETWORK_SECMARK +static inline void skb_copy_secmark(struct sk_buff *to, const struct sk_buff *from) +{ + to->secmark = from->secmark; +} + +static inline void skb_init_secmark(struct sk_buff *skb) +{ + skb->secmark = 0; +} +#else +static inline void skb_copy_secmark(struct sk_buff *to, const struct sk_buff *from) +{ } + +static inline void skb_init_secmark(struct sk_buff *skb) +{ } +#endif + +static inline int skb_is_gso(const struct sk_buff *skb) +{ + return skb_shinfo(skb)->gso_size; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */