X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=include%2Fnet%2Fsock.h;h=ecf63abce24c0098248cfeacb4e051e8f1998982;hb=refs%2Fheads%2Fvserver;hp=924d3481bdeb2183e5f828364ad5264879ab9027;hpb=4e76c8a9fa413ccc09d3f7f664183dcce3555d57;p=linux-2.6.git

diff --git a/include/net/sock.h b/include/net/sock.h
index 924d3481b..ecf63abce 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -40,13 +40,14 @@
 #ifndef _SOCK_H
 #define _SOCK_H
 
-#include
 #include
 #include
 #include
 #include
+#include
 #include
 #include 	/* struct sk_buff */
+#include
 #include
 #include
@@ -62,7 +63,7 @@
  */
 
 /* Define this to get the SOCK_DBG debugging facility. */
-//#define SOCK_DEBUGGING
+#define SOCK_DEBUGGING
 #ifdef SOCK_DEBUGGING
 #define SOCK_DEBUG(sk, msg...) do { if ((sk) && sock_flag((sk), SOCK_DBG)) \
 					printk(KERN_DEBUG msg); } while (0)
@@ -79,14 +80,17 @@
 typedef struct {
 	spinlock_t		slock;
 	struct sock_iocb	*owner;
 	wait_queue_head_t	wq;
+	/*
+	 * We express the mutex-alike socket_lock semantics
+	 * to the lock validator by explicitly managing
+	 * the slock as a lock variant (in addition to
+	 * the slock itself):
+	 */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map dep_map;
+#endif
 } socket_lock_t;
 
-#define sock_lock_init(__sk) \
-do {	spin_lock_init(&((__sk)->sk_lock.slock)); \
-	(__sk)->sk_lock.owner = NULL; \
-	init_waitqueue_head(&((__sk)->sk_lock.wq)); \
-} while(0)
-
 struct sock;
 struct proto;
@@ -136,6 +140,7 @@ struct sock_common {
  * @sk_receive_queue: incoming packets
  * @sk_wmem_alloc: transmit queue bytes committed
  * @sk_write_queue: Packet sending queue
+ * @sk_async_wait_queue: DMA copied packets
  * @sk_omem_alloc: "o" is "option" or "other"
  * @sk_wmem_queued: persistent queue size
  * @sk_forward_alloc: space allocated forward
@@ -144,6 +149,7 @@ struct sock_common {
  * @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, %SO_OOBINLINE settings
  * @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets
  * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
+ * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
  * @sk_lingertime: %SO_LINGER l_linger setting
  * @sk_backlog: always used with the per-socket spinlock held
  * @sk_callback_lock: used with the callbacks in the end of this struct
@@ -165,7 +171,7 @@ struct sock_common {
  * @sk_timer: sock cleanup timer
  * @sk_stamp: time stamp of last packet received
  * @sk_socket: Identd and reporting IO signals
- * @sk_user_data: RPC and Tux layer private data
+ * @sk_user_data: RPC layer private data
  * @sk_sndmsg_page: cached page for sendmsg
  * @sk_sndmsg_off: cached offset for sendmsg
  * @sk_send_head: front of stuff to transmit
@@ -175,7 +181,6 @@ struct sock_common {
  * @sk_data_ready: callback to indicate there is data to be processed
  * @sk_write_space: callback to indicate there is bf sending space available
  * @sk_error_report: callback to indicate errors (e.g. %MSG_ERRQUEUE)
- * @sk_create_child - callback to get new socket events
  * @sk_backlog_rcv: callback to process the backlog
 * @sk_destruct: called at sock freeing time, i.e. when all refcnt == 0
 */
@@ -214,11 +219,13 @@ struct sock {
 	atomic_t		sk_omem_alloc;
 	struct sk_buff_head	sk_receive_queue;
 	struct sk_buff_head	sk_write_queue;
+	struct sk_buff_head	sk_async_wait_queue;
 	int			sk_wmem_queued;
 	int			sk_forward_alloc;
 	gfp_t			sk_allocation;
 	int			sk_sndbuf;
 	int			sk_route_caps;
+	int			sk_gso_type;
 	int			sk_rcvlowat;
 	unsigned long		sk_flags;
 	unsigned long		sk_lingertime;
@@ -259,7 +266,6 @@ struct sock {
 	void			(*sk_error_report)(struct sock *sk);
 	int			(*sk_backlog_rcv)(struct sock *sk,
						  struct sk_buff *skb);
-	void			(*sk_create_child)(struct sock *sk, struct sock *newsk);
 	void			(*sk_destruct)(struct sock *sk);
 };
@@ -392,7 +398,6 @@ enum sock_flags {
 	SOCK_USE_WRITE_QUEUE, /* whether to call sk->sk_write_space in sock_wfree */
 	SOCK_DBG, /* %SO_DEBUG setting */
 	SOCK_RCVTSTAMP, /* %SO_TIMESTAMP setting */
-	SOCK_NO_LARGESEND, /* whether to sent large segments or not */
 	SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */
 	SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */
 };
@@ -574,7 +579,7 @@ struct proto {
 	int			*sysctl_rmem;
 	int			max_header;
 
-	kmem_cache_t		*slab;
+	struct kmem_cache	*slab;
 	unsigned int		obj_size;
 
 	atomic_t		*orphan_count;
@@ -669,7 +674,6 @@ struct sock_iocb {
 	struct sock		*sk;
 	struct scm_cookie	*scm;
 	struct msghdr		*msg, async_msg;
-	struct iovec		async_iov;
 	struct kiocb		*kiocb;
 };
@@ -750,18 +754,46 @@ static inline int sk_stream_wmem_schedule(struct sock *sk, int size)
  */
 #define sock_owned_by_user(sk)	((sk)->sk_lock.owner)
 
-extern void FASTCALL(lock_sock(struct sock *sk));
+/*
+ * Macro so as to not evaluate some arguments when
+ * lockdep is not enabled.
+ *
+ * Mark both the sk_lock and the sk_lock.slock as a
+ * per-address-family lock class.
+ */
+#define sock_lock_init_class_and_name(sk, sname, skey, name, key)	\
+do {									\
+	sk->sk_lock.owner = NULL;					\
+	init_waitqueue_head(&sk->sk_lock.wq);				\
+	spin_lock_init(&(sk)->sk_lock.slock);				\
+	debug_check_no_locks_freed((void *)&(sk)->sk_lock,		\
+			sizeof((sk)->sk_lock));				\
+	lockdep_set_class_and_name(&(sk)->sk_lock.slock,		\
+			(skey), (sname));				\
+	lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0);	\
+} while (0)
+
+extern void FASTCALL(lock_sock_nested(struct sock *sk, int subclass));
+
+static inline void lock_sock(struct sock *sk)
+{
+	lock_sock_nested(sk, 0);
+}
+
 extern void FASTCALL(release_sock(struct sock *sk));
 
 /* BH context may only use the following locking interface. */
 #define bh_lock_sock(__sk)	spin_lock(&((__sk)->sk_lock.slock))
+#define bh_lock_sock_nested(__sk) \
+				spin_lock_nested(&((__sk)->sk_lock.slock), \
				SINGLE_DEPTH_NESTING)
 #define bh_unlock_sock(__sk)	spin_unlock(&((__sk)->sk_lock.slock))
 
 extern struct sock *sk_alloc(int family, gfp_t priority, struct proto *prot, int zero_it);
 extern void			sk_free(struct sock *sk);
-extern struct sock		*sk_clone(struct sock *sk,
+extern struct sock		*sk_clone(const struct sock *sk,
					  const gfp_t priority);
 
 extern struct sk_buff		*sock_wmalloc(struct sock *sk,
@@ -863,36 +895,37 @@ extern void sock_init_data(struct socket *sock, struct sock *sk);
  *
  */
 
-static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock)
+static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
 {
 	int err;
+	struct sk_filter *filter;
 
 	err = security_sock_rcv_skb(sk, skb);
 	if (err)
 		return err;
 
-	if (sk->sk_filter) {
-		struct sk_filter *filter;
-
-		if (needlock)
-			bh_lock_sock(sk);
-
-		filter = sk->sk_filter;
-		if (filter) {
-			unsigned int pkt_len = sk_run_filter(skb, filter->insns,
-							     filter->len);
-			if (!pkt_len)
-				err = -EPERM;
-			else
-				skb_trim(skb, pkt_len);
-		}
-
-		if (needlock)
-			bh_unlock_sock(sk);
+	rcu_read_lock_bh();
+	filter = sk->sk_filter;
+	if (filter) {
+		unsigned int pkt_len = sk_run_filter(skb, filter->insns,
+						     filter->len);
+		err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
 	}
+	rcu_read_unlock_bh();
+
 	return err;
 }
 
+/**
+ * sk_filter_rcu_free: Free a socket filter
+ * @rcu: rcu_head that contains the sk_filter to free
+ */
+static inline void sk_filter_rcu_free(struct rcu_head *rcu)
+{
+	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
+	kfree(fp);
+}
+
 /**
  * sk_filter_release: Release a socket filter
  * @sk: socket
@@ -900,7 +933,7 @@ static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock)
  *
  * Remove a filter from a socket and release its resources.
  */
- 
+
 static inline void sk_filter_release(struct sock *sk, struct sk_filter *fp)
 {
 	unsigned int size = sk_filter_len(fp);
@@ -908,7 +941,7 @@ static inline void sk_filter_release(struct sock *sk, struct sk_filter *fp)
 	atomic_sub(size, &sk->sk_omem_alloc);
 
 	if (atomic_dec_and_test(&fp->refcnt))
-		kfree(fp);
+		call_rcu_bh(&fp->rcu, sk_filter_rcu_free);
 }
 
 static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
@@ -949,7 +982,8 @@ static inline void sock_put(struct sock *sk)
 		sk_free(sk);
 }
 
-extern int sk_receive_skb(struct sock *sk, struct sk_buff *skb);
+extern int sk_receive_skb(struct sock *sk, struct sk_buff *skb,
+			  const int nested);
 
 /* Detach socket from process context.
  * Announce socket dead, detach it from wait queue and inode.
@@ -973,9 +1007,23 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
 	sk->sk_sleep = &parent->wait;
 	parent->sk = sk;
 	sk->sk_socket = parent;
+	security_sock_graft(sk, parent);
 	write_unlock_bh(&sk->sk_callback_lock);
 }
 
+static inline void sock_copy(struct sock *nsk, const struct sock *osk)
+{
+#ifdef CONFIG_SECURITY_NETWORK
+	void *sptr = nsk->sk_security;
+#endif
+
+	memcpy(nsk, osk, osk->sk_prot->obj_size);
+#ifdef CONFIG_SECURITY_NETWORK
+	nsk->sk_security = sptr;
+	security_sk_clone(osk, nsk);
+#endif
+}
+
 extern int sock_i_uid(struct sock *sk);
 extern unsigned long sock_i_ino(struct sock *sk);
@@ -1038,15 +1086,20 @@ extern struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie);
 extern struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie);
 
+static inline int sk_can_gso(const struct sock *sk)
+{
+	return net_gso_ok(sk->sk_route_caps, sk->sk_gso_type);
+}
+
 static inline void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 {
 	__sk_dst_set(sk, dst);
 	sk->sk_route_caps = dst->dev->features;
 	if (sk->sk_route_caps & NETIF_F_GSO)
-		sk->sk_route_caps |= NETIF_F_TSO;
-	if (sk->sk_route_caps & NETIF_F_TSO) {
-		if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len)
-			sk->sk_route_caps &= ~NETIF_F_TSO;
+		sk->sk_route_caps |= NETIF_F_GSO_MASK;
+	if (sk_can_gso(sk)) {
+		if (dst->header_len)
+			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
 		else
 			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
 	}
@@ -1064,7 +1117,7 @@ static inline int skb_copy_to_page(struct sock *sk, char __user *from,
 {
 	if (skb->ip_summed == CHECKSUM_NONE) {
 		int err = 0;
-		unsigned int csum = csum_and_copy_from_user(from,
+		__wsum csum = csum_and_copy_from_user(from,
						page_address(page) + off,
							copy, 0, &err);
 		if (err)
@@ -1281,15 +1334,27 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
  * sk_eat_skb - Release a skb if it is no longer needed
  * @sk: socket to eat this skb from
  * @skb: socket buffer to eat
+ * @copied_early: flag indicating whether DMA operations copied this data early
  *
  * This routine must be called with interrupts disabled or with the socket
  * locked so that the sk_buff queue operation is ok.
  */
-static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb)
+#ifdef CONFIG_NET_DMA
+static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_early)
+{
+	__skb_unlink(skb, &sk->sk_receive_queue);
+	if (!copied_early)
+		__kfree_skb(skb);
+	else
+		__skb_queue_tail(&sk->sk_async_wait_queue, skb);
+}
+#else
+static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_early)
+{
 	__skb_unlink(skb, &sk->sk_receive_queue);
 	__kfree_skb(skb);
 }
+#endif
 
 extern void sock_enable_timestamp(struct sock *sk);
 extern int sock_get_timestamp(struct sock *, struct timeval __user *);
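Background notes on the changes above (not part of the patch).

The sock_lock_init_class_and_name() macro that replaces sock_lock_init() is what per-protocol code calls so that lockdep keeps one lock class per address family, for both the sleeping sk_lock and the underlying sk_lock.slock. A minimal sketch of how a protocol family would typically wire this up; the AF_MYPROTO strings, key variables, and function name are invented for the example.

#include <net/sock.h>	/* sock_lock_init_class_and_name(), struct sock */

/*
 * Illustrative sketch only: give a protocol family's sockets their own
 * lockdep classes via the macro added in the patch above.
 */
static struct lock_class_key myproto_slock_key;		/* class for sk_lock.slock */
static struct lock_class_key myproto_sk_lock_key;	/* class for the sleeping sk_lock */

static void myproto_init_sock_lock(struct sock *sk)
{
	sock_lock_init_class_and_name(sk,
			"slock-AF_MYPROTO", &myproto_slock_key,
			"sk_lock-AF_MYPROTO", &myproto_sk_lock_key);
}

Without distinct classes, lockdep would report a false deadlock whenever one family's socket lock is taken while another family's lock is already held, which is a legitimate pattern (tunnels, error delivery into a different socket, and so on).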
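Related to the same lockdep work: lock_sock() is now a wrapper around lock_sock_nested(sk, 0), and bh_lock_sock_nested() plus the new "nested" argument to sk_receive_skb() let code that intentionally takes a second socket lock while holding a first one tell lockdep the nesting is deliberate. A hedged sketch; the outer/inner roles and helper name are made up for the illustration.

#include <net/sock.h>	/* lock_sock_nested(); SINGLE_DEPTH_NESTING comes via linux/lockdep.h */

/*
 * Illustrative only: take a second socket lock while one is already held,
 * annotated so lockdep does not flag it as recursive locking.
 */
static void myproto_deliver_nested(struct sock *outer, struct sock *inner,
				   struct sk_buff *skb)
{
	lock_sock(outer);
	/* ... decapsulate skb while the outer socket is locked ... */
	lock_sock_nested(inner, SINGLE_DEPTH_NESTING);
	/* ... hand the inner payload to the inner socket ... */
	release_sock(inner);
	release_sock(outer);
}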
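The sk_filter() rework drops the needlock/bh_lock_sock() protection: readers now dereference sk->sk_filter inside an rcu_read_lock_bh() section, and sk_filter_release() defers the final kfree() through call_rcu_bh() and sk_filter_rcu_free(). For that to be safe, the update side must publish a new filter with an RCU-aware store and let old readers drain before the memory is reused. The function below is a simplified illustration of that pairing, not the real attach path in net/core/filter.c.

#include <linux/rcupdate.h>	/* rcu_assign_pointer() */
#include <net/sock.h>		/* sk_filter_release(), struct sk_filter */

/*
 * Simplified illustration of the update side assumed by the RCU-bh read
 * side in sk_filter() above: publish the new filter, then defer the free.
 */
static void myproto_replace_filter(struct sock *sk, struct sk_filter *new_fp)
{
	struct sk_filter *old_fp;

	lock_sock(sk);					/* serialize concurrent updaters */
	old_fp = sk->sk_filter;
	rcu_assign_pointer(sk->sk_filter, new_fp);	/* publish to RCU-bh readers */
	release_sock(sk);

	if (old_fp)
		sk_filter_release(sk, old_fp);	/* final kfree() deferred via call_rcu_bh() */
}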
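The new sk_can_gso() helper, together with sk_gso_type and the NETIF_F_GSO_MASK handling in sk_setup_caps(), replaces the old NETIF_F_TSO/SOCK_NO_LARGESEND logic: a socket may use GSO only if the route's feature flags cover the specific GSO type the socket will generate. The sketch below only models the bit test performed by net_gso_ok() in linux/netdevice.h for this kernel generation; treat the exact shift and mask as an assumption and consult that header for the authoritative version.

#include <linux/netdevice.h>	/* NETIF_F_GSO_SHIFT, NETIF_F_GSO_MASK */

/*
 * Rough model of the test behind sk_can_gso(): sk_gso_type selects one
 * NETIF_F_GSO_* bit inside NETIF_F_GSO_MASK, and GSO is usable only when
 * the route's feature flags contain that bit.  Mirrors net_gso_ok() as an
 * assumption; not copied from this patch.
 */
static inline int myproto_gso_ok(int features, int gso_type)
{
	int feature = gso_type << NETIF_F_GSO_SHIFT;

	return (features & feature) == feature;
}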
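Finally, with CONFIG_NET_DMA the new sk_eat_skb() signature no longer frees an skb whose payload was handed to a DMA engine; it parks the skb on the new sk_async_wait_queue until the asynchronous copy completes. A schematic caller is sketched below; the helper name and the surrounding copied_early bookkeeping are invented and much simpler than the real receive-path logic.

#include <net/sock.h>	/* sk_eat_skb(), sk_async_wait_queue */

/*
 * Schematic caller for the new sk_eat_skb() signature.  copied_early would
 * be set when a DMA engine was given the payload; the skb must then stay
 * alive on sk_async_wait_queue until that copy is reaped.
 */
static void myproto_consume_skb(struct sock *sk, struct sk_buff *skb,
				int copied_early)
{
	/* Called with the socket locked, as the kernel-doc above requires. */
	sk_eat_skb(sk, skb, copied_early);
}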