*/
#include <linux/capability.h>
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>
+#include <linux/highmem.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <net/tcp.h>
#endif
+/*
+ * Each address family might have different locking rules, so we have
+ * one slock key per address family:
+ */
+static struct lock_class_key af_family_keys[AF_MAX];
+static struct lock_class_key af_family_slock_keys[AF_MAX];
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+/*
+ * Make lock validator output more readable. (we pre-construct these
+ * strings build-time, so that runtime initialization of socket
+ * locks is fast):
+ */
+static const char *af_family_key_strings[AF_MAX+1] = {
+ "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" ,
+ "sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK",
+ "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" ,
+ "sk_lock-AF_X25" , "sk_lock-AF_INET6" , "sk_lock-AF_ROSE" ,
+ "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" ,
+ "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" ,
+ "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" ,
+ "sk_lock-21" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" ,
+ "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" ,
+ "sk_lock-27" , "sk_lock-28" , "sk_lock-29" ,
+ "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_MAX"
+};
+static const char *af_family_slock_key_strings[AF_MAX+1] = {
+ "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
+ "slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK",
+ "slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" ,
+ "slock-AF_X25" , "slock-AF_INET6" , "slock-AF_ROSE" ,
+ "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" ,
+ "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" ,
+ "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" ,
+ "slock-21" , "slock-AF_SNA" , "slock-AF_IRDA" ,
+ "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" ,
+ "slock-27" , "slock-28" , "slock-29" ,
+ "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_MAX"
+};
+#endif
+
+/*
+ * sk_callback_lock locking rules are per-address-family,
+ * so split the lock classes by using a per-AF key:
+ */
+static struct lock_class_key af_callback_keys[AF_MAX];
+
/* Take into consideration the size of the struct sk_buff overhead in the
* determination of these values, since that is non-constant across
* platforms. This makes socket queueing behavior and performance
#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
/* Run time adjustable parameters. */
-__u32 sysctl_wmem_max = SK_WMEM_MAX;
-__u32 sysctl_rmem_max = SK_RMEM_MAX;
-__u32 sysctl_wmem_default = SK_WMEM_MAX;
-__u32 sysctl_rmem_default = SK_RMEM_MAX;
+__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
+__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
+__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
+__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
/* Maximal space eaten by iovec or ancilliary data plus some space */
-int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);
+int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
goto out;
}
- /* It would be deadlock, if sock_queue_rcv_skb is used
- with socket lock! We assume that users of this
- function are lock free.
- */
- err = sk_filter(sk, skb, 1);
+ err = sk_filter(sk, skb);
if (err)
goto out;
}
EXPORT_SYMBOL(sock_queue_rcv_skb);
-int sk_receive_skb(struct sock *sk, struct sk_buff *skb)
+int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
{
int rc = NET_RX_SUCCESS;
- if (sk_filter(sk, skb, 0))
+ if (sk_filter(sk, skb))
goto discard_and_relse;
skb->dev = NULL;
- bh_lock_sock(sk);
- if (!sock_owned_by_user(sk))
- rc = sk->sk_backlog_rcv(sk, skb);
+ if (nested)
+ bh_lock_sock_nested(sk);
else
+ bh_lock_sock(sk);
+ if (!sock_owned_by_user(sk)) {
+ /*
+ * trylock + unlock semantics:
+ */
+ mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
+
+ rc = sk->sk_backlog_rcv(sk, skb);
+
+ mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
+ } else
sk_add_backlog(sk, skb);
bh_unlock_sock(sk);
out:
break;
case SO_DETACH_FILTER:
- spin_lock_bh(&sk->sk_lock.slock);
- filter = sk->sk_filter;
+ rcu_read_lock_bh();
+ filter = rcu_dereference(sk->sk_filter);
if (filter) {
- sk->sk_filter = NULL;
- spin_unlock_bh(&sk->sk_lock.slock);
+ rcu_assign_pointer(sk->sk_filter, NULL);
sk_filter_release(sk, filter);
+ rcu_read_unlock_bh();
break;
}
- spin_unlock_bh(&sk->sk_lock.slock);
+ rcu_read_unlock_bh();
ret = -ENONET;
break;
+ case SO_PASSSEC:
+ if (valbool)
+ set_bit(SOCK_PASSSEC, &sock->flags);
+ else
+ clear_bit(SOCK_PASSSEC, &sock->flags);
+ break;
+
/* We implement the SO_SNDLOWAT etc to
not be settable (1003.1g 5.3) */
default:
v.val = sk->sk_state == TCP_LISTEN;
break;
+ case SO_PASSSEC:
+ v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
+ break;
+
case SO_PEERSEC:
return security_socket_getpeersec_stream(sock, optval, optlen, len);
return 0;
}
+/*
+ * Initialize an sk_lock.
+ *
+ * (We also register the sk_lock with the lock validator.)
+ */
+static void inline sock_lock_init(struct sock *sk)
+{
+ sock_lock_init_class_and_name(sk,
+ af_family_slock_key_strings[sk->sk_family],
+ af_family_slock_keys + sk->sk_family,
+ af_family_key_strings[sk->sk_family],
+ af_family_keys + sk->sk_family);
+}
+
/**
* sk_alloc - All socket objects are allocated here
* @family: protocol family
struct proto *prot, int zero_it)
{
struct sock *sk = NULL;
- kmem_cache_t *slab = prot->slab;
+ struct kmem_cache *slab = prot->slab;
if (slab != NULL)
sk = kmem_cache_alloc(slab, priority);
if (sk->sk_destruct)
sk->sk_destruct(sk);
- filter = sk->sk_filter;
+ filter = rcu_dereference(sk->sk_filter);
if (filter) {
sk_filter_release(sk, filter);
- sk->sk_filter = NULL;
+ rcu_assign_pointer(sk->sk_filter, NULL);
}
sock_disable_timestamp(sk);
module_put(owner);
}
-struct sock *sk_clone(struct sock *sk, const gfp_t priority)
+struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
{
struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);
if (newsk != NULL) {
struct sk_filter *filter;
- memcpy(newsk, sk, sk->sk_prot->obj_size);
+ sock_copy(newsk, sk);
/* SANITY */
sock_vx_init(newsk);
atomic_set(&newsk->sk_omem_alloc, 0);
skb_queue_head_init(&newsk->sk_receive_queue);
skb_queue_head_init(&newsk->sk_write_queue);
+#ifdef CONFIG_NET_DMA
+ skb_queue_head_init(&newsk->sk_async_wait_queue);
+#endif
rwlock_init(&newsk->sk_dst_lock);
rwlock_init(&newsk->sk_callback_lock);
+ lockdep_set_class(&newsk->sk_callback_lock,
+ af_callback_keys + newsk->sk_family);
newsk->sk_dst_cache = NULL;
newsk->sk_wmem_queued = 0;
if (filter != NULL)
sk_filter_charge(newsk, filter);
- if (sk->sk_create_child)
- sk->sk_create_child(sk, newsk);
-
if (unlikely(xfrm_sk_clone_policy(newsk))) {
/* It is still raw copy of parent, so invalidate
* destructor and make plain sk_free() */
goto failure;
if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
- skb = alloc_skb(header_len, sk->sk_allocation);
+ skb = alloc_skb(header_len, gfp_mask);
if (skb) {
int npages;
int i;
skb_queue_head_init(&sk->sk_receive_queue);
skb_queue_head_init(&sk->sk_write_queue);
skb_queue_head_init(&sk->sk_error_queue);
+#ifdef CONFIG_NET_DMA
+ skb_queue_head_init(&sk->sk_async_wait_queue);
+#endif
sk->sk_send_head = NULL;
rwlock_init(&sk->sk_dst_lock);
rwlock_init(&sk->sk_callback_lock);
+ lockdep_set_class(&sk->sk_callback_lock,
+ af_callback_keys + sk->sk_family);
sk->sk_state_change = sock_def_wakeup;
sk->sk_data_ready = sock_def_readable;
atomic_set(&sk->sk_refcnt, 1);
}
-void fastcall lock_sock(struct sock *sk)
+void fastcall lock_sock_nested(struct sock *sk, int subclass)
{
might_sleep();
- spin_lock_bh(&(sk->sk_lock.slock));
+ spin_lock_bh(&sk->sk_lock.slock);
if (sk->sk_lock.owner)
__lock_sock(sk);
sk->sk_lock.owner = (void *)1;
- spin_unlock_bh(&(sk->sk_lock.slock));
+ spin_unlock(&sk->sk_lock.slock);
+ /*
+ * The sk_lock has mutex_lock() semantics here:
+ */
+ mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
+ local_bh_enable();
}
-EXPORT_SYMBOL(lock_sock);
+EXPORT_SYMBOL(lock_sock_nested);
void fastcall release_sock(struct sock *sk)
{
- spin_lock_bh(&(sk->sk_lock.slock));
+ /*
+ * The sk_lock has mutex_unlock() semantics:
+ */
+ mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
+
+ spin_lock_bh(&sk->sk_lock.slock);
if (sk->sk_backlog.tail)
__release_sock(sk);
sk->sk_lock.owner = NULL;
- if (waitqueue_active(&(sk->sk_lock.wq)))
- wake_up(&(sk->sk_lock.wq));
- spin_unlock_bh(&(sk->sk_lock.slock));
+ if (waitqueue_active(&sk->sk_lock.wq))
+ wake_up(&sk->sk_lock.wq);
+ spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(release_sock);
{
struct sock *sk = sock->sk;
- if (sk->sk_prot->compat_setsockopt != NULL)
+ if (sk->sk_prot->compat_getsockopt != NULL)
return sk->sk_prot->compat_getsockopt(sk, level, optname,
optval, optlen);
return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);