Add changes from the Linux-2.6 tree.
[linux-2.6.git] / net / core / sock.c
index 527af20..f48c116 100644 (file)
 #include <linux/poll.h>
 #include <linux/tcp.h>
 #include <linux/init.h>
+#include <linux/highmem.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -190,13 +191,13 @@ static struct lock_class_key af_callback_keys[AF_MAX];
 #define SK_RMEM_MAX            (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
 
 /* Run time adjustable parameters. */
-__u32 sysctl_wmem_max = SK_WMEM_MAX;
-__u32 sysctl_rmem_max = SK_RMEM_MAX;
-__u32 sysctl_wmem_default = SK_WMEM_MAX;
-__u32 sysctl_rmem_default = SK_RMEM_MAX;
+__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
+__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
+__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
+__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
 
 /* Maximal space eaten by iovec or ancillary data plus some space */
-int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);
+int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
 
 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 {
@@ -241,18 +242,6 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
        int err = 0;
        int skb_len;
 
-#if defined(CONFIG_VNET) || defined(CONFIG_VNET_MODULE)
-       /* Silently drop if VNET is active (if INET bind() has been
-        * overridden) and the context is not entitled to read the
-        * packet.
-        */
-       if (vnet_active &&
-           (int) sk->sk_xid > 0 && sk->sk_xid != skb->xid) {
-               err = -EPERM;
-               goto out;
-       }
-#endif
-
        /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
           number of warnings when compiling with -W --ANK
         */
@@ -262,11 +251,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
                goto out;
        }
 
-       /* It would be deadlock, if sock_queue_rcv_skb is used
-          with socket lock! We assume that users of this
-          function are lock free.
-       */
-       err = sk_filter(sk, skb, 1);
+       err = sk_filter(sk, skb);
        if (err)
                goto out;
 
@@ -289,16 +274,19 @@ out:
 }
 EXPORT_SYMBOL(sock_queue_rcv_skb);
 
-int sk_receive_skb(struct sock *sk, struct sk_buff *skb)
+int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
 {
        int rc = NET_RX_SUCCESS;
 
-       if (sk_filter(sk, skb, 0))
+       if (sk_filter(sk, skb))
                goto discard_and_relse;
 
        skb->dev = NULL;
 
-       bh_lock_sock(sk);
+       if (nested)
+               bh_lock_sock_nested(sk);
+       else
+               bh_lock_sock(sk);
        if (!sock_owned_by_user(sk)) {
                /*
                 * trylock + unlock semantics:
@@ -443,6 +431,18 @@ set_sndbuf:
                        }
                        goto set_sndbuf;
 
+               case SO_SETXID:
+                       if (current->xid) {
+                               ret = -EPERM;
+                               break;
+                       }
+                       if (val < 0 || val > MAX_S_CONTEXT) {
+                               ret = -EINVAL;
+                               break;
+                       }
+                       sk->sk_xid = val;
+                       break;
+
                case SO_RCVBUF:
                        /* Don't error on this BSD doesn't and if you think
                           about it this is right. Otherwise apps have to
@@ -537,20 +537,6 @@ set_rcvbuf:
                                clear_bit(SOCK_PASSCRED, &sock->flags);
                        break;
 
-#if defined(CONFIG_VNET) || defined(CONFIG_VNET_MODULE)
-               case SO_SETXID:
-                       if (current->xid) {
-                               ret = -EPERM;
-                               break;
-                       }
-                       if (val < 0 || val > MAX_S_CONTEXT) {
-                               ret = -EINVAL;
-                               break;
-                       }
-                       sk->sk_xid = val;
-                       break;
-#endif
-
                case SO_TIMESTAMP:
                        if (valbool)  {
                                sock_set_flag(sk, SOCK_RCVTSTAMP);
@@ -579,7 +565,7 @@ set_rcvbuf:
                        char devname[IFNAMSIZ]; 
 
                        /* Sorry... */ 
-                       if (!capable(CAP_NET_RAW)) {
+                       if (!nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) {
                                ret = -EPERM;
                                break;
                        }
@@ -635,15 +621,15 @@ set_rcvbuf:
                        break;
 
                case SO_DETACH_FILTER:
-                       spin_lock_bh(&sk->sk_lock.slock);
-                       filter = sk->sk_filter;
+                       rcu_read_lock_bh();
+                       filter = rcu_dereference(sk->sk_filter);
                         if (filter) {
-                               sk->sk_filter = NULL;
-                               spin_unlock_bh(&sk->sk_lock.slock);
+                               rcu_assign_pointer(sk->sk_filter, NULL);
                                sk_filter_release(sk, filter);
+                               rcu_read_unlock_bh();
                                break;
                        }
-                       spin_unlock_bh(&sk->sk_lock.slock);
+                       rcu_read_unlock_bh();
                        ret = -ENONET;
                        break;
 
@@ -790,6 +776,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
                                len = sizeof(sk->sk_peercred);
                        if (copy_to_user(optval, &sk->sk_peercred, len))
                                return -EFAULT;
+
                        goto lenout;
 
                case SO_PEERNAME:
@@ -857,7 +844,7 @@ struct sock *sk_alloc(int family, gfp_t priority,
                      struct proto *prot, int zero_it)
 {
        struct sock *sk = NULL;
-       kmem_cache_t *slab = prot->slab;
+       struct kmem_cache *slab = prot->slab;
 
        if (slab != NULL)
                sk = kmem_cache_alloc(slab, priority);
@@ -902,10 +889,10 @@ void sk_free(struct sock *sk)
        if (sk->sk_destruct)
                sk->sk_destruct(sk);
 
-       filter = sk->sk_filter;
+       filter = rcu_dereference(sk->sk_filter);
        if (filter) {
                sk_filter_release(sk, filter);
-               sk->sk_filter = NULL;
+               rcu_assign_pointer(sk->sk_filter, NULL);
        }
 
        sock_disable_timestamp(sk);
@@ -927,14 +914,14 @@ void sk_free(struct sock *sk)
        module_put(owner);
 }
 
-struct sock *sk_clone(struct sock *sk, const gfp_t priority)
+struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 {
        struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);
 
        if (newsk != NULL) {
                struct sk_filter *filter;
 
-               memcpy(newsk, sk, sk->sk_prot->obj_size);
+               sock_copy(newsk, sk);
 
                /* SANITY */
                sock_vx_init(newsk);
@@ -971,9 +958,6 @@ struct sock *sk_clone(struct sock *sk, const gfp_t priority)
                if (filter != NULL)
                        sk_filter_charge(newsk, filter);
 
-               if (sk->sk_create_child)
-                       sk->sk_create_child(sk, newsk);
-
                if (unlikely(xfrm_sk_clone_policy(newsk))) {
                        /* It is still raw copy of parent, so invalidate
                         * destructor and make plain sk_free() */
@@ -1570,7 +1554,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
        atomic_set(&sk->sk_refcnt, 1);
 }
 
-void fastcall lock_sock(struct sock *sk)
+void fastcall lock_sock_nested(struct sock *sk, int subclass)
 {
        might_sleep();
        spin_lock_bh(&sk->sk_lock.slock);
@@ -1581,11 +1565,11 @@ void fastcall lock_sock(struct sock *sk)
        /*
         * The sk_lock has mutex_lock() semantics here:
         */
-       mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+       mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
        local_bh_enable();
 }
 
-EXPORT_SYMBOL(lock_sock);
+EXPORT_SYMBOL(lock_sock_nested);
 
 void fastcall release_sock(struct sock *sk)
 {
@@ -1649,7 +1633,7 @@ int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
 {
        struct sock *sk = sock->sk;
 
-       if (sk->sk_prot->compat_setsockopt != NULL)
+       if (sk->sk_prot->compat_getsockopt != NULL)
                return sk->sk_prot->compat_getsockopt(sk, level, optname,
                                                      optval, optlen);
        return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);