X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fcore%2Fsock.c;h=449291f61005dfb90a829172cd73acba7bf85e8e;hb=8e8ece46a861c84343256819eaec77e608ff9217;hp=92bc77ad6ce95f3611bf4ef6414ac2de24091aa3;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git diff --git a/net/core/sock.c b/net/core/sock.c index 92bc77ad6..449291f61 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -118,9 +118,13 @@ #include #include #include +#include #include #include +#include +#include +#include #ifdef CONFIG_INET #include @@ -165,7 +169,7 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) static void sock_warn_obsolete_bsdism(const char *name) { static int warned; - static char warncomm[16]; + static char warncomm[TASK_COMM_LEN]; if (strcmp(warncomm, current->comm) && warned < 5) { strcpy(warncomm, current->comm); printk(KERN_WARNING "process `%s' is using obsolete " @@ -174,6 +178,15 @@ static void sock_warn_obsolete_bsdism(const char *name) } } +static void sock_disable_timestamp(struct sock *sk) +{ + if (sock_flag(sk, SOCK_TIMESTAMP)) { + sock_reset_flag(sk, SOCK_TIMESTAMP); + net_disable_timestamp(); + } +} + + /* * This is meant for all protocols to use and covers goings on * at the socket level. Everything here is generic. @@ -323,7 +336,10 @@ int sock_setsockopt(struct socket *sock, int level, int optname, break; case SO_PASSCRED: - sock->passcred = valbool; + if (valbool) + set_bit(SOCK_PASS_CRED, &sock->flags); + else + clear_bit(SOCK_PASS_CRED, &sock->flags); break; case SO_TIMESTAMP: @@ -442,7 +458,8 @@ int sock_getsockopt(struct socket *sock, int level, int optname, struct timeval tm; } v; - unsigned int lv=sizeof(int),len; + unsigned int lv = sizeof(int); + int len; if(get_user(len,optlen)) return -EFAULT; @@ -522,7 +539,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.tm.tv_usec = 0; } else { v.tm.tv_sec = sk->sk_rcvtimeo / HZ; - v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000) / HZ; + v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ; } break; @@ -533,7 +550,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.tm.tv_usec = 0; } else { v.tm.tv_sec = sk->sk_sndtimeo / HZ; - v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000) / HZ; + v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ; } break; @@ -546,7 +563,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_PASSCRED: - v.val = sock->passcred; + v.val = test_bit(SOCK_PASS_CRED, &sock->flags)?1:0; break; case SO_PEERCRED: @@ -621,6 +638,8 @@ struct sock *sk_alloc(int family, int priority, int zero_it, kmem_cache_t *slab) sock_lock_init(sk); } sk->sk_slab = slab; + sock_vx_init(sk); + sock_nx_init(sk); if (security_sk_alloc(sk, family, priority)) { kmem_cache_free(slab, sk); @@ -651,6 +670,11 @@ void sk_free(struct sock *sk) __FUNCTION__, atomic_read(&sk->sk_omem_alloc)); security_sk_free(sk); + vx_sock_dec(sk); + clr_vx_info(&sk->sk_vx_info); + sk->sk_xid = -1; + clr_nx_info(&sk->sk_nx_info); + sk->sk_nid = -1; kmem_cache_free(sk->sk_slab, sk); module_put(owner); } @@ -658,7 +682,7 @@ void sk_free(struct sock *sk) void __init sk_init(void) { sk_cachep = kmem_cache_create("sock", sizeof(struct sock), 0, - SLAB_HWCACHE_ALIGN, 0, 0); + SLAB_HWCACHE_ALIGN, NULL, NULL); if (!sk_cachep) printk(KERN_CRIT "sk_init: Cannot create sock SLAB cache!"); @@ -702,6 +726,27 @@ void sock_rfree(struct sk_buff *skb) atomic_sub(skb->truesize, &sk->sk_rmem_alloc); } + +int sock_i_uid(struct sock *sk) +{ + int uid; + + read_lock(&sk->sk_callback_lock); + uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0; + read_unlock(&sk->sk_callback_lock); + return uid; +} + +unsigned long sock_i_ino(struct sock *sk) +{ + unsigned long ino; + + read_lock(&sk->sk_callback_lock); + ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0; + read_unlock(&sk->sk_callback_lock); + return ino; +} + /* * Allocate a skb from the socket's send buffer. */ @@ -793,8 +838,10 @@ static long sock_wait_for_wmem(struct sock * sk, long timeo) * Generic send/receive buffer handlers */ -struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, - unsigned long data_len, int noblock, int *errcode) +static struct sk_buff *sock_alloc_send_pskb(struct sock *sk, + unsigned long header_len, + unsigned long data_len, + int noblock, int *errcode) { struct sk_buff *skb; unsigned int gfp_mask; @@ -881,7 +928,7 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, return sock_alloc_send_pskb(sk, size, 0, noblock, errcode); } -void __lock_sock(struct sock *sk) +static void __lock_sock(struct sock *sk) { DEFINE_WAIT(wait); @@ -897,7 +944,7 @@ void __lock_sock(struct sock *sk) finish_wait(&sk->sk_lock.wq, &wait); } -void __release_sock(struct sock *sk) +static void __release_sock(struct sock *sk) { struct sk_buff *skb = sk->sk_backlog.head; @@ -910,6 +957,15 @@ void __release_sock(struct sock *sk) skb->next = NULL; sk->sk_backlog_rcv(sk, skb); + + /* + * We are in process context here with softirqs + * disabled, use cond_resched_softirq() to preempt. + * This is safe to do because we've taken the backlog + * queue private: + */ + cond_resched_softirq(); + skb = next; } while (skb != NULL); @@ -917,6 +973,31 @@ void __release_sock(struct sock *sk) } while((skb = sk->sk_backlog.head) != NULL); } +/** + * sk_wait_data - wait for data to arrive at sk_receive_queue + * sk - sock to wait on + * timeo - for how long + * + * Now socket state including sk->sk_err is changed only under lock, + * hence we may omit checks after joining wait queue. + * We check receive queue before schedule() only as optimization; + * it is very likely that release_sock() added new data. + */ +int sk_wait_data(struct sock *sk, long *timeo) +{ + int rc; + DEFINE_WAIT(wait); + + prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue)); + clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + finish_wait(sk->sk_sleep, &wait); + return rc; +} + +EXPORT_SYMBOL(sk_wait_data); + /* * Set of default routines for initialising struct proto_ops when * the protocol does not support a particular function. In certain @@ -924,11 +1005,6 @@ void __release_sock(struct sock *sk) * function, some default processing is provided. */ -int sock_no_release(struct socket *sock) -{ - return 0; -} - int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len) { return -EOPNOTSUPP; @@ -1009,30 +1085,12 @@ int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct * ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) { ssize_t res; - struct msghdr msg; - struct iovec iov; - mm_segment_t old_fs; - char *kaddr; - - kaddr = kmap(page); - - msg.msg_name = NULL; - msg.msg_namelen = 0; - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = flags; - - /* This cast is ok because of the "set_fs(KERNEL_DS)" */ - iov.iov_base = (void __user *) (kaddr + offset); + struct msghdr msg = {.msg_flags = flags}; + struct kvec iov; + char *kaddr = kmap(page); + iov.iov_base = kaddr + offset; iov.iov_len = size; - - old_fs = get_fs(); - set_fs(KERNEL_DS); - res = sock_sendmsg(sock, &msg, size); - set_fs(old_fs); - + res = kernel_sendmsg(sock, &msg, &iov, 1, size); kunmap(page); return res; } @@ -1041,7 +1099,7 @@ ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, siz * Default Socket Callbacks */ -void sock_def_wakeup(struct sock *sk) +static void sock_def_wakeup(struct sock *sk) { read_lock(&sk->sk_callback_lock); if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) @@ -1049,7 +1107,7 @@ void sock_def_wakeup(struct sock *sk) read_unlock(&sk->sk_callback_lock); } -void sock_def_error_report(struct sock *sk) +static void sock_def_error_report(struct sock *sk) { read_lock(&sk->sk_callback_lock); if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) @@ -1058,7 +1116,7 @@ void sock_def_error_report(struct sock *sk) read_unlock(&sk->sk_callback_lock); } -void sock_def_readable(struct sock *sk, int len) +static void sock_def_readable(struct sock *sk, int len) { read_lock(&sk->sk_callback_lock); if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) @@ -1067,7 +1125,7 @@ void sock_def_readable(struct sock *sk, int len) read_unlock(&sk->sk_callback_lock); } -void sock_def_write_space(struct sock *sk) +static void sock_def_write_space(struct sock *sk) { read_lock(&sk->sk_callback_lock); @@ -1086,7 +1144,7 @@ void sock_def_write_space(struct sock *sk) read_unlock(&sk->sk_callback_lock); } -void sock_def_destruct(struct sock *sk) +static void sock_def_destruct(struct sock *sk) { if (sk->sk_protinfo) kfree(sk->sk_protinfo); @@ -1099,12 +1157,31 @@ void sk_send_sigurg(struct sock *sk) sk_wake_async(sk, 3, POLL_PRI); } +void sk_reset_timer(struct sock *sk, struct timer_list* timer, + unsigned long expires) +{ + if (!mod_timer(timer, expires)) + sock_hold(sk); +} + +EXPORT_SYMBOL(sk_reset_timer); + +void sk_stop_timer(struct sock *sk, struct timer_list* timer) +{ + if (timer_pending(timer) && del_timer(timer)) + __sock_put(sk); +} + +EXPORT_SYMBOL(sk_stop_timer); + void sock_init_data(struct socket *sock, struct sock *sk) { skb_queue_head_init(&sk->sk_receive_queue); skb_queue_head_init(&sk->sk_write_queue); skb_queue_head_init(&sk->sk_error_queue); + sk->sk_send_head = NULL; + init_timer(&sk->sk_timer); sk->sk_allocation = GFP_KERNEL; @@ -1122,8 +1199,8 @@ void sock_init_data(struct socket *sock, struct sock *sk) } else sk->sk_sleep = NULL; - sk->sk_dst_lock = RW_LOCK_UNLOCKED; - sk->sk_callback_lock = RW_LOCK_UNLOCKED; + rwlock_init(&sk->sk_dst_lock); + rwlock_init(&sk->sk_callback_lock); sk->sk_state_change = sock_def_wakeup; sk->sk_data_ready = sock_def_readable; @@ -1131,9 +1208,13 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_error_report = sock_def_error_report; sk->sk_destruct = sock_def_destruct; + sk->sk_sndmsg_page = NULL; + sk->sk_sndmsg_off = 0; + sk->sk_peercred.pid = 0; sk->sk_peercred.uid = -1; sk->sk_peercred.gid = -1; + sk->sk_write_pending = 0; sk->sk_rcvlowat = 1; sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; @@ -1142,6 +1223,11 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_stamp.tv_sec = -1L; sk->sk_stamp.tv_usec = -1L; + set_vx_info(&sk->sk_vx_info, current->vx_info); + sk->sk_xid = vx_current_xid(); + vx_sock_inc(sk); + set_nx_info(&sk->sk_nx_info, current->nx_info); + sk->sk_nid = nx_current_nid(); atomic_set(&sk->sk_refcnt, 1); } @@ -1169,10 +1255,7 @@ void fastcall release_sock(struct sock *sk) } EXPORT_SYMBOL(release_sock); -/* When > 0 there are consumers of rx skb time stamps */ -atomic_t netstamp_needed = ATOMIC_INIT(0); - -int sock_get_timestamp(struct sock *sk, struct timeval *userstamp) +int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) { if (!sock_flag(sk, SOCK_TIMESTAMP)) sock_enable_timestamp(sk); @@ -1189,28 +1272,123 @@ void sock_enable_timestamp(struct sock *sk) { if (!sock_flag(sk, SOCK_TIMESTAMP)) { sock_set_flag(sk, SOCK_TIMESTAMP); - atomic_inc(&netstamp_needed); + net_enable_timestamp(); } } EXPORT_SYMBOL(sock_enable_timestamp); -void sock_disable_timestamp(struct sock *sk) -{ - if (sock_flag(sk, SOCK_TIMESTAMP)) { - sock_reset_flag(sk, SOCK_TIMESTAMP); - atomic_dec(&netstamp_needed); +/* + * Get a socket option on an socket. + * + * FIX: POSIX 1003.1g is very ambiguous here. It states that + * asynchronous errors should be reported by getsockopt. We assume + * this means if you specify SO_ERROR (otherwise whats the point of it). + */ +int sock_common_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct sock *sk = sock->sk; + + return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen); +} + +EXPORT_SYMBOL(sock_common_getsockopt); + +int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t size, int flags) +{ + struct sock *sk = sock->sk; + int addr_len = 0; + int err; + + err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT, + flags & ~MSG_DONTWAIT, &addr_len); + if (err >= 0) + msg->msg_namelen = addr_len; + return err; +} + +EXPORT_SYMBOL(sock_common_recvmsg); + +/* + * Set socket options on an inet socket. + */ +int sock_common_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, int optlen) +{ + struct sock *sk = sock->sk; + + return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen); +} + +EXPORT_SYMBOL(sock_common_setsockopt); + +void sk_common_release(struct sock *sk) +{ + if (sk->sk_prot->destroy) + sk->sk_prot->destroy(sk); + + /* + * Observation: when sock_common_release is called, processes have + * no access to socket. But net still has. + * Step one, detach it from networking: + * + * A. Remove from hash tables. + */ + + sk->sk_prot->unhash(sk); + + /* + * In this point socket cannot receive new packets, but it is possible + * that some packets are in flight because some CPU runs receiver and + * did hash table lookup before we unhashed socket. They will achieve + * receive queue and will be purged by socket destructor. + * + * Also we still have packets pending on receive queue and probably, + * our own packets waiting in device queues. sock_destroy will drain + * receive queue, but transmitted packets will delay socket destruction + * until the last reference will be released. + */ + + sock_orphan(sk); + + xfrm_sk_free_policy(sk); + +#ifdef INET_REFCNT_DEBUG + if (atomic_read(&sk->sk_refcnt) != 1) + printk(KERN_DEBUG "Destruction of the socket %p delayed, c=%d\n", + sk, atomic_read(&sk->sk_refcnt)); +#endif + sock_put(sk); +} + +EXPORT_SYMBOL(sk_common_release); + +int sk_alloc_slab(struct proto *prot, char *name) +{ + prot->slab = kmem_cache_create(name, + prot->slab_obj_size, 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + + return prot->slab != NULL ? 0 : -ENOBUFS; +} + +EXPORT_SYMBOL(sk_alloc_slab); + +void sk_free_slab(struct proto *prot) +{ + if (prot->slab != NULL) { + kmem_cache_destroy(prot->slab); + prot->slab = NULL; } } -EXPORT_SYMBOL(sock_disable_timestamp); -EXPORT_SYMBOL(__lock_sock); -EXPORT_SYMBOL(__release_sock); +EXPORT_SYMBOL(sk_free_slab); + EXPORT_SYMBOL(sk_alloc); EXPORT_SYMBOL(sk_free); EXPORT_SYMBOL(sk_send_sigurg); -EXPORT_SYMBOL(sock_alloc_send_pskb); EXPORT_SYMBOL(sock_alloc_send_skb); -EXPORT_SYMBOL(sock_getsockopt); EXPORT_SYMBOL(sock_init_data); EXPORT_SYMBOL(sock_kfree_s); EXPORT_SYMBOL(sock_kmalloc); @@ -1224,17 +1402,17 @@ EXPORT_SYMBOL(sock_no_listen); EXPORT_SYMBOL(sock_no_mmap); EXPORT_SYMBOL(sock_no_poll); EXPORT_SYMBOL(sock_no_recvmsg); -EXPORT_SYMBOL(sock_no_release); EXPORT_SYMBOL(sock_no_sendmsg); EXPORT_SYMBOL(sock_no_sendpage); EXPORT_SYMBOL(sock_no_setsockopt); EXPORT_SYMBOL(sock_no_shutdown); EXPORT_SYMBOL(sock_no_socketpair); EXPORT_SYMBOL(sock_rfree); -EXPORT_SYMBOL(sock_rmalloc); EXPORT_SYMBOL(sock_setsockopt); EXPORT_SYMBOL(sock_wfree); EXPORT_SYMBOL(sock_wmalloc); +EXPORT_SYMBOL(sock_i_uid); +EXPORT_SYMBOL(sock_i_ino); #ifdef CONFIG_SYSCTL EXPORT_SYMBOL(sysctl_optmem_max); EXPORT_SYMBOL(sysctl_rmem_max);