/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              Generic socket support routines. Memory allocators, socket lock/release
 *              handler for protocols to use and generic option handler.
 *
 *
 * Version:     $Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
 *
 * Authors:     Ross Biro
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *              Florian La Roche, <flla@stud.uni-sb.de>
 *              Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *              Alan Cox        :       Numerous verify_area() problems
 *              Alan Cox        :       Connecting on a connecting socket
 *                                      now returns an error for tcp.
 *              Alan Cox        :       sock->protocol is set correctly.
 *                                      and is not sometimes left as 0.
 *              Alan Cox        :       connect handles icmp errors on a
 *                                      connect properly. Unfortunately there
 *                                      is a restart syscall nasty there. I
 *                                      can't match BSD without hacking the C
 *                                      library. Ideas urgently sought!
 *              Alan Cox        :       Disallow bind() to addresses that are
 *                                      not ours - especially broadcast ones!!
 *              Alan Cox        :       Socket 1024 _IS_ ok for users. (fencepost)
 *              Alan Cox        :       sock_wfree/sock_rfree don't destroy sockets,
 *                                      instead they leave that for the DESTROY timer.
 *              Alan Cox        :       Clean up error flag in accept
 *              Alan Cox        :       TCP ack handling is buggy, the DESTROY timer
 *                                      was buggy. Put a remove_sock() in the handler
 *                                      for memory when we hit 0. Also altered the timer
 *                                      code. The ACK stuff can wait and needs major
 *                                      TCP layer surgery.
 *              Alan Cox        :       Fixed TCP ack bug, removed remove sock
 *                                      and fixed timer/inet_bh race.
 *              Alan Cox        :       Added zapped flag for TCP
 *              Alan Cox        :       Move kfree_skb into skbuff.c and tidied up surplus code
 *              Alan Cox        :       for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 *              Alan Cox        :       kfree_s calls now are kfree_skbmem so we can track skb resources
 *              Alan Cox        :       Supports socket option broadcast now as does udp. Packet and raw need fixing.
 *              Alan Cox        :       Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 *              Rick Sladkey    :       Relaxed UDP rules for matching packets.
 *              C.E.Hawkins     :       IFF_PROMISC/SIOCGHWADDR support
 *      Pauline Middelink       :       identd support
 *              Alan Cox        :       Fixed connect() taking signals I think.
 *              Alan Cox        :       SO_LINGER supported
 *              Alan Cox        :       Error reporting fixes
 *              Anonymous       :       inet_create tidied up (sk->reuse setting)
 *              Alan Cox        :       inet sockets don't set sk->type!
 *              Alan Cox        :       Split socket option code
 *              Alan Cox        :       Callbacks
 *              Alan Cox        :       Nagle flag for Charles & Johannes stuff
 *              Alex            :       Removed restriction on inet fioctl
 *              Alan Cox        :       Splitting INET from NET core
 *              Alan Cox        :       Fixed bogus SO_TYPE handling in getsockopt()
 *              Adam Caldwell   :       Missing return in SO_DONTROUTE/SO_DEBUG code
 *              Alan Cox        :       Split IP from generic code
 *              Alan Cox        :       New kfree_skbmem()
 *              Alan Cox        :       Make SO_DEBUG superuser only.
 *              Alan Cox        :       Allow anyone to clear SO_DEBUG
 *                                      (compatibility fix)
 *              Alan Cox        :       Added optimistic memory grabbing for AF_UNIX throughput.
 *              Alan Cox        :       Allocator for a socket is settable.
 *              Alan Cox        :       SO_ERROR includes soft errors.
 *              Alan Cox        :       Allow NULL arguments on some SO_ opts
 *              Alan Cox        :       Generic socket allocation to make hooks
 *                                      easier (suggested by Craig Metz).
 *              Michael Pall    :       SO_ERROR returns positive errno again
 *              Steve Whitehouse:       Added default destructor to free
 *                                      protocol private data.
 *              Steve Whitehouse:       Added various other default routines
 *                                      common to several socket families.
 *              Chris Evans     :       Call suser() check last on F_SETOWN
 *              Jay Schulist    :       Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 *              Andi Kleen      :       Add sock_kmalloc()/sock_kfree_s()
 *              Andi Kleen      :       Fix write_space callback
 *              Chris Evans     :       Security fixes - signedness again
 *              Arnaldo C. Melo :       cleanups, use skb_queue_purge
 *
 * To Fix:
 *
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 */

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>

#include <asm/uaccess.h>
#include <asm/system.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>

#include <linux/filter.h>
#include <linux/vs_socket.h>
#include <linux/vs_limit.h>
#include <linux/vs_context.h>

#ifdef CONFIG_INET
#include <net/tcp.h>
#endif

/*
 * Each address family might have different locking rules, so we have
 * one slock key per address family:
 */
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];

#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
 * Make lock validator output more readable. (we pre-construct these
 * strings build-time, so that runtime initialization of socket
 * locks is fast):
 */
static const char *af_family_key_strings[AF_MAX+1] = {
  "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
  "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
  "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
  "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
  "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
  "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
  "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
  "sk_lock-21"       , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
  "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-29"          ,
  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_MAX"
};
static const char *af_family_slock_key_strings[AF_MAX+1] = {
  "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
  "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
  "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
  "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
  "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
  "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
  "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
  "slock-21"       , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
  "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
  "slock-27"       , "slock-28"          , "slock-29"          ,
  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_MAX"
};
#endif

/*
 * sk_callback_lock locking rules are per-address-family,
 * so split the lock classes by using a per-AF key:
 */
static struct lock_class_key af_callback_keys[AF_MAX];

/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS         256
#define _SK_MEM_OVERHEAD        (sizeof(struct sk_buff) + 256)
#define SK_WMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)

/* Run time adjustable parameters. */
__u32 sysctl_wmem_max = SK_WMEM_MAX;
__u32 sysctl_rmem_max = SK_RMEM_MAX;
__u32 sysctl_wmem_default = SK_WMEM_MAX;
__u32 sysctl_rmem_default = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);

static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
        struct timeval tv;

        if (optlen < sizeof(tv))
                return -EINVAL;
        if (copy_from_user(&tv, optval, sizeof(tv)))
                return -EFAULT;

        *timeo_p = MAX_SCHEDULE_TIMEOUT;
        if (tv.tv_sec == 0 && tv.tv_usec == 0)
                return 0;
        if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
                *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
        return 0;
}

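/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * the userspace side that sock_set_timeout() parses.  A timeval of
 * {2, 500000} becomes 2*HZ + 500000/(1000000/HZ) jiffies, i.e. roughly
 * 2.5 seconds, rounded up to the jiffy granularity.
 */
#if 0
        struct timeval tv = { .tv_sec = 2, .tv_usec = 500000 };

        /* Blocking reads on fd now give up after ~2.5s with EAGAIN
         * instead of sleeping indefinitely. */
        if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) < 0)
                perror("setsockopt(SO_RCVTIMEO)");
#endif
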
static void sock_warn_obsolete_bsdism(const char *name)
{
        static int warned;
        static char warncomm[TASK_COMM_LEN];
        if (strcmp(warncomm, current->comm) && warned < 5) {
                strcpy(warncomm, current->comm);
                printk(KERN_WARNING "process `%s' is using obsolete "
                       "%s SO_BSDCOMPAT\n", warncomm, name);
                warned++;
        }
}

static void sock_disable_timestamp(struct sock *sk)
{
        if (sock_flag(sk, SOCK_TIMESTAMP)) {
                sock_reset_flag(sk, SOCK_TIMESTAMP);
                net_disable_timestamp();
        }
}


int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
        int err = 0;
        int skb_len;

#if defined(CONFIG_VNET) || defined(CONFIG_VNET_MODULE)
        /* Silently drop if VNET is active (if INET bind() has been
         * overridden) and the context is not entitled to read the
         * packet.
         */
        if (vnet_active &&
            (int) sk->sk_xid > 0 && sk->sk_xid != skb->xid) {
                err = -EPERM;
                goto out;
        }
#endif

        /* Cast sk->sk_rcvbuf to unsigned... It's pointless, but reduces
           the number of warnings when compiling with -W --ANK
         */
        if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
            (unsigned)sk->sk_rcvbuf) {
                err = -ENOMEM;
                goto out;
        }

        /* It would deadlock if sock_queue_rcv_skb were used with
           the socket lock held! We assume that users of this
           function are lock free.
        */
        err = sk_filter(sk, skb, 1);
        if (err)
                goto out;

        skb->dev = NULL;
        skb_set_owner_r(skb, sk);

        /* Cache the SKB length before we tack it onto the receive
         * queue.  Once it is added it no longer belongs to us and
         * may be freed by other threads of control pulling packets
         * from the queue.
         */
        skb_len = skb->len;

        skb_queue_tail(&sk->sk_receive_queue, skb);

        if (!sock_flag(sk, SOCK_DEAD))
                sk->sk_data_ready(sk, skb_len);
out:
        return err;
}
EXPORT_SYMBOL(sock_queue_rcv_skb);

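/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * how a lock-free protocol input path typically hands a packet to the
 * socket.  my_proto_rcv() and my_lookup_sock() are hypothetical names;
 * the lookup is assumed to return a referenced sock or NULL.
 */
#if 0
static int my_proto_rcv(struct sk_buff *skb)
{
        struct sock *sk = my_lookup_sock(skb);  /* hypothetical lookup */

        if (sk == NULL)
                goto drop;
        /* On failure (filter drop or rcvbuf overflow) the skb still
         * belongs to us and must be freed here. */
        if (sock_queue_rcv_skb(sk, skb) < 0) {
                sock_put(sk);
                goto drop;
        }
        sock_put(sk);
        return 0;
drop:
        kfree_skb(skb);
        return -1;
}
#endif
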
int sk_receive_skb(struct sock *sk, struct sk_buff *skb)
{
        int rc = NET_RX_SUCCESS;

        if (sk_filter(sk, skb, 0))
                goto discard_and_relse;

        skb->dev = NULL;

        bh_lock_sock(sk);
        if (!sock_owned_by_user(sk)) {
                /*
                 * trylock + unlock semantics:
                 */
                mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);

                rc = sk->sk_backlog_rcv(sk, skb);

                mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
        } else
                sk_add_backlog(sk, skb);
        bh_unlock_sock(sk);
out:
        sock_put(sk);
        return rc;
discard_and_relse:
        kfree_skb(skb);
        goto out;
}
EXPORT_SYMBOL(sk_receive_skb);

struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
        struct dst_entry *dst = sk->sk_dst_cache;

        if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
                sk->sk_dst_cache = NULL;
                dst_release(dst);
                return NULL;
        }

        return dst;
}
EXPORT_SYMBOL(__sk_dst_check);

struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
{
        struct dst_entry *dst = sk_dst_get(sk);

        if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
                sk_dst_reset(sk);
                dst_release(dst);
                return NULL;
        }

        return dst;
}
EXPORT_SYMBOL(sk_dst_check);

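/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * the usual transmit-path pattern around sk_dst_check() -- reuse the
 * cached route while the check says it is valid, re-route once it has
 * been invalidated.  my_route_lookup() is hypothetical.
 */
#if 0
        struct dst_entry *dst = sk_dst_check(sk, 0);

        if (dst == NULL) {
                /* Cache was stale and has been dropped; do a fresh
                 * lookup and install it on the socket. */
                dst = my_route_lookup(sk);      /* hypothetical */
                if (dst == NULL)
                        return -EHOSTUNREACH;
                sk_dst_set(sk, dst_clone(dst));
        }
        /* ... transmit using dst ... */
        dst_release(dst);
#endif
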
/*
 *      This is meant for all protocols to use and covers goings on
 *      at the socket level. Everything here is generic.
 */

int sock_setsockopt(struct socket *sock, int level, int optname,
                    char __user *optval, int optlen)
{
        struct sock *sk = sock->sk;
        struct sk_filter *filter;
        int val;
        int valbool;
        struct linger ling;
        int ret = 0;

        /*
         *      Options without arguments
         */

#ifdef SO_DONTLINGER            /* Compatibility item... */
        if (optname == SO_DONTLINGER) {
                lock_sock(sk);
                sock_reset_flag(sk, SOCK_LINGER);
                release_sock(sk);
                return 0;
        }
#endif

        if (optlen < sizeof(int))
                return -EINVAL;

        if (get_user(val, (int __user *)optval))
                return -EFAULT;

        valbool = val ? 1 : 0;

        lock_sock(sk);

        switch (optname) {
                case SO_DEBUG:
                        if (val && !capable(CAP_NET_ADMIN))
                                ret = -EACCES;
                        else if (valbool)
                                sock_set_flag(sk, SOCK_DBG);
                        else
                                sock_reset_flag(sk, SOCK_DBG);
                        break;
                case SO_REUSEADDR:
                        sk->sk_reuse = valbool;
                        break;
                case SO_TYPE:
                case SO_ERROR:
                        ret = -ENOPROTOOPT;
                        break;
                case SO_DONTROUTE:
                        if (valbool)
                                sock_set_flag(sk, SOCK_LOCALROUTE);
                        else
                                sock_reset_flag(sk, SOCK_LOCALROUTE);
                        break;
                case SO_BROADCAST:
                        sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
                        break;
                case SO_SNDBUF:
                        /* Don't error on this; BSD doesn't, and if you think
                           about it this is right. Otherwise apps would have to
                           play 'guess the biggest size' games. RCVBUF/SNDBUF
                           are treated in BSD as hints */

                        if (val > sysctl_wmem_max)
                                val = sysctl_wmem_max;
set_sndbuf:
                        sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
                        if ((val * 2) < SOCK_MIN_SNDBUF)
                                sk->sk_sndbuf = SOCK_MIN_SNDBUF;
                        else
                                sk->sk_sndbuf = val * 2;

                        /*
                         *      Wake up sending tasks if we
                         *      upped the value.
                         */
                        sk->sk_write_space(sk);
                        break;

                case SO_SNDBUFFORCE:
                        if (!capable(CAP_NET_ADMIN)) {
                                ret = -EPERM;
                                break;
                        }
                        goto set_sndbuf;

                case SO_RCVBUF:
                        /* Don't error on this; BSD doesn't, and if you think
                           about it this is right. Otherwise apps would have to
                           play 'guess the biggest size' games. RCVBUF/SNDBUF
                           are treated in BSD as hints */

                        if (val > sysctl_rmem_max)
                                val = sysctl_rmem_max;
set_rcvbuf:
                        sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
                        /*
                         * We double it on the way in to account for
                         * "struct sk_buff" etc. overhead.   Applications
                         * assume that the SO_RCVBUF setting they make will
                         * allow that much actual data to be received on that
                         * socket.
                         *
                         * Applications are unaware that "struct sk_buff" and
                         * other overheads allocate from the receive buffer
                         * during socket buffer allocation.
                         *
                         * And after considering the possible alternatives,
                         * returning the value we actually used in getsockopt
                         * is the most desirable behavior.
                         */
                        if ((val * 2) < SOCK_MIN_RCVBUF)
                                sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
                        else
                                sk->sk_rcvbuf = val * 2;
                        break;

                case SO_RCVBUFFORCE:
                        if (!capable(CAP_NET_ADMIN)) {
                                ret = -EPERM;
                                break;
                        }
                        goto set_rcvbuf;

                case SO_KEEPALIVE:
#ifdef CONFIG_INET
                        if (sk->sk_protocol == IPPROTO_TCP)
                                tcp_set_keepalive(sk, valbool);
#endif
                        sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
                        break;

                case SO_OOBINLINE:
                        sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
                        break;

                case SO_NO_CHECK:
                        sk->sk_no_check = valbool;
                        break;

                case SO_PRIORITY:
                        if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
                                sk->sk_priority = val;
                        else
                                ret = -EPERM;
                        break;

                case SO_LINGER:
                        if (optlen < sizeof(ling)) {
                                ret = -EINVAL;  /* 1003.1g */
                                break;
                        }
                        if (copy_from_user(&ling, optval, sizeof(ling))) {
                                ret = -EFAULT;
                                break;
                        }
                        if (!ling.l_onoff)
                                sock_reset_flag(sk, SOCK_LINGER);
                        else {
#if (BITS_PER_LONG == 32)
                                if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
                                        sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
                                else
#endif
                                        sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
                                sock_set_flag(sk, SOCK_LINGER);
                        }
                        break;

                case SO_BSDCOMPAT:
                        sock_warn_obsolete_bsdism("setsockopt");
                        break;

                case SO_PASSCRED:
                        if (valbool)
                                set_bit(SOCK_PASSCRED, &sock->flags);
                        else
                                clear_bit(SOCK_PASSCRED, &sock->flags);
                        break;

#if defined(CONFIG_VNET) || defined(CONFIG_VNET_MODULE)
                case SO_SETXID:
                        if (current->xid) {
                                ret = -EPERM;
                                break;
                        }
                        if (val < 0 || val > MAX_S_CONTEXT) {
                                ret = -EINVAL;
                                break;
                        }
                        sk->sk_xid = val;
                        break;
#endif

                case SO_TIMESTAMP:
                        if (valbool) {
                                sock_set_flag(sk, SOCK_RCVTSTAMP);
                                sock_enable_timestamp(sk);
                        } else
                                sock_reset_flag(sk, SOCK_RCVTSTAMP);
                        break;

                case SO_RCVLOWAT:
                        if (val < 0)
                                val = INT_MAX;
                        sk->sk_rcvlowat = val ? : 1;
                        break;

                case SO_RCVTIMEO:
                        ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
                        break;

                case SO_SNDTIMEO:
                        ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
                        break;

#ifdef CONFIG_NETDEVICES
                case SO_BINDTODEVICE:
                {
                        char devname[IFNAMSIZ];

                        /* Sorry... */
                        if (!capable(CAP_NET_RAW)) {
                                ret = -EPERM;
                                break;
                        }

                        /* Bind this socket to a particular device like "eth0",
                         * as specified in the passed interface name. If the
                         * name is "" or the option length is zero the socket
                         * is not bound.
                         */

                        if (!valbool) {
                                sk->sk_bound_dev_if = 0;
                        } else {
                                if (optlen > IFNAMSIZ - 1)
                                        optlen = IFNAMSIZ - 1;
                                memset(devname, 0, sizeof(devname));
                                if (copy_from_user(devname, optval, optlen)) {
                                        ret = -EFAULT;
                                        break;
                                }

                                /* Remove any cached route for this socket. */
                                sk_dst_reset(sk);

                                if (devname[0] == '\0') {
                                        sk->sk_bound_dev_if = 0;
                                } else {
                                        struct net_device *dev = dev_get_by_name(devname);
                                        if (!dev) {
                                                ret = -ENODEV;
                                                break;
                                        }
                                        sk->sk_bound_dev_if = dev->ifindex;
                                        dev_put(dev);
                                }
                        }
                        break;
                }
#endif


                case SO_ATTACH_FILTER:
                        ret = -EINVAL;
                        if (optlen == sizeof(struct sock_fprog)) {
                                struct sock_fprog fprog;

                                ret = -EFAULT;
                                if (copy_from_user(&fprog, optval, sizeof(fprog)))
                                        break;

                                ret = sk_attach_filter(&fprog, sk);
                        }
                        break;

                case SO_DETACH_FILTER:
                        spin_lock_bh(&sk->sk_lock.slock);
                        filter = sk->sk_filter;
                        if (filter) {
                                sk->sk_filter = NULL;
                                spin_unlock_bh(&sk->sk_lock.slock);
                                sk_filter_release(sk, filter);
                                break;
                        }
                        spin_unlock_bh(&sk->sk_lock.slock);
                        ret = -ENONET;
                        break;

                case SO_PASSSEC:
                        if (valbool)
                                set_bit(SOCK_PASSSEC, &sock->flags);
                        else
                                clear_bit(SOCK_PASSSEC, &sock->flags);
                        break;

                /* We implement SO_SNDLOWAT etc. as not settable
                   (1003.1g 5.3) */
                default:
                        ret = -ENOPROTOOPT;
                        break;
        }
        release_sock(sk);
        return ret;
}


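/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * the SO_SNDBUF doubling above is visible from userspace -- getsockopt()
 * reports twice the requested value, because sk_buff overhead is charged
 * against the same budget (assuming the request does not exceed the
 * wmem_max sysctl).
 */
#if 0
        int val = 65536, out;
        socklen_t len = sizeof(out);

        setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val));
        getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &out, &len);
        /* out is now 131072, assuming 65536 <= sysctl_wmem_max */
#endif
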
int sock_getsockopt(struct socket *sock, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        struct sock *sk = sock->sk;

        union {
                int val;
                struct linger ling;
                struct timeval tm;
        } v;

        unsigned int lv = sizeof(int);
        int len;

        if (get_user(len, optlen))
                return -EFAULT;
        if (len < 0)
                return -EINVAL;

        switch (optname) {
                case SO_DEBUG:
                        v.val = sock_flag(sk, SOCK_DBG);
                        break;

                case SO_DONTROUTE:
                        v.val = sock_flag(sk, SOCK_LOCALROUTE);
                        break;

                case SO_BROADCAST:
                        v.val = !!sock_flag(sk, SOCK_BROADCAST);
                        break;

                case SO_SNDBUF:
                        v.val = sk->sk_sndbuf;
                        break;

                case SO_RCVBUF:
                        v.val = sk->sk_rcvbuf;
                        break;

                case SO_REUSEADDR:
                        v.val = sk->sk_reuse;
                        break;

                case SO_KEEPALIVE:
                        v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
                        break;

                case SO_TYPE:
                        v.val = sk->sk_type;
                        break;

                case SO_ERROR:
                        v.val = -sock_error(sk);
                        if (v.val == 0)
                                v.val = xchg(&sk->sk_err_soft, 0);
                        break;

                case SO_OOBINLINE:
                        v.val = !!sock_flag(sk, SOCK_URGINLINE);
                        break;

                case SO_NO_CHECK:
                        v.val = sk->sk_no_check;
                        break;

                case SO_PRIORITY:
                        v.val = sk->sk_priority;
                        break;

                case SO_LINGER:
                        lv              = sizeof(v.ling);
                        v.ling.l_onoff  = !!sock_flag(sk, SOCK_LINGER);
                        v.ling.l_linger = sk->sk_lingertime / HZ;
                        break;

                case SO_BSDCOMPAT:
                        sock_warn_obsolete_bsdism("getsockopt");
                        break;

                case SO_TIMESTAMP:
                        v.val = sock_flag(sk, SOCK_RCVTSTAMP);
                        break;

                case SO_RCVTIMEO:
                        lv = sizeof(struct timeval);
                        if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
                                v.tm.tv_sec = 0;
                                v.tm.tv_usec = 0;
                        } else {
                                v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
                                v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
                        }
                        break;

                case SO_SNDTIMEO:
                        lv = sizeof(struct timeval);
                        if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
                                v.tm.tv_sec = 0;
                                v.tm.tv_usec = 0;
                        } else {
                                v.tm.tv_sec = sk->sk_sndtimeo / HZ;
                                v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
                        }
                        break;

                case SO_RCVLOWAT:
                        v.val = sk->sk_rcvlowat;
                        break;

                case SO_SNDLOWAT:
                        v.val = 1;
                        break;

                case SO_PASSCRED:
                        v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
                        break;

                case SO_PEERCRED:
                        if (len > sizeof(sk->sk_peercred))
                                len = sizeof(sk->sk_peercred);
                        if (copy_to_user(optval, &sk->sk_peercred, len))
                                return -EFAULT;
                        goto lenout;

                case SO_PEERNAME:
                {
                        char address[128];

                        if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
                                return -ENOTCONN;
                        if (lv < len)
                                return -EINVAL;
                        if (copy_to_user(optval, address, len))
                                return -EFAULT;
                        goto lenout;
                }

                /* Dubious BSD thing... Probably nobody even uses it, but
                 * the UNIX standard wants it for whatever reason... -DaveM
                 */
                case SO_ACCEPTCONN:
                        v.val = sk->sk_state == TCP_LISTEN;
                        break;

                case SO_PASSSEC:
                        v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
                        break;

                case SO_PEERSEC:
                        return security_socket_getpeersec_stream(sock, optval, optlen, len);

                default:
                        return -ENOPROTOOPT;
        }
        if (len > lv)
                len = lv;
        if (copy_to_user(optval, &v, len))
                return -EFAULT;
lenout:
        if (put_user(len, optlen))
                return -EFAULT;
        return 0;
}

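/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * the classic consumer of the SO_ERROR case above -- collecting the result
 * of a non-blocking connect() once the socket polls writable.  Note the
 * errno comes back positive, and reading it clears sk_err via sock_error().
 */
#if 0
        int err = 0;
        socklen_t len = sizeof(err);

        if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len) == 0 && err) {
                errno = err;
                perror("connect");
        }
#endif
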
/*
 * Initialize an sk_lock.
 *
 * (We also register the sk_lock with the lock validator.)
 */
static inline void sock_lock_init(struct sock *sk)
{
        sock_lock_init_class_and_name(sk,
                        af_family_slock_key_strings[sk->sk_family],
                        af_family_slock_keys + sk->sk_family,
                        af_family_key_strings[sk->sk_family],
                        af_family_keys + sk->sk_family);
}

/**
 *      sk_alloc - All socket objects are allocated here
 *      @family: protocol family
 *      @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *      @prot: struct proto associated with this new sock instance
 *      @zero_it: if we should zero the newly allocated sock
 */
struct sock *sk_alloc(int family, gfp_t priority,
                      struct proto *prot, int zero_it)
{
        struct sock *sk = NULL;
        kmem_cache_t *slab = prot->slab;

        if (slab != NULL)
                sk = kmem_cache_alloc(slab, priority);
        else
                sk = kmalloc(prot->obj_size, priority);

        if (sk) {
                if (zero_it) {
                        memset(sk, 0, prot->obj_size);
                        sk->sk_family = family;
                        /*
                         * See comment in struct sock definition to understand
                         * why we need sk_prot_creator -acme
                         */
                        sk->sk_prot = sk->sk_prot_creator = prot;
                        sock_lock_init(sk);
                }
                sock_vx_init(sk);
                sock_nx_init(sk);

                if (security_sk_alloc(sk, family, priority))
                        goto out_free;

                if (!try_module_get(prot->owner))
                        goto out_free;
        }
        return sk;

out_free:
        if (slab != NULL)
                kmem_cache_free(slab, sk);
        else
                kfree(sk);
        return NULL;
}

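/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * how an address family's ->create() handler would typically obtain its
 * sock.  my_create() and my_proto are hypothetical; my_proto stands for a
 * struct proto the family has registered.
 */
#if 0
static int my_create(struct socket *sock, int protocol)
{
        struct sock *sk;

        sk = sk_alloc(PF_INET, GFP_KERNEL, &my_proto, 1);
        if (sk == NULL)
                return -ENOBUFS;

        sock_init_data(sock, sk);       /* wires the default callbacks below */
        sk->sk_protocol = protocol;
        return 0;
}
#endif
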
void sk_free(struct sock *sk)
{
        struct sk_filter *filter;
        struct module *owner = sk->sk_prot_creator->owner;

        if (sk->sk_destruct)
                sk->sk_destruct(sk);

        filter = sk->sk_filter;
        if (filter) {
                sk_filter_release(sk, filter);
                sk->sk_filter = NULL;
        }

        sock_disable_timestamp(sk);

        if (atomic_read(&sk->sk_omem_alloc))
                printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
                       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));

        security_sk_free(sk);
        vx_sock_dec(sk);
        clr_vx_info(&sk->sk_vx_info);
        sk->sk_xid = -1;
        clr_nx_info(&sk->sk_nx_info);
        sk->sk_nid = -1;
        if (sk->sk_prot_creator->slab != NULL)
                kmem_cache_free(sk->sk_prot_creator->slab, sk);
        else
                kfree(sk);
        module_put(owner);
}

struct sock *sk_clone(struct sock *sk, const gfp_t priority)
{
        struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);

        if (newsk != NULL) {
                struct sk_filter *filter;

                memcpy(newsk, sk, sk->sk_prot->obj_size);

                /* SANITY */
                sock_vx_init(newsk);
                sock_nx_init(newsk);
                sk_node_init(&newsk->sk_node);
                sock_lock_init(newsk);
                bh_lock_sock(newsk);

                atomic_set(&newsk->sk_rmem_alloc, 0);
                atomic_set(&newsk->sk_wmem_alloc, 0);
                atomic_set(&newsk->sk_omem_alloc, 0);
                skb_queue_head_init(&newsk->sk_receive_queue);
                skb_queue_head_init(&newsk->sk_write_queue);
#ifdef CONFIG_NET_DMA
                skb_queue_head_init(&newsk->sk_async_wait_queue);
#endif

                rwlock_init(&newsk->sk_dst_lock);
                rwlock_init(&newsk->sk_callback_lock);
                lockdep_set_class(&newsk->sk_callback_lock,
                                   af_callback_keys + newsk->sk_family);

                newsk->sk_dst_cache     = NULL;
                newsk->sk_wmem_queued   = 0;
                newsk->sk_forward_alloc = 0;
                newsk->sk_send_head     = NULL;
                newsk->sk_backlog.head  = newsk->sk_backlog.tail = NULL;
                newsk->sk_userlocks     = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;

                sock_reset_flag(newsk, SOCK_DONE);
                skb_queue_head_init(&newsk->sk_error_queue);

                filter = newsk->sk_filter;
                if (filter != NULL)
                        sk_filter_charge(newsk, filter);

                if (sk->sk_create_child)
                        sk->sk_create_child(sk, newsk);

                if (unlikely(xfrm_sk_clone_policy(newsk))) {
                        /* It is still a raw copy of the parent, so invalidate
                         * the destructor and do a plain sk_free() */
                        newsk->sk_destruct = NULL;
                        sk_free(newsk);
                        newsk = NULL;
                        goto out;
                }

                newsk->sk_err      = 0;
                newsk->sk_priority = 0;
                atomic_set(&newsk->sk_refcnt, 2);

                set_vx_info(&newsk->sk_vx_info, sk->sk_vx_info);
                newsk->sk_xid = sk->sk_xid;
                vx_sock_inc(newsk);
                set_nx_info(&newsk->sk_nx_info, sk->sk_nx_info);
                newsk->sk_nid = sk->sk_nid;

                /*
                 * Increment the counter in the same struct proto as the master
                 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, which
                 * is the same as sk->sk_prot->socks, as this field was copied
                 * with memcpy).
                 *
                 * This _changes_ the previous behaviour, where
                 * tcp_create_openreq_child always was incrementing the
                 * equivalent to tcp_prot->socks (inet_sock_nr), so this has
                 * to be taken into account in all callers. -acme
                 */
                sk_refcnt_debug_inc(newsk);
                newsk->sk_socket = NULL;
                newsk->sk_sleep  = NULL;

                if (newsk->sk_prot->sockets_allocated)
                        atomic_inc(newsk->sk_prot->sockets_allocated);
        }
out:
        return newsk;
}

EXPORT_SYMBOL_GPL(sk_clone);

void __init sk_init(void)
{
        if (num_physpages <= 4096) {
                sysctl_wmem_max = 32767;
                sysctl_rmem_max = 32767;
                sysctl_wmem_default = 32767;
                sysctl_rmem_default = 32767;
        } else if (num_physpages >= 131072) {
                sysctl_wmem_max = 131071;
                sysctl_rmem_max = 131071;
        }
}

/*
 *      Simple resource managers for sockets.
 */


/*
 * Write buffer destructor automatically called from kfree_skb.
 */
void sock_wfree(struct sk_buff *skb)
{
        struct sock *sk = skb->sk;

        /* In case it might be waiting for more memory. */
        atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
        if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
                sk->sk_write_space(sk);
        sock_put(sk);
}

/*
 * Read buffer destructor automatically called from kfree_skb.
 */
void sock_rfree(struct sk_buff *skb)
{
        struct sock *sk = skb->sk;

        atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
}


int sock_i_uid(struct sock *sk)
{
        int uid;

        read_lock(&sk->sk_callback_lock);
        uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
        read_unlock(&sk->sk_callback_lock);
        return uid;
}

unsigned long sock_i_ino(struct sock *sk)
{
        unsigned long ino;

        read_lock(&sk->sk_callback_lock);
        ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
        read_unlock(&sk->sk_callback_lock);
        return ino;
}

/*
 * Allocate a skb from the socket's send buffer.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
                             gfp_t priority)
{
        if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
                struct sk_buff *skb = alloc_skb(size, priority);
                if (skb) {
                        skb_set_owner_w(skb, sk);
                        return skb;
                }
        }
        return NULL;
}

/*
 * Allocate a skb from the socket's receive buffer.
 */
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
                             gfp_t priority)
{
        if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
                struct sk_buff *skb = alloc_skb(size, priority);
                if (skb) {
                        skb_set_owner_r(skb, sk);
                        return skb;
                }
        }
        return NULL;
}

/*
 * Allocate a memory block from the socket's option memory buffer.
 */
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
        if ((unsigned)size <= sysctl_optmem_max &&
            atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
                void *mem;
                /* First do the add, to avoid the race if kmalloc
                 * might sleep.
                 */
                atomic_add(size, &sk->sk_omem_alloc);
                mem = kmalloc(size, priority);
                if (mem)
                        return mem;
                atomic_sub(size, &sk->sk_omem_alloc);
        }
        return NULL;
}

/*
 * Free an option memory block.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
        kfree(mem);
        atomic_sub(size, &sk->sk_omem_alloc);
}

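/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * the two helpers above are meant to be used as a pair, so per-socket
 * option data stays accounted in sk_omem_alloc.  struct my_opts is
 * hypothetical; the freed size must match the allocated size.
 */
#if 0
        struct my_opts *opt;

        opt = sock_kmalloc(sk, sizeof(*opt), GFP_KERNEL);
        if (opt == NULL)
                return -ENOBUFS;        /* optmem budget exhausted */

        /* ... use opt for the lifetime of the option ... */

        sock_kfree_s(sk, opt, sizeof(*opt));
#endif
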
/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think these locks should be removed for datagram sockets.
 */
static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
        DEFINE_WAIT(wait);

        clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
        for (;;) {
                if (!timeo)
                        break;
                if (signal_pending(current))
                        break;
                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
                if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
                        break;
                if (sk->sk_shutdown & SEND_SHUTDOWN)
                        break;
                if (sk->sk_err)
                        break;
                timeo = schedule_timeout(timeo);
        }
        finish_wait(sk->sk_sleep, &wait);
        return timeo;
}


/*
 *      Generic send/receive buffer handlers
 */

static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
                                            unsigned long header_len,
                                            unsigned long data_len,
                                            int noblock, int *errcode)
{
        struct sk_buff *skb;
        gfp_t gfp_mask;
        long timeo;
        int err;

        gfp_mask = sk->sk_allocation;
        if (gfp_mask & __GFP_WAIT)
                gfp_mask |= __GFP_REPEAT;

        timeo = sock_sndtimeo(sk, noblock);
        while (1) {
                err = sock_error(sk);
                if (err != 0)
                        goto failure;

                err = -EPIPE;
                if (sk->sk_shutdown & SEND_SHUTDOWN)
                        goto failure;

                if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
                        skb = alloc_skb(header_len, sk->sk_allocation);
                        if (skb) {
                                int npages;
                                int i;

                                /* No pages, we're done... */
                                if (!data_len)
                                        break;

                                npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
                                skb->truesize += data_len;
                                skb_shinfo(skb)->nr_frags = npages;
                                for (i = 0; i < npages; i++) {
                                        struct page *page;
                                        skb_frag_t *frag;

                                        page = alloc_pages(sk->sk_allocation, 0);
                                        if (!page) {
                                                err = -ENOBUFS;
                                                skb_shinfo(skb)->nr_frags = i;
                                                kfree_skb(skb);
                                                goto failure;
                                        }

                                        frag = &skb_shinfo(skb)->frags[i];
                                        frag->page = page;
                                        frag->page_offset = 0;
                                        frag->size = (data_len >= PAGE_SIZE ?
                                                      PAGE_SIZE :
                                                      data_len);
                                        data_len -= PAGE_SIZE;
                                }

                                /* Full success... */
                                break;
                        }
                        err = -ENOBUFS;
                        goto failure;
                }
                set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                err = -EAGAIN;
                if (!timeo)
                        goto failure;
                if (signal_pending(current))
                        goto interrupted;
                timeo = sock_wait_for_wmem(sk, timeo);
        }

        skb_set_owner_w(skb, sk);
        return skb;

interrupted:
        err = sock_intr_errno(timeo);
failure:
        *errcode = err;
        return NULL;
}

struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
                                    int noblock, int *errcode)
{
        return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}

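/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * a datagram sendmsg() path built on sock_alloc_send_skb(), which blocks
 * (subject to SO_SNDTIMEO) until the send buffer has room.
 * MY_HEADER_SPACE is a hypothetical per-protocol headroom constant.
 */
#if 0
        struct sk_buff *skb;
        int err;

        skb = sock_alloc_send_skb(sk, len + MY_HEADER_SPACE,
                                  msg->msg_flags & MSG_DONTWAIT, &err);
        if (skb == NULL)
                return err;     /* -EAGAIN, -EPIPE, -EINTR, ... */

        skb_reserve(skb, MY_HEADER_SPACE);
        err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
        if (err < 0) {
                kfree_skb(skb);
                return err;
        }
#endif
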
static void __lock_sock(struct sock *sk)
{
        DEFINE_WAIT(wait);

        for (;;) {
                prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
                                        TASK_UNINTERRUPTIBLE);
                spin_unlock_bh(&sk->sk_lock.slock);
                schedule();
                spin_lock_bh(&sk->sk_lock.slock);
                if (!sock_owned_by_user(sk))
                        break;
        }
        finish_wait(&sk->sk_lock.wq, &wait);
}

static void __release_sock(struct sock *sk)
{
        struct sk_buff *skb = sk->sk_backlog.head;

        do {
                sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
                bh_unlock_sock(sk);

                do {
                        struct sk_buff *next = skb->next;

                        skb->next = NULL;
                        sk->sk_backlog_rcv(sk, skb);

                        /*
                         * We are in process context here with softirqs
                         * disabled, use cond_resched_softirq() to preempt.
                         * This is safe to do because we've taken the backlog
                         * queue private:
                         */
                        cond_resched_softirq();

                        skb = next;
                } while (skb != NULL);

                bh_lock_sock(sk);
        } while ((skb = sk->sk_backlog.head) != NULL);
}

/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk:    sock to wait on
 * @timeo: for how long
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as optimization;
 * it is very likely that release_sock() added new data.
 */
int sk_wait_data(struct sock *sk, long *timeo)
{
        int rc;
        DEFINE_WAIT(wait);

        prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
        set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
        rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
        clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
        finish_wait(sk->sk_sleep, &wait);
        return rc;
}

EXPORT_SYMBOL(sk_wait_data);

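/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * the usual recvmsg() wait loop.  sk_wait_data() is called with the socket
 * lock held; sk_wait_event() drops the lock while sleeping and reacquires
 * it before returning.
 */
#if 0
        long timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

        while (skb_queue_empty(&sk->sk_receive_queue)) {
                if (sk->sk_err || !timeo || signal_pending(current))
                        break;  /* caller translates this into an errno */
                sk_wait_data(sk, &timeo);
        }
#endif
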
1336 /*
1337  * Set of default routines for initialising struct proto_ops when
1338  * the protocol does not support a particular function. In certain
1339  * cases where it makes no sense for a protocol to have a "do nothing"
1340  * function, some default processing is provided.
1341  */
1342
int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}

int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}

int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int *len, int peer)
{
	return -EOPNOTSUPP;
}

unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
{
	return 0;
}

int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}

int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}

int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}

int sock_no_setsockopt(struct socket *sock, int level, int optname,
		       char __user *optval, int optlen)
{
	return -EOPNOTSUPP;
}

int sock_no_getsockopt(struct socket *sock, int level, int optname,
		       char __user *optval, int __user *optlen)
{
	return -EOPNOTSUPP;
}

int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len)
{
	return -EOPNOTSUPP;
}

int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}

ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
	ssize_t res;
	struct msghdr msg = { .msg_flags = flags };
	struct kvec iov;
	char *kaddr = kmap(page);

	iov.iov_base = kaddr + offset;
	iov.iov_len = size;
	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
	kunmap(page);
	return res;
}
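
/*
 * Illustrative sketch (hypothetical, not in-tree): a connectionless
 * protocol that implements only a couple of operations can fall back on
 * the stubs above for the rest.  Only "example_dgram_ops" and the
 * example_* methods are invented; the stub names and the struct
 * proto_ops fields are real.
 */
#if 0
static const struct proto_ops example_dgram_ops = {
	.family		= PF_PACKET,		/* placeholder family */
	.owner		= THIS_MODULE,
	.bind		= sock_no_bind,
	.connect	= sock_no_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= sock_no_accept,
	.getname	= sock_no_getname,
	.ioctl		= sock_no_ioctl,
	.listen		= sock_no_listen,
	.shutdown	= sock_no_shutdown,
	.setsockopt	= sock_no_setsockopt,
	.getsockopt	= sock_no_getsockopt,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage,
	.sendmsg	= example_sendmsg,	/* the two ops it really has */
	.recvmsg	= example_recvmsg,
};
#endif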

/*
 *	Default Socket Callbacks
 */

static void sock_def_wakeup(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible_all(sk->sk_sleep);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_error_report(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
	sk_wake_async(sk, 0, POLL_ERR);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_readable(struct sock *sk, int len)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
	sk_wake_async(sk, 1, POLL_IN);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_write_space(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);

	/* Do not wake up a writer until it can make "significant"
	 * progress.  --DaveM
	 */
	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
			wake_up_interruptible(sk->sk_sleep);

		/* Should agree with poll, otherwise some programs break */
		if (sock_writeable(sk))
			sk_wake_async(sk, 2, POLL_OUT);
	}

	read_unlock(&sk->sk_callback_lock);
}
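
/*
 * Worked example of the threshold above (illustrative numbers): with
 * sk_sndbuf = 64KB, writers sleeping on a full buffer are only woken
 * once sk_wmem_alloc drops to 32KB or less, i.e. once at least half the
 * send buffer is free.  Waking at, say, 63KB allocated would let the
 * writer queue only ~1KB before blocking again, thrashing the scheduler
 * for little progress.
 */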

static void sock_def_destruct(struct sock *sk)
{
	kfree(sk->sk_protinfo);
}

void sk_send_sigurg(struct sock *sk)
{
	if (sk->sk_socket && sk->sk_socket->file)
		if (send_sigurg(&sk->sk_socket->file->f_owner))
			sk_wake_async(sk, 3, POLL_PRI);
}

void sk_reset_timer(struct sock *sk, struct timer_list *timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}

EXPORT_SYMBOL(sk_reset_timer);

void sk_stop_timer(struct sock *sk, struct timer_list *timer)
{
	if (timer_pending(timer) && del_timer(timer))
		__sock_put(sk);
}

EXPORT_SYMBOL(sk_stop_timer);
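
/*
 * The pair above keeps a socket reference pinned for exactly as long as
 * the timer is pending: mod_timer() returns 0 when the timer was not
 * already queued, so sk_reset_timer() takes a reference only on first
 * arming, and sk_stop_timer() drops it only if it actually dequeued a
 * pending timer.  A hypothetical retransmit path might use them like
 * this (the "example_*" names are invented):
 */
#if 0
static void example_arm_rtx(struct sock *sk, unsigned long timeout)
{
	/* holds a ref on sk until the timer fires or is stopped */
	sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout);
}

static void example_disarm_rtx(struct sock *sk)
{
	/* releases the ref taken when the timer was armed */
	sk_stop_timer(sk, &sk->sk_timer);
}
#endif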

void sock_init_data(struct socket *sock, struct sock *sk)
{
	skb_queue_head_init(&sk->sk_receive_queue);
	skb_queue_head_init(&sk->sk_write_queue);
	skb_queue_head_init(&sk->sk_error_queue);
#ifdef CONFIG_NET_DMA
	skb_queue_head_init(&sk->sk_async_wait_queue);
#endif

	sk->sk_send_head	=	NULL;

	init_timer(&sk->sk_timer);

	sk->sk_allocation	=	GFP_KERNEL;
	sk->sk_rcvbuf		=	sysctl_rmem_default;
	sk->sk_sndbuf		=	sysctl_wmem_default;
	sk->sk_state		=	TCP_CLOSE;
	sk->sk_socket		=	sock;

	sock_set_flag(sk, SOCK_ZAPPED);

	if (sock) {
		sk->sk_type	=	sock->type;
		sk->sk_sleep	=	&sock->wait;
		sock->sk	=	sk;
	} else
		sk->sk_sleep	=	NULL;

	rwlock_init(&sk->sk_dst_lock);
	rwlock_init(&sk->sk_callback_lock);
	lockdep_set_class(&sk->sk_callback_lock,
			  af_callback_keys + sk->sk_family);

	sk->sk_state_change	=	sock_def_wakeup;
	sk->sk_data_ready	=	sock_def_readable;
	sk->sk_write_space	=	sock_def_write_space;
	sk->sk_error_report	=	sock_def_error_report;
	sk->sk_destruct		=	sock_def_destruct;

	sk->sk_sndmsg_page	=	NULL;
	sk->sk_sndmsg_off	=	0;

	sk->sk_peercred.pid	=	0;
	sk->sk_peercred.uid	=	-1;
	sk->sk_peercred.gid	=	-1;
	sk->sk_write_pending	=	0;
	sk->sk_rcvlowat		=	1;
	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;

	sk->sk_stamp.tv_sec	= -1L;
	sk->sk_stamp.tv_usec	= -1L;

	set_vx_info(&sk->sk_vx_info, current->vx_info);
	sk->sk_xid = vx_current_xid();
	vx_sock_inc(sk);
	set_nx_info(&sk->sk_nx_info, current->nx_info);
	sk->sk_nid = nx_current_nid();
	atomic_set(&sk->sk_refcnt, 1);
}
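
/*
 * Illustrative sketch (hypothetical): the usual shape of an address
 * family's create() hook, which allocates the sock and hands it to
 * sock_init_data() before doing protocol-specific setup.  The names
 * "example_create" and "example_proto" are invented; sk_alloc()'s
 * signature is assumed to be the one this tree uses
 * (family, gfp flags, proto, zero_it).
 */
#if 0
static int example_create(struct socket *sock, int protocol)
{
	struct sock *sk = sk_alloc(PF_PACKET, GFP_KERNEL, &example_proto, 1);

	if (!sk)
		return -ENOBUFS;
	sock_init_data(sock, sk);	/* queues, timeouts, default callbacks */
	sk->sk_protocol = protocol;
	return 0;
}
#endif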

void fastcall lock_sock(struct sock *sk)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_lock.owner)
		__lock_sock(sk);
	sk->sk_lock.owner = (void *)1;
	spin_unlock(&sk->sk_lock.slock);
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
	local_bh_enable();
}

EXPORT_SYMBOL(lock_sock);

void fastcall release_sock(struct sock *sk)
{
	/*
	 * The sk_lock has mutex_unlock() semantics:
	 */
	mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);

	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_backlog.tail)
		__release_sock(sk);
	sk->sk_lock.owner = NULL;
	if (waitqueue_active(&sk->sk_lock.wq))
		wake_up(&sk->sk_lock.wq);
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(release_sock);
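
/*
 * Illustrative sketch (hypothetical "example_setsockopt_body"): process
 * context takes the socket lock around any state it shares with the
 * softirq receive path.  Packets arriving while the lock is owned are
 * parked on sk->sk_backlog and replayed by __release_sock() from
 * release_sock() above.
 */
#if 0
static int example_setsockopt_body(struct sock *sk, int val)
{
	lock_sock(sk);			/* may sleep; see might_sleep() */
	sk->sk_rcvlowat = val ? : 1;	/* exclusive access to sk state */
	release_sock(sk);		/* also drains the backlog */
	return 0;
}
#endif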

int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk);
	if (sk->sk_stamp.tv_sec == -1)
		return -ENOENT;
	if (sk->sk_stamp.tv_sec == 0)
		do_gettimeofday(&sk->sk_stamp);
	return copy_to_user(userstamp, &sk->sk_stamp, sizeof(struct timeval)) ?
		-EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestamp);

void sock_enable_timestamp(struct sock *sk)
{
	if (!sock_flag(sk, SOCK_TIMESTAMP)) {
		sock_set_flag(sk, SOCK_TIMESTAMP);
		net_enable_timestamp();
	}
}
EXPORT_SYMBOL(sock_enable_timestamp);
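
/*
 * Illustrative sketch (hypothetical "example_ioctl"): sock_get_timestamp()
 * is the usual backend for the SIOCGSTAMP ioctl, letting userspace fetch
 * the receive timestamp of the last packet.
 */
#if 0
static int example_ioctl(struct socket *sock, unsigned int cmd,
			 unsigned long arg)
{
	struct sock *sk = sock->sk;

	switch (cmd) {
	case SIOCGSTAMP:
		return sock_get_timestamp(sk, (struct timeval __user *)arg);
	default:
		return -ENOIOCTLCMD;
	}
}
#endif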

/*
 *	Get a socket option on a socket.
 *
 *	FIX: POSIX 1003.1g is very ambiguous here. It states that
 *	asynchronous errors should be reported by getsockopt. We assume
 *	this means if you specify SO_ERROR (otherwise what's the point of it).
 */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL(sock_common_getsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	if (sk->sk_prot->compat_getsockopt != NULL)
		return sk->sk_prot->compat_getsockopt(sk, level, optname,
						      optval, optlen);
	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_getsockopt);
#endif

int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
			struct msghdr *msg, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	int addr_len = 0;
	int err;

	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
				   flags & ~MSG_DONTWAIT, &addr_len);
	if (err >= 0)
		msg->msg_namelen = addr_len;
	return err;
}

EXPORT_SYMBOL(sock_common_recvmsg);

/*
 *	Set socket options on an inet socket.
 */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL(sock_common_setsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;

	if (sk->sk_prot->compat_setsockopt != NULL)
		return sk->sk_prot->compat_setsockopt(sk, level, optname,
						      optval, optlen);
	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_setsockopt);
#endif

void sk_common_release(struct sock *sk)
{
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Observation: when sk_common_release is called, processes no
	 * longer have access to the socket, but the network stack still
	 * does.  Step one, detach it from networking:
	 *
	 * A. Remove it from the hash tables.
	 */

	sk->sk_prot->unhash(sk);

	/*
	 * At this point the socket cannot receive new packets, but it is
	 * possible that some packets are in flight because another CPU
	 * runs the receiver and did its hash table lookup before we
	 * unhashed the socket.  Those packets will reach the receive
	 * queue and be purged by the socket destructor.
	 *
	 * Also, we may still have packets pending on the receive queue
	 * and, probably, our own packets waiting in device queues.
	 * sock_destroy() will drain the receive queue, but transmitted
	 * packets will delay socket destruction until the last reference
	 * is released.
	 */

	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

	sk_refcnt_debug_release(sk);
	sock_put(sk);
}

EXPORT_SYMBOL(sk_common_release);

static DEFINE_RWLOCK(proto_list_lock);
static LIST_HEAD(proto_list);

int proto_register(struct proto *prot, int alloc_slab)
{
	char *request_sock_slab_name = NULL;
	char *timewait_sock_slab_name;
	int rc = -ENOBUFS;

	if (alloc_slab) {
		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
					       SLAB_HWCACHE_ALIGN, NULL, NULL);

		if (prot->slab == NULL) {
			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
			       prot->name);
			goto out;
		}

		if (prot->rsk_prot != NULL) {
			static const char mask[] = "request_sock_%s";

			request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
			if (request_sock_slab_name == NULL)
				goto out_free_sock_slab;

			sprintf(request_sock_slab_name, mask, prot->name);
			prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
								 prot->rsk_prot->obj_size, 0,
								 SLAB_HWCACHE_ALIGN, NULL, NULL);

			if (prot->rsk_prot->slab == NULL) {
				printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
				       prot->name);
				goto out_free_request_sock_slab_name;
			}
		}

		if (prot->twsk_prot != NULL) {
			static const char mask[] = "tw_sock_%s";

			timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);

			if (timewait_sock_slab_name == NULL)
				goto out_free_request_sock_slab;

			sprintf(timewait_sock_slab_name, mask, prot->name);
			prot->twsk_prot->twsk_slab =
				kmem_cache_create(timewait_sock_slab_name,
						  prot->twsk_prot->twsk_obj_size,
						  0, SLAB_HWCACHE_ALIGN,
						  NULL, NULL);
			if (prot->twsk_prot->twsk_slab == NULL)
				goto out_free_timewait_sock_slab_name;
		}
	}

	write_lock(&proto_list_lock);
	list_add(&prot->node, &proto_list);
	write_unlock(&proto_list_lock);
	rc = 0;
out:
	return rc;
out_free_timewait_sock_slab_name:
	kfree(timewait_sock_slab_name);
out_free_request_sock_slab:
	if (prot->rsk_prot && prot->rsk_prot->slab) {
		kmem_cache_destroy(prot->rsk_prot->slab);
		prot->rsk_prot->slab = NULL;
	}
out_free_request_sock_slab_name:
	kfree(request_sock_slab_name);
out_free_sock_slab:
	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;
	goto out;
}

EXPORT_SYMBOL(proto_register);
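
/*
 * Illustrative sketch (hypothetical "example_prot"): a protocol module
 * registers its struct proto at init time, asking proto_register() to
 * create a dedicated slab cache for its sock objects, and unregisters
 * it on exit.
 */
#if 0
static struct proto example_prot = {
	.name		= "EXAMPLE",
	.owner		= THIS_MODULE,
	.obj_size	= sizeof(struct sock),
};

static int __init example_init(void)
{
	return proto_register(&example_prot, 1);	/* 1 => alloc slab */
}

static void __exit example_exit(void)
{
	proto_unregister(&example_prot);
}
#endif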

void proto_unregister(struct proto *prot)
{
	write_lock(&proto_list_lock);
	list_del(&prot->node);
	write_unlock(&proto_list_lock);

	if (prot->slab != NULL) {
		kmem_cache_destroy(prot->slab);
		prot->slab = NULL;
	}

	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
		const char *name = kmem_cache_name(prot->rsk_prot->slab);

		kmem_cache_destroy(prot->rsk_prot->slab);
		kfree(name);
		prot->rsk_prot->slab = NULL;
	}

	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
		const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);

		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
		kfree(name);
		prot->twsk_prot->twsk_slab = NULL;
	}
}

EXPORT_SYMBOL(proto_unregister);

#ifdef CONFIG_PROC_FS
static inline struct proto *__proto_head(void)
{
	return list_entry(proto_list.next, struct proto, node);
}

static inline struct proto *proto_head(void)
{
	return list_empty(&proto_list) ? NULL : __proto_head();
}

static inline struct proto *proto_next(struct proto *proto)
{
	return proto->node.next == &proto_list ? NULL :
		list_entry(proto->node.next, struct proto, node);
}

static inline struct proto *proto_get_idx(loff_t pos)
{
	struct proto *proto;
	loff_t i = 0;

	list_for_each_entry(proto, &proto_list, node)
		if (i++ == pos)
			goto out;

	proto = NULL;
out:
	return proto;
}

static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
{
	read_lock(&proto_list_lock);
	return *pos ? proto_get_idx(*pos - 1) : SEQ_START_TOKEN;
}

static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return v == SEQ_START_TOKEN ? proto_head() : proto_next(v);
}

static void proto_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&proto_list_lock);
}

static char proto_method_implemented(const void *method)
{
	return method == NULL ? 'n' : 'y';
}

static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
	seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
		   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
		   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->sendpage),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}

static int proto_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
			   "protocol",
			   "size",
			   "sockets",
			   "memory",
			   "press",
			   "maxhdr",
			   "slab",
			   "module",
			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
	else
		proto_seq_printf(seq, v);
	return 0;
}

static struct seq_operations proto_seq_ops = {
	.start  = proto_seq_start,
	.next   = proto_seq_next,
	.stop   = proto_seq_stop,
	.show   = proto_seq_show,
};

static int proto_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &proto_seq_ops);
}

static struct file_operations proto_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= proto_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static int __init proto_init(void)
{
	/* register /proc/net/protocols */
	return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
}

subsys_initcall(proto_init);

#endif /* CONFIG_PROC_FS */

EXPORT_SYMBOL(sk_alloc);
EXPORT_SYMBOL(sk_free);
EXPORT_SYMBOL(sk_send_sigurg);
EXPORT_SYMBOL(sock_alloc_send_skb);
EXPORT_SYMBOL(sock_init_data);
EXPORT_SYMBOL(sock_kfree_s);
EXPORT_SYMBOL(sock_kmalloc);
EXPORT_SYMBOL(sock_no_accept);
EXPORT_SYMBOL(sock_no_bind);
EXPORT_SYMBOL(sock_no_connect);
EXPORT_SYMBOL(sock_no_getname);
EXPORT_SYMBOL(sock_no_getsockopt);
EXPORT_SYMBOL(sock_no_ioctl);
EXPORT_SYMBOL(sock_no_listen);
EXPORT_SYMBOL(sock_no_mmap);
EXPORT_SYMBOL(sock_no_poll);
EXPORT_SYMBOL(sock_no_recvmsg);
EXPORT_SYMBOL(sock_no_sendmsg);
EXPORT_SYMBOL(sock_no_sendpage);
EXPORT_SYMBOL(sock_no_setsockopt);
EXPORT_SYMBOL(sock_no_shutdown);
EXPORT_SYMBOL(sock_no_socketpair);
EXPORT_SYMBOL(sock_rfree);
EXPORT_SYMBOL(sock_setsockopt);
EXPORT_SYMBOL(sock_wfree);
EXPORT_SYMBOL(sock_wmalloc);
EXPORT_SYMBOL(sock_i_uid);
EXPORT_SYMBOL(sock_i_ino);
EXPORT_SYMBOL(sysctl_optmem_max);
#ifdef CONFIG_SYSCTL
EXPORT_SYMBOL(sysctl_rmem_max);
EXPORT_SYMBOL(sysctl_wmem_max);
#endif