1 /*
2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
3  *              operating system.  INET is implemented using the  BSD Socket
4  *              interface as the means of communication with the user level.
5  *
6  *              PACKET - implements raw packet sockets.
7  *
8  * Version:     $Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $
9  *
10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
13  *
14  * Fixes:       
15  *              Alan Cox        :       verify_area() now used correctly
16  *              Alan Cox        :       new skbuff lists, look ma no backlogs!
17  *              Alan Cox        :       tidied skbuff lists.
18  *              Alan Cox        :       Now uses generic datagram routines I
19  *                                      added. Also fixed the peek/read crash
20  *                                      from all old Linux datagram code.
21  *              Alan Cox        :       Uses the improved datagram code.
22  *              Alan Cox        :       Added NULL's for socket options.
23  *              Alan Cox        :       Re-commented the code.
24  *              Alan Cox        :       Use new kernel side addressing
25  *              Rob Janssen     :       Correct MTU usage.
26  *              Dave Platt      :       Counter leaks caused by incorrect
27  *                                      interrupt locking and some slightly
28  *                                      dubious gcc output. Can you read
29  *                                      compiler: it said _VOLATILE_
30  *      Richard Kooijman        :       Timestamp fixes.
31  *              Alan Cox        :       New buffers. Use sk->mac.raw.
32  *              Alan Cox        :       sendmsg/recvmsg support.
33  *              Alan Cox        :       Protocol setting support
34  *      Alexey Kuznetsov        :       Untied from IPv4 stack.
35  *      Cyrus Durgin            :       Fixed kerneld for kmod.
36  *      Michal Ostrowski        :       Module initialization cleanup.
37  *         Ulises Alonso        :       Frame number limit removal and 
38  *                                      packet_set_ring memory leak.
39  *
40  *              This program is free software; you can redistribute it and/or
41  *              modify it under the terms of the GNU General Public License
42  *              as published by the Free Software Foundation; either version
43  *              2 of the License, or (at your option) any later version.
44  *
45  */
46  
47 #include <linux/config.h>
48 #include <linux/types.h>
49 #include <linux/sched.h>
50 #include <linux/mm.h>
51 #include <linux/fcntl.h>
52 #include <linux/socket.h>
53 #include <linux/in.h>
54 #include <linux/inet.h>
55 #include <linux/netdevice.h>
56 #include <linux/if_packet.h>
57 #include <linux/wireless.h>
58 #include <linux/kmod.h>
59 #include <net/ip.h>
60 #include <net/protocol.h>
61 #include <linux/skbuff.h>
62 #include <net/sock.h>
63 #include <linux/errno.h>
64 #include <linux/timer.h>
65 #include <asm/system.h>
66 #include <asm/uaccess.h>
67 #include <asm/ioctls.h>
68 #include <asm/page.h>
69 #include <asm/io.h>
70 #include <linux/proc_fs.h>
71 #include <linux/seq_file.h>
72 #include <linux/poll.h>
73 #include <linux/module.h>
74 #include <linux/init.h>
75
76 #ifdef CONFIG_INET
77 #include <net/inet_common.h>
78 #endif
79
80 #define CONFIG_SOCK_PACKET      1
81
82 /*
83    Proposed replacement for SIOC{ADD,DEL}MULTI and
84    IFF_PROMISC, IFF_ALLMULTI flags.
85
86    It is more expensive, but I believe it is the really correct
87    solution: re-entrant, safe and fault tolerant.
88
89    IFF_PROMISC/IFF_ALLMULTI/SIOC{ADD/DEL}MULTI are faked by keeping a
90    reference count and a global flag, so that the real status is
91    (gflag|(count != 0)). That way the obsolete, faulty interface can
92    still be used without harming clever users.
93  */
94 #define CONFIG_PACKET_MULTICAST 1
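/*
   Illustrative sketch (not code from this file): with the reference
   count, independent users can overlap safely. dev_set_promiscuity()
   keeps the interface promiscuous until every user has released it:

        dev_set_promiscuity(dev,  1);   // user A: count 0 -> 1, promisc on
        dev_set_promiscuity(dev,  1);   // user B: count 1 -> 2, still on
        dev_set_promiscuity(dev, -1);   // user A done: count 2 -> 1, still on
        dev_set_promiscuity(dev, -1);   // user B done: count 1 -> 0, promisc off
 */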
95
96 /*
97    Assumptions:
98    - if a device has no dev->hard_header routine, it adds and removes the
99      ll header itself. In this case the ll header is invisible outside of
100      the device, but higher levels should still reserve dev->hard_header_len.
101      Some devices are clever enough to reallocate the skb when the header
102      will not fit in the reserved space (tunnels); others are silly
103      (PPP).
104    - a packet socket receives packets with the ll header already pulled,
105      so SOCK_RAW should push it back.
106
107 On receive:
108 -----------
109
110 Incoming, dev->hard_header!=NULL
111    mac.raw -> ll header
112    data    -> data
113
114 Outgoing, dev->hard_header!=NULL
115    mac.raw -> ll header
116    data    -> ll header
117
118 Incoming, dev->hard_header==NULL
119    mac.raw -> UNKNOWN position. It very likely points to the ll header.
120               PPP does this, which is wrong, because it introduces
121               asymmetry between the rx and tx paths.
122    data    -> data
123
124 Outgoing, dev->hard_header==NULL
125    mac.raw -> data. ll header is still not built!
126    data    -> data
127
128 Summary
129   If dev->hard_header==NULL we are unlikely to be able to restore a sensible ll header.
130
131
132 On transmit:
133 ------------
134
135 dev->hard_header != NULL
136    mac.raw -> ll header
137    data    -> ll header
138
139 dev->hard_header == NULL (ll header is added by device, we cannot control it)
140    mac.raw -> data
141    data -> data
142
143    We should set nh.raw on output to the correct position;
144    the packet classifier depends on it.
145  */
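/*
   Usage sketch (userspace, illustrative only; the interface name and
   ethertype are made up for the example): with SOCK_RAW the caller
   builds the complete link-layer header itself, exactly as the rules
   above require; with SOCK_DGRAM the device's hard_header routine
   would build it from sll_addr instead.

        #include <sys/socket.h>
        #include <arpa/inet.h>
        #include <linux/if_packet.h>
        #include <linux/if_ether.h>
        #include <net/if.h>
        #include <string.h>
        #include <unistd.h>

        int send_raw_frame(const unsigned char dst[ETH_ALEN],
                           const unsigned char src[ETH_ALEN],
                           const void *payload, size_t plen)
        {
                unsigned char frame[ETH_FRAME_LEN];
                struct ethhdr *eh = (struct ethhdr *)frame;
                struct sockaddr_ll sll;
                int fd, ret;

                if (plen > ETH_FRAME_LEN - ETH_HLEN)
                        return -1;
                fd = socket(PF_PACKET, SOCK_RAW, 0);    // protocol 0: TX only
                if (fd < 0)
                        return -1;

                memset(&sll, 0, sizeof(sll));
                sll.sll_family   = AF_PACKET;
                sll.sll_ifindex  = if_nametoindex("eth0");
                sll.sll_protocol = htons(0x88b5);       // experimental ethertype
                sll.sll_halen    = ETH_ALEN;
                memcpy(sll.sll_addr, dst, ETH_ALEN);

                memcpy(eh->h_dest, dst, ETH_ALEN);      // ll header built by
                memcpy(eh->h_source, src, ETH_ALEN);    // the caller, not the
                eh->h_proto = htons(0x88b5);            // kernel
                memcpy(frame + ETH_HLEN, payload, plen);

                ret = sendto(fd, frame, ETH_HLEN + plen, 0,
                             (struct sockaddr *)&sll, sizeof(sll));
                close(fd);
                return ret;
        }
 */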
146
147 /* List of all packet sockets. */
148 HLIST_HEAD(packet_sklist);
149 static rwlock_t packet_sklist_lock = RW_LOCK_UNLOCKED;
150
151 atomic_t packet_socks_nr;
152
153
154 /* Private packet socket structures. */
155
156 #ifdef CONFIG_PACKET_MULTICAST
157 struct packet_mclist
158 {
159         struct packet_mclist    *next;
160         int                     ifindex;
161         int                     count;
162         unsigned short          type;
163         unsigned short          alen;
164         unsigned char           addr[8];
165 };
166 #endif
167 #ifdef CONFIG_PACKET_MMAP
168 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
169 #endif
170
171 static void packet_flush_mclist(struct sock *sk);
172
173 struct packet_opt
174 {
175         struct tpacket_stats    stats;
176 #ifdef CONFIG_PACKET_MMAP
177         char *                  *pg_vec;
178         unsigned int            head;
179         unsigned int            frames_per_block;
180         unsigned int            frame_size;
181         unsigned int            frame_max;
182         int                     copy_thresh;
183 #endif
184         struct packet_type      prot_hook;
185         spinlock_t              bind_lock;
186         char                    running;        /* prot_hook is attached */
187         int                     ifindex;        /* bound device         */
188         unsigned short          num;
189 #ifdef CONFIG_PACKET_MULTICAST
190         struct packet_mclist    *mclist;
191 #endif
192 #ifdef CONFIG_PACKET_MMAP
193         atomic_t                mapped;
194         unsigned int            pg_vec_order;
195         unsigned int            pg_vec_pages;
196         unsigned int            pg_vec_len;
197 #endif
198 };
199
200 #ifdef CONFIG_PACKET_MMAP
201
202 static inline char *packet_lookup_frame(struct packet_opt *po, unsigned int position)
203 {
204         unsigned int pg_vec_pos, frame_offset;
205         char *frame;
206
207         pg_vec_pos = position / po->frames_per_block;
208         frame_offset = position % po->frames_per_block;
209
210         frame = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size);
211         
212         return frame;
213 }
214 #endif
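/*
   Worked example for packet_lookup_frame() (numbers are hypothetical):
   with tp_block_size = 4096 and tp_frame_size = 2048, frames_per_block
   is 2, so position 5 resolves to pg_vec_pos = 5 / 2 = 2 and
   frame_offset = 5 % 2 = 1, i.e. 2048 bytes into pg_vec[2].
 */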
215
216 #define pkt_sk(__sk) ((struct packet_opt *)(__sk)->sk_protinfo)
217
218 void packet_sock_destruct(struct sock *sk)
219 {
220         BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
221         BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
222         BUG_ON(sk->sk_nx_info);
223         BUG_ON(sk->sk_vx_info);
224
225         if (!sock_flag(sk, SOCK_DEAD)) {
226                 printk("Attempt to release alive packet socket: %p\n", sk);
227                 return;
228         }
229
230         if (pkt_sk(sk))
231                 kfree(pkt_sk(sk));
232         atomic_dec(&packet_socks_nr);
233 #ifdef PACKET_REFCNT_DEBUG
234         printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
235 #endif
236 }
237
238
239 extern struct proto_ops packet_ops;
240
241 #ifdef CONFIG_SOCK_PACKET
242 extern struct proto_ops packet_ops_spkt;
243
244 static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
245 {
246         struct sock *sk;
247         struct sockaddr_pkt *spkt;
248
249         /*
250          *      When we registered the protocol we saved the socket in the data
251          *      field for just this event.
252          */
253
254         sk = pt->af_packet_priv;
255         
256         /*
257          *      Yank back the headers [hope the device set this
258          *      right or kerboom...]
259          *
260          *      Incoming packets have ll header pulled,
261          *      push it back.
262          *
263          *      For outgoing ones skb->data == skb->mac.raw,
264          *      so this procedure is a no-op.
265          */
266
267         if (skb->pkt_type == PACKET_LOOPBACK)
268                 goto out;
269
270         if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
271                 goto oom;
272
273         /* drop any routing info */
274         dst_release(skb->dst);
275         skb->dst = NULL;
276
277         spkt = (struct sockaddr_pkt*)skb->cb;
278
279         skb_push(skb, skb->data-skb->mac.raw);
280
281         /*
282          *      The SOCK_PACKET socket receives _all_ frames.
283          */
284
285         spkt->spkt_family = dev->type;
286         strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
287         spkt->spkt_protocol = skb->protocol;
288
289         /*
290          *      Charge the memory to the socket. This is done specifically
291          *      to prevent sockets using all the memory up.
292          */
293
294         if (sock_queue_rcv_skb(sk,skb) == 0)
295                 return 0;
296
297 out:
298         kfree_skb(skb);
299 oom:
300         return 0;
301 }
302
303
304 /*
305  *      Output a raw packet to a device layer. This bypasses all the other
306  *      protocol layers and you must therefore supply it with a complete frame
307  */
308  
309 static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
310                                struct msghdr *msg, size_t len)
311 {
312         struct sock *sk = sock->sk;
313         struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
314         struct sk_buff *skb;
315         struct net_device *dev;
316         unsigned short proto=0;
317         int err;
318         
319         /*
320          *      Get and verify the address. 
321          */
322
323         if (saddr)
324         {
325                 if (msg->msg_namelen < sizeof(struct sockaddr))
326                         return(-EINVAL);
327                 if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
328                         proto=saddr->spkt_protocol;
329         }
330         else
331                 return(-ENOTCONN);      /* SOCK_PACKET must be sent giving an address */
332
333         /*
334          *      Find the device first to size check it 
335          */
336
337         saddr->spkt_device[13] = 0;
338         dev = dev_get_by_name(saddr->spkt_device);
339         err = -ENODEV;
340         if (dev == NULL)
341                 goto out_unlock;
342         
343         /*
344          *      You may not queue a frame bigger than the mtu. This is the lowest level
345          *      raw protocol and you must do your own fragmentation at this level.
346          */
347          
348         err = -EMSGSIZE;
349         if(len>dev->mtu+dev->hard_header_len)
350                 goto out_unlock;
351
352         err = -ENOBUFS;
353         skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);
354
355         /*
356          *      If the write buffer is full, then tough. At this level the user gets to
357          *      deal with the problem - do your own algorithmic backoffs. That's far
358          *      more flexible.
359          */
360          
361         if (skb == NULL) 
362                 goto out_unlock;
363
364         /*
365          *      Fill it in 
366          */
367          
368         /* FIXME: Save some space for broken drivers that write a
369          * hard header at transmission time by themselves. PPP is the
370          * notable one here. This should really be fixed at the driver level.
371          */
372         skb_reserve(skb, LL_RESERVED_SPACE(dev));
373         skb->nh.raw = skb->data;
374
375         /* Try to align data part correctly */
376         if (dev->hard_header) {
377                 skb->data -= dev->hard_header_len;
378                 skb->tail -= dev->hard_header_len;
379                 if (len < dev->hard_header_len)
380                         skb->nh.raw = skb->data;
381         }
382
383         /* Returns -EFAULT on error */
384         err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
385         skb->protocol = proto;
386         skb->dev = dev;
387         skb->priority = sk->sk_priority;
388         if (err)
389                 goto out_free;
390
391         err = -ENETDOWN;
392         if (!(dev->flags & IFF_UP))
393                 goto out_free;
394
395         /*
396          *      Now send it
397          */
398
399         dev_queue_xmit(skb);
400         dev_put(dev);
401         return(len);
402
403 out_free:
404         kfree_skb(skb);
405 out_unlock:
406         if (dev)
407                 dev_put(dev);
408         return err;
409 }
410 #endif
411
412 static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned res)
413 {
414         struct sk_filter *filter;
415
416         bh_lock_sock(sk);
417         filter = sk->sk_filter;
418         /*
419          * Our caller already checked that filter != NULL but we need to
420          * verify that under bh_lock_sock() to be safe
421          */
422         if (likely(filter != NULL))
423                 res = sk_run_filter(skb, filter->insns, filter->len);
424         bh_unlock_sock(sk);
425
426         return res;
427 }
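/*
   Userspace sketch (illustrative): the sk->sk_filter checked above is
   set by attaching a classic BPF program with SO_ATTACH_FILTER. The
   program's return value becomes the snap length; 96 here is arbitrary.

        #include <sys/socket.h>
        #include <linux/filter.h>

        static int attach_snap_filter(int fd)
        {
                struct sock_filter insns[] = {
                        BPF_STMT(BPF_RET | BPF_K, 96), // accept, snap to 96 bytes
                };
                struct sock_fprog prog = {
                        .len    = sizeof(insns) / sizeof(insns[0]),
                        .filter = insns,
                };

                return setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
                                  &prog, sizeof(prog));
        }
 */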
428
429 /*
430    This function does lazy skb cloning in the hope that most packets
431    are discarded by BPF.
432
433    Note the tricky part: we DO mangle a shared skb! skb->data, skb->len
434    and skb->cb are mangled. It works because (and until) packets
435    falling here are owned by the current CPU. Output packets are cloned
436    by dev_queue_xmit_nit(), input packets are processed by net_bh
437    sequentially, so if we return the skb to its original state on exit,
438    we will not harm anyone.
439  */
440
441 static int packet_rcv(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
442 {
443         struct sock *sk;
444         struct sockaddr_ll *sll;
445         struct packet_opt *po;
446         u8 * skb_head = skb->data;
447         int skb_len = skb->len;
448         unsigned snaplen;
449
450         if (skb->pkt_type == PACKET_LOOPBACK)
451                 goto drop;
452
453         sk = pt->af_packet_priv;
454         po = pkt_sk(sk);
455
456 #if defined(CONFIG_VNET) || defined(CONFIG_VNET_MODULE)
457         if (vnet_active &&
458             (int) sk->sk_xid > 0 && sk->sk_xid != skb->xid)
459                 goto drop;
460 #endif
461
462         skb->dev = dev;
463
464         if (dev->hard_header) {
465                 /* The device has an explicit notion of ll header,
466                    exported to higher levels.
467
468                    Otherwise, the device hides the details of its frame
469                    structure, so the corresponding packet head is
470                    never delivered to the user.
471                  */
472                 if (sk->sk_type != SOCK_DGRAM)
473                         skb_push(skb, skb->data - skb->mac.raw);
474                 else if (skb->pkt_type == PACKET_OUTGOING) {
475                         /* Special case: outgoing packets have ll header at head */
476                         skb_pull(skb, skb->nh.raw - skb->data);
477                 }
478         }
479
480         snaplen = skb->len;
481
482         if (sk->sk_filter) {
483                 unsigned res = run_filter(skb, sk, snaplen);
484                 if (res == 0)
485                         goto drop_n_restore;
486                 if (snaplen > res)
487                         snaplen = res;
488         }
489
490         if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
491             (unsigned)sk->sk_rcvbuf)
492                 goto drop_n_acct;
493
494         if (skb_shared(skb)) {
495                 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
496                 if (nskb == NULL)
497                         goto drop_n_acct;
498
499                 if (skb_head != skb->data) {
500                         skb->data = skb_head;
501                         skb->len = skb_len;
502                 }
503                 kfree_skb(skb);
504                 skb = nskb;
505         }
506
507         sll = (struct sockaddr_ll*)skb->cb;
508         sll->sll_family = AF_PACKET;
509         sll->sll_hatype = dev->type;
510         sll->sll_protocol = skb->protocol;
511         sll->sll_pkttype = skb->pkt_type;
512         sll->sll_ifindex = dev->ifindex;
513         sll->sll_halen = 0;
514
515         if (dev->hard_header_parse)
516                 sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
517
518         if (pskb_trim(skb, snaplen))
519                 goto drop_n_acct;
520
521         skb_set_owner_r(skb, sk);
522         skb->dev = NULL;
523         dst_release(skb->dst);
524         skb->dst = NULL;
525
526         spin_lock(&sk->sk_receive_queue.lock);
527         po->stats.tp_packets++;
528         __skb_queue_tail(&sk->sk_receive_queue, skb);
529         spin_unlock(&sk->sk_receive_queue.lock);
530         sk->sk_data_ready(sk, skb->len);
531         return 0;
532
533 drop_n_acct:
534         spin_lock(&sk->sk_receive_queue.lock);
535         po->stats.tp_drops++;
536         spin_unlock(&sk->sk_receive_queue.lock);
537
538 drop_n_restore:
539         if (skb_head != skb->data && skb_shared(skb)) {
540                 skb->data = skb_head;
541                 skb->len = skb_len;
542         }
543 drop:
544         kfree_skb(skb);
545         return 0;
546 }
547
548 #ifdef CONFIG_PACKET_MMAP
549 static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
550 {
551         struct sock *sk;
552         struct packet_opt *po;
553         struct sockaddr_ll *sll;
554         struct tpacket_hdr *h;
555         u8 * skb_head = skb->data;
556         int skb_len = skb->len;
557         unsigned snaplen;
558         unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
559         unsigned short macoff, netoff;
560         struct sk_buff *copy_skb = NULL;
561
562         if (skb->pkt_type == PACKET_LOOPBACK)
563                 goto drop;
564
565         sk = pt->af_packet_priv;
566         po = pkt_sk(sk);
567
568         if (dev->hard_header) {
569                 if (sk->sk_type != SOCK_DGRAM)
570                         skb_push(skb, skb->data - skb->mac.raw);
571                 else if (skb->pkt_type == PACKET_OUTGOING) {
572                         /* Special case: outgoing packets have ll header at head */
573                         skb_pull(skb, skb->nh.raw - skb->data);
574                         if (skb->ip_summed == CHECKSUM_HW)
575                                 status |= TP_STATUS_CSUMNOTREADY;
576                 }
577         }
578
579         snaplen = skb->len;
580
581         if (sk->sk_filter) {
582                 unsigned res = run_filter(skb, sk, snaplen);
583                 if (res == 0)
584                         goto drop_n_restore;
585                 if (snaplen > res)
586                         snaplen = res;
587         }
588
589         if (sk->sk_type == SOCK_DGRAM) {
590                 macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
591         } else {
592                 unsigned maclen = skb->nh.raw - skb->data;
593                 netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
594                 macoff = netoff - maclen;
595         }
596
597         if (macoff + snaplen > po->frame_size) {
598                 if (po->copy_thresh &&
599                     atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
600                     (unsigned)sk->sk_rcvbuf) {
601                         if (skb_shared(skb)) {
602                                 copy_skb = skb_clone(skb, GFP_ATOMIC);
603                         } else {
604                                 copy_skb = skb_get(skb);
605                                 skb_head = skb->data;
606                         }
607                         if (copy_skb)
608                                 skb_set_owner_r(copy_skb, sk);
609                 }
610                 snaplen = po->frame_size - macoff;
611                 if ((int)snaplen < 0)
612                         snaplen = 0;
613         }
614         if (snaplen > skb->len-skb->data_len)
615                 snaplen = skb->len-skb->data_len;
616
617         spin_lock(&sk->sk_receive_queue.lock);
618         h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head);
619         
620         if (h->tp_status)
621                 goto ring_is_full;
622         po->head = po->head != po->frame_max ? po->head+1 : 0;
623         po->stats.tp_packets++;
624         if (copy_skb) {
625                 status |= TP_STATUS_COPY;
626                 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
627         }
628         if (!po->stats.tp_drops)
629                 status &= ~TP_STATUS_LOSING;
630         spin_unlock(&sk->sk_receive_queue.lock);
631
632         memcpy((u8*)h + macoff, skb->data, snaplen);
633
634         h->tp_len = skb->len;
635         h->tp_snaplen = snaplen;
636         h->tp_mac = macoff;
637         h->tp_net = netoff;
638         if (skb->stamp.tv_sec == 0) { 
639                 do_gettimeofday(&skb->stamp);
640                 sock_enable_timestamp(sk);
641         }
642         h->tp_sec = skb->stamp.tv_sec;
643         h->tp_usec = skb->stamp.tv_usec;
644
645         sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
646         sll->sll_halen = 0;
647         if (dev->hard_header_parse)
648                 sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
649         sll->sll_family = AF_PACKET;
650         sll->sll_hatype = dev->type;
651         sll->sll_protocol = skb->protocol;
652         sll->sll_pkttype = skb->pkt_type;
653         sll->sll_ifindex = dev->ifindex;
654
655         h->tp_status = status;
656         mb();
657
658         {
659                 struct page *p_start, *p_end;
660                 u8 *h_end = (u8 *)h + macoff + snaplen - 1;
661
662                 p_start = virt_to_page(h);
663                 p_end = virt_to_page(h_end);
664                 while (p_start <= p_end) {
665                         flush_dcache_page(p_start);
666                         p_start++;
667                 }
668         }
669
670         sk->sk_data_ready(sk, 0);
671
672 drop_n_restore:
673         if (skb_head != skb->data && skb_shared(skb)) {
674                 skb->data = skb_head;
675                 skb->len = skb_len;
676         }
677 drop:
678         kfree_skb(skb);
679         return 0;
680
681 ring_is_full:
682         po->stats.tp_drops++;
683         spin_unlock(&sk->sk_receive_queue.lock);
684
685         sk->sk_data_ready(sk, 0);
686         if (copy_skb)
687                 kfree_skb(copy_skb);
688         goto drop_n_restore;
689 }
690
691 #endif
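/*
   Userspace sketch (illustrative; the sizes are arbitrary but must obey
   the constraints checked in packet_set_ring() below): the ring that
   tpacket_rcv() fills is created with PACKET_RX_RING, mapped once with
   mmap(), and each frame is handed back by resetting tp_status.

        #include <poll.h>
        #include <sys/mman.h>
        #include <sys/socket.h>
        #include <linux/if_packet.h>

        struct tpacket_req req = {
                .tp_block_size = 4096,  // multiple of PAGE_SIZE
                .tp_block_nr   = 64,
                .tp_frame_size = 2048,  // multiple of TPACKET_ALIGNMENT
                .tp_frame_nr   = 128,   // = block_nr * (block_size / frame_size)
        };
        setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));

        char *ring = mmap(NULL, (size_t)req.tp_block_size * req.tp_block_nr,
                          PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

        for (unsigned i = 0; ; i = (i + 1) % req.tp_frame_nr) {
                struct tpacket_hdr *h =
                        (struct tpacket_hdr *)(ring + i * req.tp_frame_size);

                while (!(h->tp_status & TP_STATUS_USER)) {
                        struct pollfd pfd = { .fd = fd, .events = POLLIN };
                        poll(&pfd, 1, -1);      // serviced by packet_poll()
                }
                // frame data lives at (char *)h + h->tp_mac, h->tp_snaplen long
                h->tp_status = TP_STATUS_KERNEL;        // return frame to kernel
        }
 */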
692
693
694 static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
695                           struct msghdr *msg, size_t len)
696 {
697         struct sock *sk = sock->sk;
698         struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
699         struct sk_buff *skb;
700         struct net_device *dev;
701         unsigned short proto;
702         unsigned char *addr;
703         int ifindex, err, reserve = 0;
704
705         /*
706          *      Get and verify the address. 
707          */
708          
709         if (saddr == NULL) {
710                 struct packet_opt *po = pkt_sk(sk);
711
712                 ifindex = po->ifindex;
713                 proto   = po->num;
714                 addr    = NULL;
715         } else {
716                 err = -EINVAL;
717                 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
718                         goto out;
719                 ifindex = saddr->sll_ifindex;
720                 proto   = saddr->sll_protocol;
721                 addr    = saddr->sll_addr;
722         }
723
724
725         dev = dev_get_by_index(ifindex);
726         err = -ENXIO;
727         if (dev == NULL)
728                 goto out_unlock;
729         if (sock->type == SOCK_RAW)
730                 reserve = dev->hard_header_len;
731
732         err = -EMSGSIZE;
733         if (len > dev->mtu+reserve)
734                 goto out_unlock;
735
736         skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
737                                 msg->msg_flags & MSG_DONTWAIT, &err);
738         if (skb==NULL)
739                 goto out_unlock;
740
741         skb_reserve(skb, LL_RESERVED_SPACE(dev));
742         skb->nh.raw = skb->data;
743
744         if (dev->hard_header) {
745                 int res;
746                 err = -EINVAL;
747                 res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
748                 if (sock->type != SOCK_DGRAM) {
749                         skb->tail = skb->data;
750                         skb->len = 0;
751                 } else if (res < 0)
752                         goto out_free;
753         }
754
755         /* Returns -EFAULT on error */
756         err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
757         if (err)
758                 goto out_free;
759
760         skb->protocol = proto;
761         skb->dev = dev;
762         skb->priority = sk->sk_priority;
763
764         err = -ENETDOWN;
765         if (!(dev->flags & IFF_UP))
766                 goto out_free;
767
768         /*
769          *      Now send it
770          */
771
772         err = dev_queue_xmit(skb);
773         if (err > 0 && (err = net_xmit_errno(err)) != 0)
774                 goto out_unlock;
775
776         dev_put(dev);
777
778         return(len);
779
780 out_free:
781         kfree_skb(skb);
782 out_unlock:
783         if (dev)
784                 dev_put(dev);
785 out:
786         return err;
787 }
788
789 /*
790  *      Close a PACKET socket. This is fairly simple. We immediately go
791  *      to 'closed' state and remove our protocol entry in the device list.
792  */
793
794 static int packet_release(struct socket *sock)
795 {
796         struct sock *sk = sock->sk;
797         struct packet_opt *po;
798
799         if (!sk)
800                 return 0;
801
802         po = pkt_sk(sk);
803
804         write_lock_bh(&packet_sklist_lock);
805         sk_del_node_init(sk);
806         write_unlock_bh(&packet_sklist_lock);
807
808         /*
809          *      Unhook packet receive handler.
810          */
811
812         if (po->running) {
813                 /*
814                  *      Remove the protocol hook
815                  */
816                 dev_remove_pack(&po->prot_hook);
817                 po->running = 0;
818                 po->num = 0;
819                 __sock_put(sk);
820         }
821
822 #ifdef CONFIG_PACKET_MULTICAST
823         packet_flush_mclist(sk);
824 #endif
825
826 #ifdef CONFIG_PACKET_MMAP
827         if (po->pg_vec) {
828                 struct tpacket_req req;
829                 memset(&req, 0, sizeof(req));
830                 packet_set_ring(sk, &req, 1);
831         }
832 #endif
833
834         clr_vx_info(&sk->sk_vx_info);
835         clr_nx_info(&sk->sk_nx_info);
836
837         /*
838          *      Now the socket is dead. No more input will appear.
839          */
840
841         sock_orphan(sk);
842         sock->sk = NULL;
843
844         /* Purge queues */
845
846         skb_queue_purge(&sk->sk_receive_queue);
847
848         sock_put(sk);
849         return 0;
850 }
851
852 /*
853  *      Attach a packet hook.
854  */
855
856 static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
857 {
858         struct packet_opt *po = pkt_sk(sk);
859         /*
860          *      Detach an existing hook if present.
861          */
862
863         lock_sock(sk);
864
865         spin_lock(&po->bind_lock);
866         if (po->running) {
867                 __sock_put(sk);
868                 po->running = 0;
869                 po->num = 0;
870                 spin_unlock(&po->bind_lock);
871                 dev_remove_pack(&po->prot_hook);
872                 spin_lock(&po->bind_lock);
873         }
874
875         po->num = protocol;
876         po->prot_hook.type = protocol;
877         po->prot_hook.dev = dev;
878
879         po->ifindex = dev ? dev->ifindex : 0;
880
881         if (protocol == 0)
882                 goto out_unlock;
883
884         if (dev) {
885                 if (dev->flags&IFF_UP) {
886                         dev_add_pack(&po->prot_hook);
887                         sock_hold(sk);
888                         po->running = 1;
889                 } else {
890                         sk->sk_err = ENETDOWN;
891                         if (!sock_flag(sk, SOCK_DEAD))
892                                 sk->sk_error_report(sk);
893                 }
894         } else {
895                 dev_add_pack(&po->prot_hook);
896                 sock_hold(sk);
897                 po->running = 1;
898         }
899
900 out_unlock:
901         spin_unlock(&po->bind_lock);
902         release_sock(sk);
903         return 0;
904 }
905
906 /*
907  *      Bind a packet socket to a device
908  */
909
910 #ifdef CONFIG_SOCK_PACKET
911
912 static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
913 {
914         struct sock *sk=sock->sk;
915         char name[15];
916         struct net_device *dev;
917         int err = -ENODEV;
918         
919         /*
920          *      Check legality
921          */
922          
923         if(addr_len!=sizeof(struct sockaddr))
924                 return -EINVAL;
925         strlcpy(name,uaddr->sa_data,sizeof(name));
926
927         dev = dev_get_by_name(name);
928         if (dev) {
929                 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
930                 dev_put(dev);
931         }
932         return err;
933 }
934 #endif
935
936 static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
937 {
938         struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
939         struct sock *sk=sock->sk;
940         struct net_device *dev = NULL;
941         int err;
942
943
944         /*
945          *      Check legality
946          */
947          
948         if (addr_len < sizeof(struct sockaddr_ll))
949                 return -EINVAL;
950         if (sll->sll_family != AF_PACKET)
951                 return -EINVAL;
952
953         if (sll->sll_ifindex) {
954                 err = -ENODEV;
955                 dev = dev_get_by_index(sll->sll_ifindex);
956                 if (dev == NULL)
957                         goto out;
958         }
959         err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
960         if (dev)
961                 dev_put(dev);
962
963 out:
964         return err;
965 }
966
967
968 /*
969  *      Create a packet of type SOCK_PACKET. 
970  */
971
972 static int packet_create(struct socket *sock, int protocol)
973 {
974         struct sock *sk;
975         struct packet_opt *po;
976         int err;
977
978         if (!capable(CAP_NET_RAW))
979                 return -EPERM;
980         if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW
981 #ifdef CONFIG_SOCK_PACKET
982             && sock->type != SOCK_PACKET
983 #endif
984             )
985                 return -ESOCKTNOSUPPORT;
986
987         sock->state = SS_UNCONNECTED;
988
989         err = -ENOBUFS;
990         sk = sk_alloc(PF_PACKET, GFP_KERNEL, 1, NULL);
991         if (sk == NULL)
992                 goto out;
993
994         sock->ops = &packet_ops;
995 #ifdef CONFIG_SOCK_PACKET
996         if (sock->type == SOCK_PACKET)
997                 sock->ops = &packet_ops_spkt;
998 #endif
999         sock_init_data(sock,sk);
1000         sk_set_owner(sk, THIS_MODULE);
1001
1002         po = sk->sk_protinfo = kmalloc(sizeof(*po), GFP_KERNEL);
1003         if (!po)
1004                 goto out_free;
1005         memset(po, 0, sizeof(*po));
1006         sk->sk_family = PF_PACKET;
1007         po->num = protocol;
1008
1009         sk->sk_destruct = packet_sock_destruct;
1010         atomic_inc(&packet_socks_nr);
1011
1012         set_vx_info(&sk->sk_vx_info, current->vx_info);
1013         sk->sk_xid = vx_current_xid();
1014         set_nx_info(&sk->sk_nx_info, current->nx_info);
1015         sk->sk_nid = nx_current_nid();
1016
1017         /*
1018          *      Attach a protocol block
1019          */
1020
1021         spin_lock_init(&po->bind_lock);
1022         po->prot_hook.func = packet_rcv;
1023 #ifdef CONFIG_SOCK_PACKET
1024         if (sock->type == SOCK_PACKET)
1025                 po->prot_hook.func = packet_rcv_spkt;
1026 #endif
1027         po->prot_hook.af_packet_priv = sk;
1028
1029         if (protocol) {
1030                 po->prot_hook.type = protocol;
1031                 dev_add_pack(&po->prot_hook);
1032                 sock_hold(sk);
1033                 po->running = 1;
1034         }
1035
1036         write_lock_bh(&packet_sklist_lock);
1037         sk_add_node(sk, &packet_sklist);
1038         write_unlock_bh(&packet_sklist_lock);
1039         return(0);
1040
1041 out_free:
1042         sk_free(sk);
1043 out:
1044         return err;
1045 }
1046
1047 /*
1048  *      Pull a packet from our receive queue and hand it to the user.
1049  *      If necessary we block.
1050  */
1051
1052 static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1053                           struct msghdr *msg, size_t len, int flags)
1054 {
1055         struct sock *sk = sock->sk;
1056         struct sk_buff *skb;
1057         int copied, err;
1058
1059         err = -EINVAL;
1060         if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
1061                 goto out;
1062
1063 #if 0
1064         /* What error should we return now? EUNATTACH? */
1065         if (pkt_sk(sk)->ifindex < 0)
1066                 return -ENODEV;
1067 #endif
1068
1069         /*
1070          *      If the address length field is there to be filled in, we fill
1071          *      it in now.
1072          */
1073
1074         if (sock->type == SOCK_PACKET)
1075                 msg->msg_namelen = sizeof(struct sockaddr_pkt);
1076         else
1077                 msg->msg_namelen = sizeof(struct sockaddr_ll);
1078
1079         /*
1080          *      Call the generic datagram receiver. This handles all sorts
1081          *      of horrible races and re-entrancy so we can forget about it
1082          *      in the protocol layers.
1083          *
1084          *      Now it will return ENETDOWN, if the device has just gone
1085          *      down, but then it will block.
1086          */
1087
1088         skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);
1089
1090         /*
1091          *      An error occurred so return it. Because skb_recv_datagram()
1092          *      handles the blocking, we don't need to see or worry about
1093          *      blocking retries.
1094          */
1095
1096         if(skb==NULL)
1097                 goto out;
1098
1099         /*
1100          *      You lose any data beyond the buffer you gave. If it worries a
1101          *      user program they can ask the device for its MTU anyway.
1102          */
1103
1104         copied = skb->len;
1105         if (copied > len)
1106         {
1107                 copied=len;
1108                 msg->msg_flags|=MSG_TRUNC;
1109         }
1110
1111         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1112         if (err)
1113                 goto out_free;
1114
1115         sock_recv_timestamp(msg, sk, skb);
1116
1117         if (msg->msg_name)
1118                 memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
1119
1120         /*
1121          *      Free or return the buffer as appropriate. Again this
1122          *      hides all the races and re-entrancy issues from us.
1123          */
1124         err = (flags&MSG_TRUNC) ? skb->len : copied;
1125
1126 out_free:
1127         skb_free_datagram(sk, skb);
1128 out:
1129         return err;
1130 }
1131
1132 #ifdef CONFIG_SOCK_PACKET
1133 static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1134                                int *uaddr_len, int peer)
1135 {
1136         struct net_device *dev;
1137         struct sock *sk = sock->sk;
1138
1139         if (peer)
1140                 return -EOPNOTSUPP;
1141
1142         uaddr->sa_family = AF_PACKET;
1143         dev = dev_get_by_index(pkt_sk(sk)->ifindex);
1144         if (dev) {
1145                 strlcpy(uaddr->sa_data, dev->name, 15);
1146                 dev_put(dev);
1147         } else
1148                 memset(uaddr->sa_data, 0, 14);
1149         *uaddr_len = sizeof(*uaddr);
1150
1151         return 0;
1152 }
1153 #endif
1154
1155 static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1156                           int *uaddr_len, int peer)
1157 {
1158         struct net_device *dev;
1159         struct sock *sk = sock->sk;
1160         struct packet_opt *po = pkt_sk(sk);
1161         struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
1162
1163         if (peer)
1164                 return -EOPNOTSUPP;
1165
1166         sll->sll_family = AF_PACKET;
1167         sll->sll_ifindex = po->ifindex;
1168         sll->sll_protocol = po->num;
1169         dev = dev_get_by_index(po->ifindex);
1170         if (dev) {
1171                 sll->sll_hatype = dev->type;
1172                 sll->sll_halen = dev->addr_len;
1173                 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
1174                 dev_put(dev);
1175         } else {
1176                 sll->sll_hatype = 0;    /* Bad: we have no ARPHRD_UNSPEC */
1177                 sll->sll_halen = 0;
1178         }
1179         *uaddr_len = sizeof(*sll);
1180
1181         return 0;
1182 }
1183
1184 #ifdef CONFIG_PACKET_MULTICAST
1185 static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
1186 {
1187         switch (i->type) {
1188         case PACKET_MR_MULTICAST:
1189                 if (what > 0)
1190                         dev_mc_add(dev, i->addr, i->alen, 0);
1191                 else
1192                         dev_mc_delete(dev, i->addr, i->alen, 0);
1193                 break;
1194         case PACKET_MR_PROMISC:
1195                 dev_set_promiscuity(dev, what);
1196                 break;
1197         case PACKET_MR_ALLMULTI:
1198                 dev_set_allmulti(dev, what);
1199                 break;
1200         default:;
1201         }
1202 }
1203
1204 static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1205 {
1206         for ( ; i; i=i->next) {
1207                 if (i->ifindex == dev->ifindex)
1208                         packet_dev_mc(dev, i, what);
1209         }
1210 }
1211
1212 static int packet_mc_add(struct sock *sk, struct packet_mreq *mreq)
1213 {
1214         struct packet_opt *po = pkt_sk(sk);
1215         struct packet_mclist *ml, *i;
1216         struct net_device *dev;
1217         int err;
1218
1219         rtnl_lock();
1220
1221         err = -ENODEV;
1222         dev = __dev_get_by_index(mreq->mr_ifindex);
1223         if (!dev)
1224                 goto done;
1225
1226         err = -EINVAL;
1227         if (mreq->mr_alen > dev->addr_len)
1228                 goto done;
1229
1230         err = -ENOBUFS;
1231         i = (struct packet_mclist *)kmalloc(sizeof(*i), GFP_KERNEL);
1232         if (i == NULL)
1233                 goto done;
1234
1235         err = 0;
1236         for (ml = po->mclist; ml; ml = ml->next) {
1237                 if (ml->ifindex == mreq->mr_ifindex &&
1238                     ml->type == mreq->mr_type &&
1239                     ml->alen == mreq->mr_alen &&
1240                     memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1241                         ml->count++;
1242                         /* Free the new element ... */
1243                         kfree(i);
1244                         goto done;
1245                 }
1246         }
1247
1248         i->type = mreq->mr_type;
1249         i->ifindex = mreq->mr_ifindex;
1250         i->alen = mreq->mr_alen;
1251         memcpy(i->addr, mreq->mr_address, i->alen);
1252         i->count = 1;
1253         i->next = po->mclist;
1254         po->mclist = i;
1255         packet_dev_mc(dev, i, +1);
1256
1257 done:
1258         rtnl_unlock();
1259         return err;
1260 }
1261
1262 static int packet_mc_drop(struct sock *sk, struct packet_mreq *mreq)
1263 {
1264         struct packet_mclist *ml, **mlp;
1265
1266         rtnl_lock();
1267
1268         for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
1269                 if (ml->ifindex == mreq->mr_ifindex &&
1270                     ml->type == mreq->mr_type &&
1271                     ml->alen == mreq->mr_alen &&
1272                     memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1273                         if (--ml->count == 0) {
1274                                 struct net_device *dev;
1275                                 *mlp = ml->next;
1276                                 dev = dev_get_by_index(ml->ifindex);
1277                                 if (dev) {
1278                                         packet_dev_mc(dev, ml, -1);
1279                                         dev_put(dev);
1280                                 }
1281                                 kfree(ml);
1282                         }
1283                         rtnl_unlock();
1284                         return 0;
1285                 }
1286         }
1287         rtnl_unlock();
1288         return -EADDRNOTAVAIL;
1289 }
1290
1291 static void packet_flush_mclist(struct sock *sk)
1292 {
1293         struct packet_opt *po = pkt_sk(sk);
1294         struct packet_mclist *ml;
1295
1296         if (!po->mclist)
1297                 return;
1298
1299         rtnl_lock();
1300         while ((ml = po->mclist) != NULL) {
1301                 struct net_device *dev;
1302
1303                 po->mclist = ml->next;
1304                 if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
1305                         packet_dev_mc(dev, ml, -1);
1306                         dev_put(dev);
1307                 }
1308                 kfree(ml);
1309         }
1310         rtnl_unlock();
1311 }
1312 #endif
1313
1314 static int
1315 packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
1316 {
1317         struct sock *sk = sock->sk;
1318         int ret;
1319
1320         if (level != SOL_PACKET)
1321                 return -ENOPROTOOPT;
1322
1323         switch(optname) {
1324 #ifdef CONFIG_PACKET_MULTICAST
1325         case PACKET_ADD_MEMBERSHIP:     
1326         case PACKET_DROP_MEMBERSHIP:
1327         {
1328                 struct packet_mreq mreq;
1329                 if (optlen<sizeof(mreq))
1330                         return -EINVAL;
1331                 if (copy_from_user(&mreq,optval,sizeof(mreq)))
1332                         return -EFAULT;
1333                 if (optname == PACKET_ADD_MEMBERSHIP)
1334                         ret = packet_mc_add(sk, &mreq);
1335                 else
1336                         ret = packet_mc_drop(sk, &mreq);
1337                 return ret;
1338         }
1339 #endif
1340 #ifdef CONFIG_PACKET_MMAP
1341         case PACKET_RX_RING:
1342         {
1343                 struct tpacket_req req;
1344
1345                 if (optlen<sizeof(req))
1346                         return -EINVAL;
1347                 if (copy_from_user(&req,optval,sizeof(req)))
1348                         return -EFAULT;
1349                 return packet_set_ring(sk, &req, 0);
1350         }
1351         case PACKET_COPY_THRESH:
1352         {
1353                 int val;
1354
1355                 if (optlen!=sizeof(val))
1356                         return -EINVAL;
1357                 if (copy_from_user(&val,optval,sizeof(val)))
1358                         return -EFAULT;
1359
1360                 pkt_sk(sk)->copy_thresh = val;
1361                 return 0;
1362         }
1363 #endif
1364         default:
1365                 return -ENOPROTOOPT;
1366         }
1367 }
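/*
   Userspace sketch (illustrative; the group address is made up): joining
   a link-layer multicast group goes through the PACKET_ADD_MEMBERSHIP
   case handled above.

        #include <sys/socket.h>
        #include <linux/if_packet.h>
        #include <net/if.h>
        #include <string.h>

        static int join_mc_group(int fd, const char *ifname,
                                 const unsigned char mac[6])
        {
                struct packet_mreq mreq;

                memset(&mreq, 0, sizeof(mreq));
                mreq.mr_ifindex = if_nametoindex(ifname);
                mreq.mr_type    = PACKET_MR_MULTICAST;
                mreq.mr_alen    = 6;
                memcpy(mreq.mr_address, mac, 6);

                return setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
                                  &mreq, sizeof(mreq));
        }
 */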
1368
1369 int packet_getsockopt(struct socket *sock, int level, int optname,
1370                       char __user *optval, int __user *optlen)
1371 {
1372         int len;
1373         struct sock *sk = sock->sk;
1374         struct packet_opt *po = pkt_sk(sk);
1375
1376         if (level != SOL_PACKET)
1377                 return -ENOPROTOOPT;
1378
1379         if (get_user(len,optlen))
1380                 return -EFAULT;
1381
1382         if (len < 0)
1383                 return -EINVAL;
1384                 
1385         switch(optname) {
1386         case PACKET_STATISTICS:
1387         {
1388                 struct tpacket_stats st;
1389
1390                 if (len > sizeof(struct tpacket_stats))
1391                         len = sizeof(struct tpacket_stats);
1392                 spin_lock_bh(&sk->sk_receive_queue.lock);
1393                 st = po->stats;
1394                 memset(&po->stats, 0, sizeof(st));
1395                 spin_unlock_bh(&sk->sk_receive_queue.lock);
1396                 st.tp_packets += st.tp_drops;
1397
1398                 if (copy_to_user(optval, &st, len))
1399                         return -EFAULT;
1400                 break;
1401         }
1402         default:
1403                 return -ENOPROTOOPT;
1404         }
1405
1406         if (put_user(len, optlen))
1407                 return -EFAULT;
1408         return 0;
1409 }
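/*
   Userspace sketch (illustrative): note the semantics implemented above;
   the counters are zeroed on every read, and tp_packets is returned with
   tp_drops already folded in.

        #include <stdio.h>
        #include <sys/socket.h>
        #include <linux/if_packet.h>

        struct tpacket_stats st;
        socklen_t len = sizeof(st);

        if (getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len) == 0)
                printf("%u packets (%u dropped) since last read\n",
                       st.tp_packets, st.tp_drops);
 */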
1410
1411
1412 static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
1413 {
1414         struct sock *sk;
1415         struct hlist_node *node;
1416         struct net_device *dev = (struct net_device*)data;
1417
1418         read_lock(&packet_sklist_lock);
1419         sk_for_each(sk, node, &packet_sklist) {
1420                 struct packet_opt *po = pkt_sk(sk);
1421
1422                 switch (msg) {
1423                 case NETDEV_UNREGISTER:
1424 #ifdef CONFIG_PACKET_MULTICAST
1425                         if (po->mclist)
1426                                 packet_dev_mclist(dev, po->mclist, -1);
1427                         // fallthrough
1428 #endif
1429                 case NETDEV_DOWN:
1430                         if (dev->ifindex == po->ifindex) {
1431                                 spin_lock(&po->bind_lock);
1432                                 if (po->running) {
1433                                         __dev_remove_pack(&po->prot_hook);
1434                                         __sock_put(sk);
1435                                         po->running = 0;
1436                                         sk->sk_err = ENETDOWN;
1437                                         if (!sock_flag(sk, SOCK_DEAD))
1438                                                 sk->sk_error_report(sk);
1439                                 }
1440                                 if (msg == NETDEV_UNREGISTER) {
1441                                         po->ifindex = -1;
1442                                         po->prot_hook.dev = NULL;
1443                                 }
1444                                 spin_unlock(&po->bind_lock);
1445                         }
1446                         break;
1447                 case NETDEV_UP:
1448                         spin_lock(&po->bind_lock);
1449                         if (dev->ifindex == po->ifindex && po->num &&
1450                             !po->running) {
1451                                 dev_add_pack(&po->prot_hook);
1452                                 sock_hold(sk);
1453                                 po->running = 1;
1454                         }
1455                         spin_unlock(&po->bind_lock);
1456                         break;
1457                 }
1458         }
1459         read_unlock(&packet_sklist_lock);
1460         return NOTIFY_DONE;
1461 }
1462
1463
1464 static int packet_ioctl(struct socket *sock, unsigned int cmd,
1465                         unsigned long arg)
1466 {
1467         struct sock *sk = sock->sk;
1468
1469         switch(cmd) {
1470                 case SIOCOUTQ:
1471                 {
1472                         int amount = atomic_read(&sk->sk_wmem_alloc);
1473                         return put_user(amount, (int __user *)arg);
1474                 }
1475                 case SIOCINQ:
1476                 {
1477                         struct sk_buff *skb;
1478                         int amount = 0;
1479
1480                         spin_lock_bh(&sk->sk_receive_queue.lock);
1481                         skb = skb_peek(&sk->sk_receive_queue);
1482                         if (skb)
1483                                 amount = skb->len;
1484                         spin_unlock_bh(&sk->sk_receive_queue.lock);
1485                         return put_user(amount, (int __user *)arg);
1486                 }
1487                 case SIOCGSTAMP:
1488                         return sock_get_timestamp(sk, (struct timeval __user *)arg);
1489                         
1490 #ifdef CONFIG_INET
1491                 case SIOCADDRT:
1492                 case SIOCDELRT:
1493                 case SIOCDARP:
1494                 case SIOCGARP:
1495                 case SIOCSARP:
1496                 case SIOCGIFADDR:
1497                 case SIOCSIFADDR:
1498                 case SIOCGIFBRDADDR:
1499                 case SIOCSIFBRDADDR:
1500                 case SIOCGIFNETMASK:
1501                 case SIOCSIFNETMASK:
1502                 case SIOCGIFDSTADDR:
1503                 case SIOCSIFDSTADDR:
1504                 case SIOCSIFFLAGS:
1505                         return inet_dgram_ops.ioctl(sock, cmd, arg);
1506 #endif
1507
1508                 default:
1509                         return dev_ioctl(cmd, (void __user *)arg);
1510         }
1511         return 0;
1512 }
1513
1514 #ifndef CONFIG_PACKET_MMAP
1515 #define packet_mmap sock_no_mmap
1516 #define packet_poll datagram_poll
1517 #else
1518
1519 unsigned int packet_poll(struct file * file, struct socket *sock, poll_table *wait)
1520 {
1521         struct sock *sk = sock->sk;
1522         struct packet_opt *po = pkt_sk(sk);
1523         unsigned int mask = datagram_poll(file, sock, wait);
1524
1525         spin_lock_bh(&sk->sk_receive_queue.lock);
1526         if (po->pg_vec) {
1527                 unsigned last = po->head ? po->head-1 : po->frame_max;
1528                 struct tpacket_hdr *h;
1529
1530                 h = (struct tpacket_hdr *)packet_lookup_frame(po, last);
1531
1532                 if (h->tp_status)
1533                         mask |= POLLIN | POLLRDNORM;
1534         }
1535         spin_unlock_bh(&sk->sk_receive_queue.lock);
1536         return mask;
1537 }
1538
1539
1540 /* Dirty? Well, I have still not learned a better way to account
1541  * for user mmaps.
1542  */
1543
1544 static void packet_mm_open(struct vm_area_struct *vma)
1545 {
1546         struct file *file = vma->vm_file;
1547         struct inode *inode = file->f_dentry->d_inode;
1548         struct socket * sock = SOCKET_I(inode);
1549         struct sock *sk = sock->sk;
1550         
1551         if (sk)
1552                 atomic_inc(&pkt_sk(sk)->mapped);
1553 }
1554
1555 static void packet_mm_close(struct vm_area_struct *vma)
1556 {
1557         struct file *file = vma->vm_file;
1558         struct inode *inode = file->f_dentry->d_inode;
1559         struct socket * sock = SOCKET_I(inode);
1560         struct sock *sk = sock->sk;
1561         
1562         if (sk)
1563                 atomic_dec(&pkt_sk(sk)->mapped);
1564 }
1565
1566 static struct vm_operations_struct packet_mmap_ops = {
1567         .open = packet_mm_open,
1568         .close = packet_mm_close,
1569 };
1570
1571 static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order)
1572 {
1573         return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1);
1574 }
1575
1576 static void free_pg_vec(char **pg_vec, unsigned order, unsigned len)
1577 {
1578         int i;
1579
1580         for (i=0; i<len; i++) {
1581                 if (pg_vec[i]) {
1582                         struct page *page, *pend;
1583
1584                         pend = pg_vec_endpage(pg_vec[i], order);
1585                         for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
1586                                 ClearPageReserved(page);
1587                         free_pages((unsigned long)pg_vec[i], order);
1588                 }
1589         }
1590         kfree(pg_vec);
1591 }
1592
1593
static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
{
        char **pg_vec = NULL;
        struct packet_opt *po = pkt_sk(sk);
        int was_running, num, order = 0;
        int err = 0;

        if (req->tp_block_nr) {
                int i;

                /* Sanity tests and some calculations */

                if (po->pg_vec)
                        return -EBUSY;

                if ((int)req->tp_block_size <= 0)
                        return -EINVAL;
                if (req->tp_block_size&(PAGE_SIZE-1))
                        return -EINVAL;
                if (req->tp_frame_size < TPACKET_HDRLEN)
                        return -EINVAL;
                if (req->tp_frame_size&(TPACKET_ALIGNMENT-1))
                        return -EINVAL;

                po->frames_per_block = req->tp_block_size/req->tp_frame_size;
                if (po->frames_per_block <= 0)
                        return -EINVAL;
                if (po->frames_per_block*req->tp_block_nr != req->tp_frame_nr)
                        return -EINVAL;
                /* OK! */

                /* Allocate page vector */
                while ((PAGE_SIZE<<order) < req->tp_block_size)
                        order++;

                err = -ENOMEM;

                pg_vec = kmalloc(req->tp_block_nr*sizeof(char *), GFP_KERNEL);
                if (pg_vec == NULL)
                        goto out;
                memset(pg_vec, 0, req->tp_block_nr*sizeof(char *));

                for (i=0; i<req->tp_block_nr; i++) {
                        struct page *page, *pend;
                        pg_vec[i] = (char *)__get_free_pages(GFP_KERNEL, order);
                        if (!pg_vec[i])
                                goto out_free_pgvec;

                        pend = pg_vec_endpage(pg_vec[i], order);
                        for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
                                SetPageReserved(page);
                }
                /* Page vector is allocated */

                for (i=0; i<req->tp_block_nr; i++) {
                        char *ptr = pg_vec[i];
                        struct tpacket_hdr *header;
                        int k;

                        for (k=0; k<po->frames_per_block; k++) {
                                header = (struct tpacket_hdr *)ptr;
                                header->tp_status = TP_STATUS_KERNEL;
                                ptr += req->tp_frame_size;
                        }
                }
                /* Done */
        } else {
                if (req->tp_frame_nr)
                        return -EINVAL;
        }

        lock_sock(sk);

        /* Detach socket from network */
        spin_lock(&po->bind_lock);
        was_running = po->running;
        num = po->num;
        if (was_running) {
                __dev_remove_pack(&po->prot_hook);
                po->num = 0;
                po->running = 0;
                __sock_put(sk);
        }
        spin_unlock(&po->bind_lock);

        synchronize_net();

        err = -EBUSY;
        if (closing || atomic_read(&po->mapped) == 0) {
                err = 0;
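/*
 * XC() swaps its two arguments and returns the old value: the ring
 * parameters are exchanged under the receive-queue lock so the receive
 * path never sees a half-updated ring.  Afterwards pg_vec, order and
 * req->tp_block_nr describe the retired ring, which is freed below at
 * out_free_pgvec once the socket has been rebound.
 */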
#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })

                spin_lock_bh(&sk->sk_receive_queue.lock);
                pg_vec = XC(po->pg_vec, pg_vec);
                po->frame_max = req->tp_frame_nr-1;
                po->head = 0;
                po->frame_size = req->tp_frame_size;
                spin_unlock_bh(&sk->sk_receive_queue.lock);

                order = XC(po->pg_vec_order, order);
                req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);

                po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
                po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
                skb_queue_purge(&sk->sk_receive_queue);
#undef XC
                if (atomic_read(&po->mapped))
                        printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
        }

        spin_lock(&po->bind_lock);
        if (was_running && !po->running) {
                sock_hold(sk);
                po->running = 1;
                po->num = num;
                dev_add_pack(&po->prot_hook);
        }
        spin_unlock(&po->bind_lock);

        release_sock(sk);

out_free_pgvec:
        if (pg_vec)
                free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
        return err;
}

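/*
 * packet_mmap() below exposes the ring built by packet_set_ring() as a
 * single contiguous mapping.  A minimal user-space sketch of the
 * intended flow (the geometry values are illustrative only, and all
 * error handling is omitted):
 *
 *	int fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 *	struct tpacket_req req = {
 *		.tp_block_size = 4096,		(one page per block)
 *		.tp_block_nr   = 64,
 *		.tp_frame_size = 2048,		(two frames per block)
 *		.tp_frame_nr   = 128,		(blocks * frames_per_block)
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 *	char *ring = mmap(NULL, req.tp_block_nr * req.tp_block_size,
 *			  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *
 * Each frame starts with a struct tpacket_hdr; user space waits for
 * tp_status to show TP_STATUS_USER (poll(2) works on the socket),
 * consumes the frame, then writes TP_STATUS_KERNEL back to return it
 * to the kernel.
 */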
static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
        struct sock *sk = sock->sk;
        struct packet_opt *po = pkt_sk(sk);
        unsigned long size;
        unsigned long start;
        int err = -EINVAL;
        int i;

        if (vma->vm_pgoff)
                return -EINVAL;

        size = vma->vm_end - vma->vm_start;

        lock_sock(sk);
        if (po->pg_vec == NULL)
                goto out;
        if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
                goto out;

        start = vma->vm_start;
        err = -EAGAIN;
        for (i=0; i<po->pg_vec_len; i++) {
                if (remap_pfn_range(vma, start,
                                    __pa(po->pg_vec[i]) >> PAGE_SHIFT,
                                    po->pg_vec_pages*PAGE_SIZE,
                                    vma->vm_page_prot))
                        goto out;
                start += po->pg_vec_pages*PAGE_SIZE;
        }
        /* Take the mapped reference only once every block has been
         * remapped, so a failed remap_pfn_range() cannot leave
         * po->mapped elevated with no vma close to drop it. */
        atomic_inc(&po->mapped);
        vma->vm_ops = &packet_mmap_ops;
        err = 0;

out:
        release_sock(sk);
        return err;
}
#endif


#ifdef CONFIG_SOCK_PACKET
struct proto_ops packet_ops_spkt = {
        .family =       PF_PACKET,
        .owner =        THIS_MODULE,
        .release =      packet_release,
        .bind =         packet_bind_spkt,
        .connect =      sock_no_connect,
        .socketpair =   sock_no_socketpair,
        .accept =       sock_no_accept,
        .getname =      packet_getname_spkt,
        .poll =         datagram_poll,
        .ioctl =        packet_ioctl,
        .listen =       sock_no_listen,
        .shutdown =     sock_no_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      packet_sendmsg_spkt,
        .recvmsg =      packet_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
};
#endif

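/*
 * Full PF_PACKET sockets: unlike the legacy SOCK_PACKET ops above,
 * these support socket options (PACKET_RX_RING among them) and mmap.
 */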
struct proto_ops packet_ops = {
        .family =       PF_PACKET,
        .owner =        THIS_MODULE,
        .release =      packet_release,
        .bind =         packet_bind,
        .connect =      sock_no_connect,
        .socketpair =   sock_no_socketpair,
        .accept =       sock_no_accept,
        .getname =      packet_getname,
        .poll =         packet_poll,
        .ioctl =        packet_ioctl,
        .listen =       sock_no_listen,
        .shutdown =     sock_no_shutdown,
        .setsockopt =   packet_setsockopt,
        .getsockopt =   packet_getsockopt,
        .sendmsg =      packet_sendmsg,
        .recvmsg =      packet_recvmsg,
        .mmap =         packet_mmap,
        .sendpage =     sock_no_sendpage,
};

#if defined(CONFIG_VNET) || defined(CONFIG_VNET_MODULE)
EXPORT_SYMBOL(packet_ops);
struct net_proto_family packet_family_ops;
EXPORT_SYMBOL(packet_family_ops);
#else
static
#endif
struct net_proto_family packet_family_ops = {
        .family =       PF_PACKET,
        .create =       packet_create,
        .owner  =       THIS_MODULE,
};

static struct notifier_block packet_netdev_notifier = {
        .notifier_call = packet_notifier,
};

#ifdef CONFIG_PROC_FS
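/*
 * /proc/net/packet: one line per packet socket, produced by the
 * seq_file iterator below while holding packet_sklist_lock.
 */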
static inline struct sock *packet_seq_idx(loff_t off)
{
        struct sock *s;
        struct hlist_node *node;

        sk_for_each(s, node, &packet_sklist) {
                if (!off--)
                        return s;
        }
        return NULL;
}

static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
{
        read_lock(&packet_sklist_lock);
        return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN;
}

static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        ++*pos;
        return (v == SEQ_START_TOKEN)
                ? sk_head(&packet_sklist)
                : sk_next((struct sock *)v);
}

static void packet_seq_stop(struct seq_file *seq, void *v)
{
        read_unlock(&packet_sklist_lock);
}

static int packet_seq_show(struct seq_file *seq, void *v)
{
        if (v == SEQ_START_TOKEN)
                seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
        else {
                struct sock *s = v;
                const struct packet_opt *po = pkt_sk(s);

                seq_printf(seq,
                           "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
                           s,
                           atomic_read(&s->sk_refcnt),
                           s->sk_type,
                           ntohs(po->num),
                           po->ifindex,
                           po->running,
                           atomic_read(&s->sk_rmem_alloc),
                           sock_i_uid(s),
                           sock_i_ino(s));
        }

        return 0;
}

static struct seq_operations packet_seq_ops = {
        .start  = packet_seq_start,
        .next   = packet_seq_next,
        .stop   = packet_seq_stop,
        .show   = packet_seq_show,
};

static int packet_seq_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &packet_seq_ops);
}

static struct file_operations packet_seq_fops = {
        .owner          = THIS_MODULE,
        .open           = packet_seq_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
};

#endif

static void __exit packet_exit(void)
{
        proc_net_remove("packet");
        unregister_netdevice_notifier(&packet_netdev_notifier);
        sock_unregister(PF_PACKET);
}

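/*
 * Module bring-up: register the address family, then the netdevice
 * notifier (so sockets can react when their interface goes away), then
 * the /proc entry.  packet_exit() above tears these down in reverse
 * order.
 */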
static int __init packet_init(void)
{
        sock_register(&packet_family_ops);
        register_netdevice_notifier(&packet_netdev_notifier);
        proc_net_fops_create("packet", 0, &packet_seq_fops);

        return 0;
}

module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);