upgrade to linux 2.6.9-1.11_FC2
net/packet/af_packet.c
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              PACKET - implements raw packet sockets.
 *
 * Version:     $Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $
 *
 * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *              Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *              Alan Cox        :       verify_area() now used correctly
 *              Alan Cox        :       new skbuff lists, look ma no backlogs!
 *              Alan Cox        :       tidied skbuff lists.
 *              Alan Cox        :       Now uses generic datagram routines I
 *                                      added. Also fixed the peek/read crash
 *                                      from all old Linux datagram code.
 *              Alan Cox        :       Uses the improved datagram code.
 *              Alan Cox        :       Added NULL's for socket options.
 *              Alan Cox        :       Re-commented the code.
 *              Alan Cox        :       Use new kernel side addressing
 *              Rob Janssen     :       Correct MTU usage.
 *              Dave Platt      :       Counter leaks caused by incorrect
 *                                      interrupt locking and some slightly
 *                                      dubious gcc output. Can you read
 *                                      compiler: it said _VOLATILE_
 *      Richard Kooijman        :       Timestamp fixes.
 *              Alan Cox        :       New buffers. Use sk->mac.raw.
 *              Alan Cox        :       sendmsg/recvmsg support.
 *              Alan Cox        :       Protocol setting support
 *      Alexey Kuznetsov        :       Untied from IPv4 stack.
 *      Cyrus Durgin            :       Fixed kerneld for kmod.
 *      Michal Ostrowski        :       Module initialization cleanup.
 *         Ulises Alonso        :       Frame number limit removal and
 *                                      packet_set_ring memory leak.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kmod.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>

#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif

#define CONFIG_SOCK_PACKET      1

/*
   Proposed replacement for SIOC{ADD,DEL}MULTI and
   IFF_PROMISC, IFF_ALLMULTI flags.

   It is more expensive, but I believe it is a really correct
   solution: reentrant, safe and fault tolerant.

   IFF_PROMISC/IFF_ALLMULTI/SIOC{ADD/DEL}MULTI are faked by keeping a
   reference count and a global flag, so that the real status is
   (gflag|(count != 0)); this way we can keep the obsolete, faulty
   interface without harming clever users.
 */
#define CONFIG_PACKET_MULTICAST 1
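
/*
 * Usage sketch (illustrative only, not part of the kernel build):
 * userspace can request promiscuous mode through this refcounted
 * membership interface instead of toggling IFF_PROMISC itself.
 * The interface name "eth0" would be an assumed example value.
 */
#if 0
#include <string.h>
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <net/if.h>             /* if_nametoindex() */

static int enable_promisc(int fd, const char *ifname)
{
        struct packet_mreq mreq;

        memset(&mreq, 0, sizeof(mreq));
        mreq.mr_ifindex = if_nametoindex(ifname);
        mreq.mr_type = PACKET_MR_PROMISC;

        /* The kernel bumps a per-device reference count; the flag is
         * dropped again when the socket is closed. */
        return setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
                          &mreq, sizeof(mreq));
}
#endif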

/*
   Assumptions:
   - if a device has no dev->hard_header routine, it adds and removes the ll
     header inside itself. In this case the ll header is invisible outside of
     the device, but higher levels still should reserve dev->hard_header_len.
     Some devices are clever enough to reallocate the skb when the header
     will not fit in the reserved space (tunnel); others are silly
     (PPP).
   - a packet socket receives packets with the ll header pulled,
     so SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header!=NULL
   mac.raw -> ll header
   data    -> data

Outgoing, dev->hard_header!=NULL
   mac.raw -> ll header
   data    -> ll header

Incoming, dev->hard_header==NULL
   mac.raw -> UNKNOWN position. It is very likely that it points to the ll
              header. PPP does this, which is wrong, because it introduces
              asymmetry between the rx and tx paths.
   data    -> data

Outgoing, dev->hard_header==NULL
   mac.raw -> data. ll header is still not built!
   data    -> data

Summary
  If dev->hard_header==NULL we are unlikely to restore a sensible ll header.


On transmit:
------------

dev->hard_header != NULL
   mac.raw -> ll header
   data    -> ll header

dev->hard_header == NULL (ll header is added by the device, we cannot control it)
   mac.raw -> data
   data -> data

   We should set nh.raw on output to the correct position;
   the packet classifier depends on it.
 */
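
/*
 * Usage sketch (illustrative only): the same frame seen through
 * SOCK_RAW and SOCK_DGRAM packet sockets, per the tables above.
 * With SOCK_RAW the ll header is pushed back so buf[0] starts the
 * Ethernet header; with SOCK_DGRAM it stays stripped and its
 * contents arrive out of band in sockaddr_ll.
 */
#if 0
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>     /* ETH_P_ALL */
#include <arpa/inet.h>          /* htons() */

static void read_one(int type)
{
        unsigned char buf[2048];
        struct sockaddr_ll sll;
        socklen_t slen = sizeof(sll);
        int fd = socket(AF_PACKET, type, htons(ETH_P_ALL));
        ssize_t n;

        if (fd < 0)
                return;
        n = recvfrom(fd, buf, sizeof(buf), 0,
                     (struct sockaddr *)&sll, &slen);
        if (n >= 0)     /* SOCK_RAW: buf holds the ll header too */
                printf("type=%d len=%zd hatype=%d halen=%d\n",
                       type, n, sll.sll_hatype, sll.sll_halen);
        close(fd);
}
#endif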

/* List of all packet sockets. */
HLIST_HEAD(packet_sklist);
static rwlock_t packet_sklist_lock = RW_LOCK_UNLOCKED;

atomic_t packet_socks_nr;


/* Private packet socket structures. */

#ifdef CONFIG_PACKET_MULTICAST
struct packet_mclist
{
        struct packet_mclist    *next;
        int                     ifindex;
        int                     count;
        unsigned short          type;
        unsigned short          alen;
        unsigned char           addr[8];
};
#endif
#ifdef CONFIG_PACKET_MMAP
static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
#endif

static void packet_flush_mclist(struct sock *sk);

struct packet_opt
{
        struct tpacket_stats    stats;
#ifdef CONFIG_PACKET_MMAP
        char *                  *pg_vec;
        unsigned int            head;
        unsigned int            frames_per_block;
        unsigned int            frame_size;
        unsigned int            frame_max;
        int                     copy_thresh;
#endif
        struct packet_type      prot_hook;
        spinlock_t              bind_lock;
        char                    running;        /* prot_hook is attached*/
        int                     ifindex;        /* bound device         */
        unsigned short          num;
#ifdef CONFIG_PACKET_MULTICAST
        struct packet_mclist    *mclist;
#endif
#ifdef CONFIG_PACKET_MMAP
        atomic_t                mapped;
        unsigned int            pg_vec_order;
        unsigned int            pg_vec_pages;
        unsigned int            pg_vec_len;
#endif
};

#ifdef CONFIG_PACKET_MMAP

static inline char *packet_lookup_frame(struct packet_opt *po, unsigned int position)
{
        unsigned int pg_vec_pos, frame_offset;
        char *frame;

        pg_vec_pos = position / po->frames_per_block;
        frame_offset = position % po->frames_per_block;

        frame = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size);

        return frame;
}
#endif
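
/*
 * Worked example (illustrative only) of the lookup arithmetic above:
 * with a hypothetical geometry of 4 frames per block and 2048-byte
 * frames, frame 9 lives in block 9/4 = 2 at byte offset
 * (9%4)*2048 = 2048 within that block.
 */
#if 0
#include <assert.h>

int main(void)
{
        unsigned int frames_per_block = 4, frame_size = 2048;
        unsigned int position = 9;
        unsigned int pg_vec_pos = position / frames_per_block;   /* 2 */
        unsigned int frame_offset = position % frames_per_block; /* 1 */

        assert(pg_vec_pos == 2 && frame_offset * frame_size == 2048);
        return 0;
}
#endif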

#define pkt_sk(__sk) ((struct packet_opt *)(__sk)->sk_protinfo)

void packet_sock_destruct(struct sock *sk)
{
        BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
        BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
        BUG_ON(sk->sk_nx_info);
        BUG_ON(sk->sk_vx_info);

        if (!sock_flag(sk, SOCK_DEAD)) {
                printk("Attempt to release alive packet socket: %p\n", sk);
                return;
        }

        if (pkt_sk(sk))
                kfree(pkt_sk(sk));
        atomic_dec(&packet_socks_nr);
#ifdef PACKET_REFCNT_DEBUG
        printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
#endif
}


extern struct proto_ops packet_ops;

#ifdef CONFIG_SOCK_PACKET
extern struct proto_ops packet_ops_spkt;

static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
{
        struct sock *sk;
        struct sockaddr_pkt *spkt;

        /*
         *      When we registered the protocol we saved the socket in the data
         *      field for just this event.
         */

        sk = pt->af_packet_priv;

        /*
         *      Yank back the headers [hope the device set this
         *      right or kerboom...]
         *
         *      Incoming packets have the ll header pulled,
         *      push it back.
         *
         *      For outgoing ones skb->data == skb->mac.raw
         *      so this procedure is a no-op.
         */

        if (skb->pkt_type == PACKET_LOOPBACK)
                goto out;

        if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
                goto oom;

        /* drop any routing info */
        dst_release(skb->dst);
        skb->dst = NULL;

        spkt = (struct sockaddr_pkt*)skb->cb;

        skb_push(skb, skb->data-skb->mac.raw);

        /*
         *      The SOCK_PACKET socket receives _all_ frames.
         */

        spkt->spkt_family = dev->type;
        strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
        spkt->spkt_protocol = skb->protocol;

        /*
         *      Charge the memory to the socket. This is done specifically
         *      to prevent sockets using all the memory up.
         */

        if (sock_queue_rcv_skb(sk,skb) == 0)
                return 0;

out:
        kfree_skb(skb);
oom:
        return 0;
}


/*
 *      Output a raw packet to a device layer. This bypasses all the other
 *      protocol layers and you must therefore supply it with a complete frame
 */

static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
                               struct msghdr *msg, size_t len)
{
        struct sock *sk = sock->sk;
        struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
        struct sk_buff *skb;
        struct net_device *dev;
        unsigned short proto=0;
        int err;

        /*
         *      Get and verify the address.
         */

        if (saddr)
        {
                if (msg->msg_namelen < sizeof(struct sockaddr))
                        return(-EINVAL);
                if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
                        proto=saddr->spkt_protocol;
        }
        else
                return(-ENOTCONN);      /* SOCK_PACKET must be sent giving an address */

        /*
         *      Find the device first to size check it
         */

        saddr->spkt_device[13] = 0;
        dev = dev_get_by_name(saddr->spkt_device);
        err = -ENODEV;
        if (dev == NULL)
                goto out_unlock;

        /*
         *      You may not queue a frame bigger than the mtu. This is the lowest level
         *      raw protocol and you must do your own fragmentation at this level.
         */

        err = -EMSGSIZE;
        if(len>dev->mtu+dev->hard_header_len)
                goto out_unlock;

        err = -ENOBUFS;
        skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);

        /*
         *      If the write buffer is full, then tough. At this level the user gets to
         *      deal with the problem - do your own algorithmic backoffs. That's far
         *      more flexible.
         */

        if (skb == NULL)
                goto out_unlock;

        /*
         *      Fill it in
         */

        /* FIXME: Save some space for broken drivers that write a
         * hard header at transmission time by themselves. PPP is the
         * notable one here. This should really be fixed at the driver level.
         */
        skb_reserve(skb, LL_RESERVED_SPACE(dev));
        skb->nh.raw = skb->data;

        /* Try to align data part correctly */
        if (dev->hard_header) {
                skb->data -= dev->hard_header_len;
                skb->tail -= dev->hard_header_len;
                if (len < dev->hard_header_len)
                        skb->nh.raw = skb->data;
        }

        /* Returns -EFAULT on error */
        err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
        skb->protocol = proto;
        skb->dev = dev;
        skb->priority = sk->sk_priority;
        if (err)
                goto out_free;

        err = -ENETDOWN;
        if (!(dev->flags & IFF_UP))
                goto out_free;

        /*
         *      Now send it
         */

        dev_queue_xmit(skb);
        dev_put(dev);
        return(len);

out_free:
        kfree_skb(skb);
out_unlock:
        if (dev)
                dev_put(dev);
        return err;
}
#endif
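
/*
 * Usage sketch (illustrative only) of the SOCK_PACKET transmit path
 * above: a complete frame, ll header included, is handed over with
 * sendto(), addressed by device name in sockaddr_pkt.  "eth0" and
 * the zeroed dummy frame are assumed example values.
 */
#if 0
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>
#include <arpa/inet.h>

int main(void)
{
        int fd = socket(AF_PACKET, SOCK_PACKET, htons(ETH_P_ALL));
        struct sockaddr_pkt spkt;
        unsigned char frame[ETH_ZLEN] = {0};    /* dst/src/type + pad */

        if (fd < 0)
                return 1;
        memset(&spkt, 0, sizeof(spkt));
        strncpy((char *)spkt.spkt_device, "eth0",
                sizeof(spkt.spkt_device) - 1);
        spkt.spkt_protocol = htons(ETH_P_802_2);

        /* The kernel only size-checks against the MTU; the frame must
         * already be complete, as the comment above demands. */
        sendto(fd, frame, sizeof(frame), 0,
               (struct sockaddr *)&spkt, sizeof(spkt));
        close(fd);
        return 0;
}
#endif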

static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned res)
{
        struct sk_filter *filter;

        bh_lock_sock(sk);
        filter = sk->sk_filter;
        /*
         * Our caller already checked that filter != NULL but we need to
         * verify that under bh_lock_sock() to be safe
         */
        if (likely(filter != NULL))
                res = sk_run_filter(skb, filter->insns, filter->len);
        bh_unlock_sock(sk);

        return res;
}
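
/*
 * Sketch (illustrative only) of what feeds run_filter(): a classic
 * BPF program attached with SO_ATTACH_FILTER.  This trivial filter
 * accepts every packet but caps the snapshot at 96 bytes; a real
 * filter would match on header fields before returning.
 */
#if 0
#include <sys/socket.h>
#include <linux/filter.h>

static int attach_snap_filter(int fd)
{
        /* BPF_RET|BPF_K returns the snap length; 0 would drop. */
        struct sock_filter insns[] = {
                { BPF_RET | BPF_K, 0, 0, 96 },
        };
        struct sock_fprog prog = {
                .len    = sizeof(insns) / sizeof(insns[0]),
                .filter = insns,
        };

        return setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
                          &prog, sizeof(prog));
}
#endif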

/*
   This function does lazy skb cloning in the hope that most packets
   are discarded by BPF.

   Note the tricky part: we DO mangle the shared skb! skb->data, skb->len
   and skb->cb are mangled. It works because (and until) packets
   falling here are owned by the current CPU. Output packets are cloned
   by dev_queue_xmit_nit(), input packets are processed by net_bh
   sequentially, so if we return the skb to its original state on exit,
   we will not harm anyone.
 */

static int packet_rcv(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
{
        struct sock *sk;
        struct sockaddr_ll *sll;
        struct packet_opt *po;
        u8 * skb_head = skb->data;
        int skb_len = skb->len;
        unsigned snaplen;

        if (skb->pkt_type == PACKET_LOOPBACK)
                goto drop;

        sk = pt->af_packet_priv;
        po = pkt_sk(sk);

        if ((int) sk->sk_xid > 0 && sk->sk_xid != skb->xid)
                goto drop;

        skb->dev = dev;

        if (dev->hard_header) {
                /* The device has an explicit notion of ll header,
                   exported to higher levels.

                   Otherwise, the device hides the details of its frame
                   structure, so that the corresponding packet head is
                   never delivered to the user.
                 */
                if (sk->sk_type != SOCK_DGRAM)
                        skb_push(skb, skb->data - skb->mac.raw);
                else if (skb->pkt_type == PACKET_OUTGOING) {
                        /* Special case: outgoing packets have ll header at head */
                        skb_pull(skb, skb->nh.raw - skb->data);
                }
        }

        snaplen = skb->len;

        if (sk->sk_filter) {
                unsigned res = run_filter(skb, sk, snaplen);
                if (res == 0)
                        goto drop_n_restore;
                if (snaplen > res)
                        snaplen = res;
        }

        if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
            (unsigned)sk->sk_rcvbuf)
                goto drop_n_acct;

        if (skb_shared(skb)) {
                struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
                if (nskb == NULL)
                        goto drop_n_acct;

                if (skb_head != skb->data) {
                        skb->data = skb_head;
                        skb->len = skb_len;
                }
                kfree_skb(skb);
                skb = nskb;
        }

        sll = (struct sockaddr_ll*)skb->cb;
        sll->sll_family = AF_PACKET;
        sll->sll_hatype = dev->type;
        sll->sll_protocol = skb->protocol;
        sll->sll_pkttype = skb->pkt_type;
        sll->sll_ifindex = dev->ifindex;
        sll->sll_halen = 0;

        if (dev->hard_header_parse)
                sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);

        if (pskb_trim(skb, snaplen))
                goto drop_n_acct;

        skb_set_owner_r(skb, sk);
        skb->dev = NULL;
        dst_release(skb->dst);
        skb->dst = NULL;

        spin_lock(&sk->sk_receive_queue.lock);
        po->stats.tp_packets++;
        __skb_queue_tail(&sk->sk_receive_queue, skb);
        spin_unlock(&sk->sk_receive_queue.lock);
        sk->sk_data_ready(sk, skb->len);
        return 0;

drop_n_acct:
        spin_lock(&sk->sk_receive_queue.lock);
        po->stats.tp_drops++;
        spin_unlock(&sk->sk_receive_queue.lock);

drop_n_restore:
        if (skb_head != skb->data && skb_shared(skb)) {
                skb->data = skb_head;
                skb->len = skb_len;
        }
drop:
        kfree_skb(skb);
        return 0;
}

#ifdef CONFIG_PACKET_MMAP
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
{
        struct sock *sk;
        struct packet_opt *po;
        struct sockaddr_ll *sll;
        struct tpacket_hdr *h;
        u8 * skb_head = skb->data;
        int skb_len = skb->len;
        unsigned snaplen;
        unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
        unsigned short macoff, netoff;
        struct sk_buff *copy_skb = NULL;

        if (skb->pkt_type == PACKET_LOOPBACK)
                goto drop;

        sk = pt->af_packet_priv;
        po = pkt_sk(sk);

        if (dev->hard_header) {
                if (sk->sk_type != SOCK_DGRAM)
                        skb_push(skb, skb->data - skb->mac.raw);
                else if (skb->pkt_type == PACKET_OUTGOING) {
                        /* Special case: outgoing packets have ll header at head */
                        skb_pull(skb, skb->nh.raw - skb->data);
                        if (skb->ip_summed == CHECKSUM_HW)
                                status |= TP_STATUS_CSUMNOTREADY;
                }
        }

        snaplen = skb->len;

        if (sk->sk_filter) {
                unsigned res = run_filter(skb, sk, snaplen);
                if (res == 0)
                        goto drop_n_restore;
                if (snaplen > res)
                        snaplen = res;
        }

        if (sk->sk_type == SOCK_DGRAM) {
                macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
        } else {
                unsigned maclen = skb->nh.raw - skb->data;
                netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
                macoff = netoff - maclen;
        }

        if (macoff + snaplen > po->frame_size) {
                if (po->copy_thresh &&
                    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
                    (unsigned)sk->sk_rcvbuf) {
                        if (skb_shared(skb)) {
                                copy_skb = skb_clone(skb, GFP_ATOMIC);
                        } else {
                                copy_skb = skb_get(skb);
                                skb_head = skb->data;
                        }
                        if (copy_skb)
                                skb_set_owner_r(copy_skb, sk);
                }
                snaplen = po->frame_size - macoff;
                if ((int)snaplen < 0)
                        snaplen = 0;
        }
        if (snaplen > skb->len-skb->data_len)
                snaplen = skb->len-skb->data_len;

        spin_lock(&sk->sk_receive_queue.lock);
        h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head);

        if (h->tp_status)
                goto ring_is_full;
        po->head = po->head != po->frame_max ? po->head+1 : 0;
        po->stats.tp_packets++;
        if (copy_skb) {
                status |= TP_STATUS_COPY;
                __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
        }
        if (!po->stats.tp_drops)
                status &= ~TP_STATUS_LOSING;
        spin_unlock(&sk->sk_receive_queue.lock);

        memcpy((u8*)h + macoff, skb->data, snaplen);

        h->tp_len = skb->len;
        h->tp_snaplen = snaplen;
        h->tp_mac = macoff;
        h->tp_net = netoff;
        if (skb->stamp.tv_sec == 0) {
                do_gettimeofday(&skb->stamp);
                sock_enable_timestamp(sk);
        }
        h->tp_sec = skb->stamp.tv_sec;
        h->tp_usec = skb->stamp.tv_usec;

        sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
        sll->sll_halen = 0;
        if (dev->hard_header_parse)
                sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
        sll->sll_family = AF_PACKET;
        sll->sll_hatype = dev->type;
        sll->sll_protocol = skb->protocol;
        sll->sll_pkttype = skb->pkt_type;
        sll->sll_ifindex = dev->ifindex;

        h->tp_status = status;
        mb();

        {
                struct page *p_start, *p_end;
                u8 *h_end = (u8 *)h + macoff + snaplen - 1;

                p_start = virt_to_page(h);
                p_end = virt_to_page(h_end);
                while (p_start <= p_end) {
                        flush_dcache_page(p_start);
                        p_start++;
                }
        }

        sk->sk_data_ready(sk, 0);

drop_n_restore:
        if (skb_head != skb->data && skb_shared(skb)) {
                skb->data = skb_head;
                skb->len = skb_len;
        }
drop:
        kfree_skb(skb);
        return 0;

ring_is_full:
        po->stats.tp_drops++;
        spin_unlock(&sk->sk_receive_queue.lock);

        sk->sk_data_ready(sk, 0);
        if (copy_skb)
                kfree_skb(copy_skb);
        goto drop_n_restore;
}

#endif
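
/*
 * Reader-side sketch (illustrative only) matching the frame layout
 * tpacket_rcv() writes: the ll and network headers sit tp_mac and
 * tp_net bytes from the frame start, and sockaddr_ll follows the
 * aligned tpacket_hdr.
 */
#if 0
#include <linux/if_packet.h>

static void inspect_frame(void *frame)
{
        struct tpacket_hdr *h = frame;
        struct sockaddr_ll *sll =
                (void *)((char *)h + TPACKET_ALIGN(sizeof(*h)));
        unsigned char *mac = (unsigned char *)h + h->tp_mac;
        unsigned char *net = (unsigned char *)h + h->tp_net;

        /* h->tp_len is the original packet length, h->tp_snaplen how
         * much of it was actually copied into the ring. */
        (void)sll; (void)mac; (void)net;
}
#endif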


static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
                          struct msghdr *msg, size_t len)
{
        struct sock *sk = sock->sk;
        struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
        struct sk_buff *skb;
        struct net_device *dev;
        unsigned short proto;
        unsigned char *addr;
        int ifindex, err, reserve = 0;

        /*
         *      Get and verify the address.
         */

        if (saddr == NULL) {
                struct packet_opt *po = pkt_sk(sk);

                ifindex = po->ifindex;
                proto   = po->num;
                addr    = NULL;
        } else {
                err = -EINVAL;
                if (msg->msg_namelen < sizeof(struct sockaddr_ll))
                        goto out;
                ifindex = saddr->sll_ifindex;
                proto   = saddr->sll_protocol;
                addr    = saddr->sll_addr;
        }


        dev = dev_get_by_index(ifindex);
        err = -ENXIO;
        if (dev == NULL)
                goto out_unlock;
        if (sock->type == SOCK_RAW)
                reserve = dev->hard_header_len;

        err = -EMSGSIZE;
        if (len > dev->mtu+reserve)
                goto out_unlock;

        skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
                                msg->msg_flags & MSG_DONTWAIT, &err);
        if (skb==NULL)
                goto out_unlock;

        skb_reserve(skb, LL_RESERVED_SPACE(dev));
        skb->nh.raw = skb->data;

        if (dev->hard_header) {
                int res;
                err = -EINVAL;
                res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
                if (sock->type != SOCK_DGRAM) {
                        skb->tail = skb->data;
                        skb->len = 0;
                } else if (res < 0)
                        goto out_free;
        }

        /* Returns -EFAULT on error */
        err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
        if (err)
                goto out_free;

        skb->protocol = proto;
        skb->dev = dev;
        skb->priority = sk->sk_priority;

        err = -ENETDOWN;
        if (!(dev->flags & IFF_UP))
                goto out_free;

        /*
         *      Now send it
         */

        err = dev_queue_xmit(skb);
        if (err > 0 && (err = net_xmit_errno(err)) != 0)
                goto out_unlock;

        dev_put(dev);

        return(len);

out_free:
        kfree_skb(skb);
out_unlock:
        if (dev)
                dev_put(dev);
out:
        return err;
}
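
/*
 * Usage sketch (illustrative only) of the SOCK_DGRAM transmit path
 * above: the destination MAC goes in sockaddr_ll and the kernel
 * builds the ll header via dev->hard_header.  The broadcast address
 * and ETH_P_IP are assumed example values.
 */
#if 0
#include <string.h>
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>
#include <arpa/inet.h>

static int send_dgram(int fd, int ifindex,
                      const void *payload, size_t len)
{
        struct sockaddr_ll sll;

        memset(&sll, 0, sizeof(sll));
        sll.sll_family = AF_PACKET;
        sll.sll_ifindex = ifindex;
        sll.sll_protocol = htons(ETH_P_IP);
        sll.sll_halen = ETH_ALEN;
        memset(sll.sll_addr, 0xff, ETH_ALEN);   /* broadcast */

        /* len is checked against the device MTU; the ll header is
         * prepended by the kernel before dev_queue_xmit(). */
        return sendto(fd, payload, len, 0,
                      (struct sockaddr *)&sll, sizeof(sll));
}
#endif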

/*
 *      Close a PACKET socket. This is fairly simple. We immediately go
 *      to 'closed' state and remove our protocol entry in the device list.
 */

static int packet_release(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct packet_opt *po;

        if (!sk)
                return 0;

        po = pkt_sk(sk);

        write_lock_bh(&packet_sklist_lock);
        sk_del_node_init(sk);
        write_unlock_bh(&packet_sklist_lock);

        /*
         *      Unhook packet receive handler.
         */

        if (po->running) {
                /*
                 *      Remove the protocol hook
                 */
                dev_remove_pack(&po->prot_hook);
                po->running = 0;
                po->num = 0;
                __sock_put(sk);
        }

#ifdef CONFIG_PACKET_MULTICAST
        packet_flush_mclist(sk);
#endif

#ifdef CONFIG_PACKET_MMAP
        if (po->pg_vec) {
                struct tpacket_req req;
                memset(&req, 0, sizeof(req));
                packet_set_ring(sk, &req, 1);
        }
#endif

        clr_vx_info(&sk->sk_vx_info);
        clr_nx_info(&sk->sk_nx_info);

        /*
         *      Now the socket is dead. No more input will appear.
         */

        sock_orphan(sk);
        sock->sk = NULL;

        /* Purge queues */

        skb_queue_purge(&sk->sk_receive_queue);

        sock_put(sk);
        return 0;
}

/*
 *      Attach a packet hook.
 */

static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
{
        struct packet_opt *po = pkt_sk(sk);
        /*
         *      Detach an existing hook if present.
         */

        lock_sock(sk);

        spin_lock(&po->bind_lock);
        if (po->running) {
                __sock_put(sk);
                po->running = 0;
                po->num = 0;
                spin_unlock(&po->bind_lock);
                dev_remove_pack(&po->prot_hook);
                spin_lock(&po->bind_lock);
        }

        po->num = protocol;
        po->prot_hook.type = protocol;
        po->prot_hook.dev = dev;

        po->ifindex = dev ? dev->ifindex : 0;

        if (protocol == 0)
                goto out_unlock;

        if (dev) {
                if (dev->flags&IFF_UP) {
                        dev_add_pack(&po->prot_hook);
                        sock_hold(sk);
                        po->running = 1;
                } else {
                        sk->sk_err = ENETDOWN;
                        if (!sock_flag(sk, SOCK_DEAD))
                                sk->sk_error_report(sk);
                }
        } else {
                dev_add_pack(&po->prot_hook);
                sock_hold(sk);
                po->running = 1;
        }

out_unlock:
        spin_unlock(&po->bind_lock);
        release_sock(sk);
        return 0;
}

/*
 *      Bind a packet socket to a device
 */

#ifdef CONFIG_SOCK_PACKET

static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
        struct sock *sk=sock->sk;
        char name[15];
        struct net_device *dev;
        int err = -ENODEV;

        /*
         *      Check legality
         */

        if(addr_len!=sizeof(struct sockaddr))
                return -EINVAL;
        strlcpy(name,uaddr->sa_data,sizeof(name));

        dev = dev_get_by_name(name);
        if (dev) {
                err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
                dev_put(dev);
        }
        return err;
}
#endif

static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
        struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
        struct sock *sk=sock->sk;
        struct net_device *dev = NULL;
        int err;


        /*
         *      Check legality
         */

        if (addr_len < sizeof(struct sockaddr_ll))
                return -EINVAL;
        if (sll->sll_family != AF_PACKET)
                return -EINVAL;

        if (sll->sll_ifindex) {
                err = -ENODEV;
                dev = dev_get_by_index(sll->sll_ifindex);
                if (dev == NULL)
                        goto out;
        }
        err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
        if (dev)
                dev_put(dev);

out:
        return err;
}
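
/*
 * Usage sketch (illustrative only) of the bind path that ends in
 * packet_do_bind() above; the interface name is an assumed example.
 */
#if 0
#include <string.h>
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>
#include <net/if.h>
#include <arpa/inet.h>

static int bind_to_dev(int fd, const char *ifname)
{
        struct sockaddr_ll sll;

        memset(&sll, 0, sizeof(sll));
        sll.sll_family = AF_PACKET;
        sll.sll_protocol = htons(ETH_P_ALL);
        sll.sll_ifindex = if_nametoindex(ifname);

        return bind(fd, (struct sockaddr *)&sll, sizeof(sll));
}
#endif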


/*
 *      Create a packet socket (SOCK_RAW, SOCK_DGRAM or the obsolete
 *      SOCK_PACKET).
 */

static int packet_create(struct socket *sock, int protocol)
{
        struct sock *sk;
        struct packet_opt *po;
        int err;

        if (!capable(CAP_NET_RAW))
                return -EPERM;
        if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW
#ifdef CONFIG_SOCK_PACKET
            && sock->type != SOCK_PACKET
#endif
            )
                return -ESOCKTNOSUPPORT;

        sock->state = SS_UNCONNECTED;

        err = -ENOBUFS;
        sk = sk_alloc(PF_PACKET, GFP_KERNEL, 1, NULL);
        if (sk == NULL)
                goto out;

        sock->ops = &packet_ops;
#ifdef CONFIG_SOCK_PACKET
        if (sock->type == SOCK_PACKET)
                sock->ops = &packet_ops_spkt;
#endif
        sock_init_data(sock,sk);
        sk_set_owner(sk, THIS_MODULE);

        po = sk->sk_protinfo = kmalloc(sizeof(*po), GFP_KERNEL);
        if (!po)
                goto out_free;
        memset(po, 0, sizeof(*po));
        sk->sk_family = PF_PACKET;
        po->num = protocol;

        sk->sk_destruct = packet_sock_destruct;
        atomic_inc(&packet_socks_nr);

        set_vx_info(&sk->sk_vx_info, current->vx_info);
        sk->sk_xid = vx_current_xid();
        set_nx_info(&sk->sk_nx_info, current->nx_info);
        sk->sk_nid = nx_current_nid();

        /*
         *      Attach a protocol block
         */

        spin_lock_init(&po->bind_lock);
        po->prot_hook.func = packet_rcv;
#ifdef CONFIG_SOCK_PACKET
        if (sock->type == SOCK_PACKET)
                po->prot_hook.func = packet_rcv_spkt;
#endif
        po->prot_hook.af_packet_priv = sk;

        if (protocol) {
                po->prot_hook.type = protocol;
                dev_add_pack(&po->prot_hook);
                sock_hold(sk);
                po->running = 1;
        }

        write_lock_bh(&packet_sklist_lock);
        sk_add_node(sk, &packet_sklist);
        write_unlock_bh(&packet_sklist_lock);
        return(0);

out_free:
        sk_free(sk);
out:
        return err;
}
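
/*
 * Usage sketch (illustrative only): creating the socket that
 * packet_create() services.  CAP_NET_RAW is required, so this
 * normally runs as root; protocol 0 would leave the socket unbound
 * until an explicit bind().
 */
#if 0
#include <stdio.h>
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>
#include <arpa/inet.h>

int main(void)
{
        int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

        if (fd < 0)
                perror("socket");       /* EPERM without CAP_NET_RAW */
        return fd < 0;
}
#endif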

/*
 *      Pull a packet from our receive queue and hand it to the user.
 *      If necessary we block.
 */

static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
                          struct msghdr *msg, size_t len, int flags)
{
        struct sock *sk = sock->sk;
        struct sk_buff *skb;
        int copied, err;

        err = -EINVAL;
        if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
                goto out;

#if 0
        /* What error should we return now? EUNATTACH? */
        if (pkt_sk(sk)->ifindex < 0)
                return -ENODEV;
#endif

        /*
         *      If the address length field is there to be filled in, we fill
         *      it in now.
         */

        if (sock->type == SOCK_PACKET)
                msg->msg_namelen = sizeof(struct sockaddr_pkt);
        else
                msg->msg_namelen = sizeof(struct sockaddr_ll);

        /*
         *      Call the generic datagram receiver. This handles all sorts
         *      of horrible races and re-entrancy so we can forget about it
         *      in the protocol layers.
         *
         *      It will return ENETDOWN if the device has just gone down,
         *      but then it will block.
         */

        skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);

        /*
         *      An error occurred so return it. Because skb_recv_datagram()
         *      handles the blocking, we don't need to see or worry about
         *      blocking retries.
         */

        if(skb==NULL)
                goto out;

        /*
         *      You lose any data beyond the buffer you gave. If it worries a
         *      user program they can ask the device for its MTU anyway.
         */

        copied = skb->len;
        if (copied > len)
        {
                copied=len;
                msg->msg_flags|=MSG_TRUNC;
        }

        err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
        if (err)
                goto out_free;

        sock_recv_timestamp(msg, sk, skb);

        if (msg->msg_name)
                memcpy(msg->msg_name, skb->cb, msg->msg_namelen);

        /*
         *      Free or return the buffer as appropriate. Again this
         *      hides all the races and re-entrancy issues from us.
         */
        err = (flags&MSG_TRUNC) ? skb->len : copied;

out_free:
        skb_free_datagram(sk, skb);
out:
        return err;
}

#ifdef CONFIG_SOCK_PACKET
static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
                               int *uaddr_len, int peer)
{
        struct net_device *dev;
        struct sock *sk = sock->sk;

        if (peer)
                return -EOPNOTSUPP;

        uaddr->sa_family = AF_PACKET;
        dev = dev_get_by_index(pkt_sk(sk)->ifindex);
        if (dev) {
                strlcpy(uaddr->sa_data, dev->name, 15);
                dev_put(dev);
        } else
                memset(uaddr->sa_data, 0, 14);
        *uaddr_len = sizeof(*uaddr);

        return 0;
}
#endif

static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
                          int *uaddr_len, int peer)
{
        struct net_device *dev;
        struct sock *sk = sock->sk;
        struct packet_opt *po = pkt_sk(sk);
        struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;

        if (peer)
                return -EOPNOTSUPP;

        sll->sll_family = AF_PACKET;
        sll->sll_ifindex = po->ifindex;
        sll->sll_protocol = po->num;
        dev = dev_get_by_index(po->ifindex);
        if (dev) {
                sll->sll_hatype = dev->type;
                sll->sll_halen = dev->addr_len;
                memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
                dev_put(dev);
        } else {
                sll->sll_hatype = 0;    /* Bad: we have no ARPHRD_UNSPEC */
                sll->sll_halen = 0;
        }
        *uaddr_len = sizeof(*sll);

        return 0;
}

#ifdef CONFIG_PACKET_MULTICAST
static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
{
        switch (i->type) {
        case PACKET_MR_MULTICAST:
                if (what > 0)
                        dev_mc_add(dev, i->addr, i->alen, 0);
                else
                        dev_mc_delete(dev, i->addr, i->alen, 0);
                break;
        case PACKET_MR_PROMISC:
                dev_set_promiscuity(dev, what);
                break;
        case PACKET_MR_ALLMULTI:
                dev_set_allmulti(dev, what);
                break;
        default:;
        }
}

static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
{
        for ( ; i; i=i->next) {
                if (i->ifindex == dev->ifindex)
                        packet_dev_mc(dev, i, what);
        }
}

static int packet_mc_add(struct sock *sk, struct packet_mreq *mreq)
{
        struct packet_opt *po = pkt_sk(sk);
        struct packet_mclist *ml, *i;
        struct net_device *dev;
        int err;

        rtnl_lock();

        err = -ENODEV;
        dev = __dev_get_by_index(mreq->mr_ifindex);
        if (!dev)
                goto done;

        err = -EINVAL;
        if (mreq->mr_alen > dev->addr_len)
                goto done;

        err = -ENOBUFS;
        i = (struct packet_mclist *)kmalloc(sizeof(*i), GFP_KERNEL);
        if (i == NULL)
                goto done;

        err = 0;
        for (ml = po->mclist; ml; ml = ml->next) {
                if (ml->ifindex == mreq->mr_ifindex &&
                    ml->type == mreq->mr_type &&
                    ml->alen == mreq->mr_alen &&
                    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
                        ml->count++;
                        /* Free the new element ... */
                        kfree(i);
                        goto done;
                }
        }

        i->type = mreq->mr_type;
        i->ifindex = mreq->mr_ifindex;
        i->alen = mreq->mr_alen;
        memcpy(i->addr, mreq->mr_address, i->alen);
        i->count = 1;
        i->next = po->mclist;
        po->mclist = i;
        packet_dev_mc(dev, i, +1);

done:
        rtnl_unlock();
        return err;
}

static int packet_mc_drop(struct sock *sk, struct packet_mreq *mreq)
{
        struct packet_mclist *ml, **mlp;

        rtnl_lock();

        for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
                if (ml->ifindex == mreq->mr_ifindex &&
                    ml->type == mreq->mr_type &&
                    ml->alen == mreq->mr_alen &&
                    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
                        if (--ml->count == 0) {
                                struct net_device *dev;
                                *mlp = ml->next;
                                dev = dev_get_by_index(ml->ifindex);
                                if (dev) {
                                        packet_dev_mc(dev, ml, -1);
                                        dev_put(dev);
                                }
                                kfree(ml);
                        }
                        rtnl_unlock();
                        return 0;
                }
        }
        rtnl_unlock();
        return -EADDRNOTAVAIL;
}

static void packet_flush_mclist(struct sock *sk)
{
        struct packet_opt *po = pkt_sk(sk);
        struct packet_mclist *ml;

        if (!po->mclist)
                return;

        rtnl_lock();
        while ((ml = po->mclist) != NULL) {
                struct net_device *dev;

                po->mclist = ml->next;
                if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
                        packet_dev_mc(dev, ml, -1);
                        dev_put(dev);
                }
                kfree(ml);
        }
        rtnl_unlock();
}
#endif

static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
{
        struct sock *sk = sock->sk;
        int ret;

        if (level != SOL_PACKET)
                return -ENOPROTOOPT;

        switch(optname) {
#ifdef CONFIG_PACKET_MULTICAST
        case PACKET_ADD_MEMBERSHIP:
        case PACKET_DROP_MEMBERSHIP:
        {
                struct packet_mreq mreq;
                if (optlen<sizeof(mreq))
                        return -EINVAL;
                if (copy_from_user(&mreq,optval,sizeof(mreq)))
                        return -EFAULT;
                if (optname == PACKET_ADD_MEMBERSHIP)
                        ret = packet_mc_add(sk, &mreq);
                else
                        ret = packet_mc_drop(sk, &mreq);
                return ret;
        }
#endif
#ifdef CONFIG_PACKET_MMAP
        case PACKET_RX_RING:
        {
                struct tpacket_req req;

                if (optlen<sizeof(req))
                        return -EINVAL;
                if (copy_from_user(&req,optval,sizeof(req)))
                        return -EFAULT;
                return packet_set_ring(sk, &req, 0);
        }
        case PACKET_COPY_THRESH:
        {
                int val;

                if (optlen!=sizeof(val))
                        return -EINVAL;
                if (copy_from_user(&val,optval,sizeof(val)))
                        return -EFAULT;

                pkt_sk(sk)->copy_thresh = val;
                return 0;
        }
#endif
        default:
                return -ENOPROTOOPT;
        }
}
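
/*
 * Usage sketch (illustrative only) of the PACKET_RX_RING path above:
 * reserve a ring of 16 blocks of 4 frames each and map it in one
 * contiguous region.  The geometry is an assumed example; it merely
 * has to satisfy the checks in packet_set_ring() (page-multiple
 * block size, aligned frame size, frames*blocks == frame count),
 * here assuming 4 KiB pages.
 */
#if 0
#include <sys/socket.h>
#include <sys/mman.h>
#include <linux/if_packet.h>

static void *setup_rx_ring(int fd, struct tpacket_req *req)
{
        req->tp_block_size = 4096;      /* one page per block        */
        req->tp_frame_size = 1024;      /* TPACKET_ALIGNMENT aligned */
        req->tp_block_nr   = 16;
        req->tp_frame_nr   = 16 * 4;    /* blocks * frames per block */

        if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING,
                       req, sizeof(*req)) < 0)
                return MAP_FAILED;

        /* A single mapping covering the whole ring. */
        return mmap(NULL, req->tp_block_nr * req->tp_block_size,
                    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
}
#endif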

int packet_getsockopt(struct socket *sock, int level, int optname,
                      char __user *optval, int __user *optlen)
{
        int len;
        struct sock *sk = sock->sk;
        struct packet_opt *po = pkt_sk(sk);

        if (level != SOL_PACKET)
                return -ENOPROTOOPT;

        if (get_user(len,optlen))
                return -EFAULT;

        if (len < 0)
                return -EINVAL;

        switch(optname) {
        case PACKET_STATISTICS:
        {
                struct tpacket_stats st;

                if (len > sizeof(struct tpacket_stats))
                        len = sizeof(struct tpacket_stats);
                spin_lock_bh(&sk->sk_receive_queue.lock);
                st = po->stats;
                memset(&po->stats, 0, sizeof(st));
                spin_unlock_bh(&sk->sk_receive_queue.lock);
                st.tp_packets += st.tp_drops;

                if (copy_to_user(optval, &st, len))
                        return -EFAULT;
                break;
        }
        default:
                return -ENOPROTOOPT;
        }

        if (put_user(len, optlen))
                return -EFAULT;
        return 0;
}
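
/*
 * Usage sketch (illustrative only) for the branch above: read the
 * counters, remembering that the kernel zeroes them on every read
 * and folds drops into tp_packets.
 */
#if 0
#include <stdio.h>
#include <sys/socket.h>
#include <linux/if_packet.h>

static void print_stats(int fd)
{
        struct tpacket_stats st;
        socklen_t len = sizeof(st);

        if (getsockopt(fd, SOL_PACKET, PACKET_STATISTICS,
                       &st, &len) == 0)
                printf("seen %u, dropped %u\n",
                       st.tp_packets, st.tp_drops);
}
#endif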


static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
        struct sock *sk;
        struct hlist_node *node;
        struct net_device *dev = (struct net_device*)data;

        read_lock(&packet_sklist_lock);
        sk_for_each(sk, node, &packet_sklist) {
                struct packet_opt *po = pkt_sk(sk);

                switch (msg) {
                case NETDEV_UNREGISTER:
#ifdef CONFIG_PACKET_MULTICAST
                        if (po->mclist)
                                packet_dev_mclist(dev, po->mclist, -1);
                        /* fallthrough */
#endif
                case NETDEV_DOWN:
                        if (dev->ifindex == po->ifindex) {
                                spin_lock(&po->bind_lock);
                                if (po->running) {
                                        __dev_remove_pack(&po->prot_hook);
                                        __sock_put(sk);
                                        po->running = 0;
                                        sk->sk_err = ENETDOWN;
                                        if (!sock_flag(sk, SOCK_DEAD))
                                                sk->sk_error_report(sk);
                                }
                                if (msg == NETDEV_UNREGISTER) {
                                        po->ifindex = -1;
                                        po->prot_hook.dev = NULL;
                                }
                                spin_unlock(&po->bind_lock);
                        }
                        break;
                case NETDEV_UP:
                        spin_lock(&po->bind_lock);
                        if (dev->ifindex == po->ifindex && po->num &&
                            !po->running) {
                                dev_add_pack(&po->prot_hook);
                                sock_hold(sk);
                                po->running = 1;
                        }
                        spin_unlock(&po->bind_lock);
                        break;
                }
        }
        read_unlock(&packet_sklist_lock);
        return NOTIFY_DONE;
}


static int packet_ioctl(struct socket *sock, unsigned int cmd,
                        unsigned long arg)
{
        struct sock *sk = sock->sk;

        switch(cmd) {
                case SIOCOUTQ:
                {
                        int amount = atomic_read(&sk->sk_wmem_alloc);
                        return put_user(amount, (int __user *)arg);
                }
                case SIOCINQ:
                {
                        struct sk_buff *skb;
                        int amount = 0;

                        spin_lock_bh(&sk->sk_receive_queue.lock);
                        skb = skb_peek(&sk->sk_receive_queue);
                        if (skb)
                                amount = skb->len;
                        spin_unlock_bh(&sk->sk_receive_queue.lock);
                        return put_user(amount, (int __user *)arg);
                }
                case SIOCGSTAMP:
                        return sock_get_timestamp(sk, (struct timeval __user *)arg);

#ifdef CONFIG_INET
                case SIOCADDRT:
                case SIOCDELRT:
                case SIOCDARP:
                case SIOCGARP:
                case SIOCSARP:
                case SIOCGIFADDR:
                case SIOCSIFADDR:
                case SIOCGIFBRDADDR:
                case SIOCSIFBRDADDR:
                case SIOCGIFNETMASK:
                case SIOCSIFNETMASK:
                case SIOCGIFDSTADDR:
                case SIOCSIFDSTADDR:
                case SIOCSIFFLAGS:
                        return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif

                default:
                        return dev_ioctl(cmd, (void __user *)arg);
        }
        return 0;
}

#ifndef CONFIG_PACKET_MMAP
#define packet_mmap sock_no_mmap
#define packet_poll datagram_poll
#else

unsigned int packet_poll(struct file * file, struct socket *sock, poll_table *wait)
{
        struct sock *sk = sock->sk;
        struct packet_opt *po = pkt_sk(sk);
        unsigned int mask = datagram_poll(file, sock, wait);

        spin_lock_bh(&sk->sk_receive_queue.lock);
        if (po->pg_vec) {
                unsigned last = po->head ? po->head-1 : po->frame_max;
                struct tpacket_hdr *h;

                h = (struct tpacket_hdr *)packet_lookup_frame(po, last);

                if (h->tp_status)
                        mask |= POLLIN | POLLRDNORM;
        }
        spin_unlock_bh(&sk->sk_receive_queue.lock);
        return mask;
}
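
/*
 * Consumer sketch (illustrative only) pairing with packet_poll():
 * wait for POLLIN, process each frame whose tp_status the kernel has
 * flipped to TP_STATUS_USER, then hand the slot back by writing
 * TP_STATUS_KERNEL.  Assumes one contiguous mapping in which the
 * block size is a multiple of the frame size, so frame i starts at
 * byte i * frame_size.
 */
#if 0
#include <poll.h>
#include <linux/if_packet.h>

static void ring_loop(int fd, char *ring,
                      unsigned frame_size, unsigned frame_nr)
{
        unsigned i = 0;
        struct pollfd pfd = { .fd = fd, .events = POLLIN };

        for (;;) {
                struct tpacket_hdr *h =
                        (struct tpacket_hdr *)(ring + i * frame_size);

                if (!(h->tp_status & TP_STATUS_USER)) {
                        poll(&pfd, 1, -1);       /* block until data */
                        continue;
                }
                /* ... inspect the frame via tp_mac/tp_net here ... */
                h->tp_status = TP_STATUS_KERNEL; /* give slot back   */
                i = (i + 1) % frame_nr;
        }
}
#endif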


/* Dirty? Well, I still have not found a better way to account
 * for user mmaps.
 */

static void packet_mm_open(struct vm_area_struct *vma)
{
        struct file *file = vma->vm_file;
        struct inode *inode = file->f_dentry->d_inode;
        struct socket * sock = SOCKET_I(inode);
        struct sock *sk = sock->sk;

        if (sk)
                atomic_inc(&pkt_sk(sk)->mapped);
}

static void packet_mm_close(struct vm_area_struct *vma)
{
        struct file *file = vma->vm_file;
        struct inode *inode = file->f_dentry->d_inode;
        struct socket * sock = SOCKET_I(inode);
        struct sock *sk = sock->sk;

        if (sk)
                atomic_dec(&pkt_sk(sk)->mapped);
}

static struct vm_operations_struct packet_mmap_ops = {
        .open   = packet_mm_open,
        .close  = packet_mm_close,
};

static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order)
{
        return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1);
}

static void free_pg_vec(char **pg_vec, unsigned order, unsigned len)
{
        int i;

        for (i=0; i<len; i++) {
                if (pg_vec[i]) {
                        struct page *page, *pend;

                        pend = pg_vec_endpage(pg_vec[i], order);
                        for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
                                ClearPageReserved(page);
                        free_pages((unsigned long)pg_vec[i], order);
                }
        }
        kfree(pg_vec);
}
1589
1590
1591 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
1592 {
1593         char **pg_vec = NULL;
1594         struct packet_opt *po = pkt_sk(sk);
1595         int was_running, num, order = 0;
1596         int err = 0;
1597         
1598         if (req->tp_block_nr) {
1599                 int i, l;
1600
1601                 /* Sanity tests and some calculations */
1602
1603                 if (po->pg_vec)
1604                         return -EBUSY;
1605
1606                 if ((int)req->tp_block_size <= 0)
1607                         return -EINVAL;
1608                 if (req->tp_block_size&(PAGE_SIZE-1))
1609                         return -EINVAL;
1610                 if (req->tp_frame_size < TPACKET_HDRLEN)
1611                         return -EINVAL;
1612                 if (req->tp_frame_size&(TPACKET_ALIGNMENT-1))
1613                         return -EINVAL;
1614
1615                 po->frames_per_block = req->tp_block_size/req->tp_frame_size;
1616                 if (po->frames_per_block <= 0)
1617                         return -EINVAL;
1618                 if (po->frames_per_block*req->tp_block_nr != req->tp_frame_nr)
1619                         return -EINVAL;
1620                 /* OK! */
1621
1622                 /* Allocate page vector */
1623                 while ((PAGE_SIZE<<order) < req->tp_block_size)
1624                         order++;
1625
1626                 err = -ENOMEM;
1627
1628                 pg_vec = kmalloc(req->tp_block_nr*sizeof(char *), GFP_KERNEL);
1629                 if (pg_vec == NULL)
1630                         goto out;
1631                 memset(pg_vec, 0, req->tp_block_nr*sizeof(char **));
1632
1633                 for (i=0; i<req->tp_block_nr; i++) {
1634                         struct page *page, *pend;
1635                         pg_vec[i] = (char *)__get_free_pages(GFP_KERNEL, order);
1636                         if (!pg_vec[i])
1637                                 goto out_free_pgvec;
1638
1639                         pend = pg_vec_endpage(pg_vec[i], order);
1640                         for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
1641                                 SetPageReserved(page);
1642                 }
1643                 /* Page vector is allocated */
1644
1645                 l = 0;
1646                 for (i=0; i<req->tp_block_nr; i++) {
1647                         char *ptr = pg_vec[i];
1648                         struct tpacket_hdr *header;
1649                         int k;
1650
1651                         for (k=0; k<po->frames_per_block; k++) {
1652                                 
1653                                 header = (struct tpacket_hdr*)ptr;
1654                                 header->tp_status = TP_STATUS_KERNEL;
1655                                 ptr += req->tp_frame_size;
1656                         }
1657                 }
1658                 /* Done */
1659         } else {
1660                 if (req->tp_frame_nr)
1661                         return -EINVAL;
1662         }
1663
1664         lock_sock(sk);
1665
1666         /* Detach socket from network */
1667         spin_lock(&po->bind_lock);
1668         was_running = po->running;
1669         num = po->num;
1670         if (was_running) {
1671                 __dev_remove_pack(&po->prot_hook);
1672                 po->num = 0;
1673                 po->running = 0;
1674                 __sock_put(sk);
1675         }
1676         spin_unlock(&po->bind_lock);
1677
1678         synchronize_net();
1679
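        /*
         * At this point the protocol hook is detached and synchronize_net()
         * has drained any receivers still running inside it, so nothing in
         * softirq context can touch the old ring.  The swap is still refused
         * with -EBUSY while userspace holds the ring mmap()ed, unless the
         * socket is being closed.
         */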
1680         err = -EBUSY;
1681         if (closing || atomic_read(&po->mapped) == 0) {
1682                 err = 0;
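                /*
                 * XC(a, b): store b in a and evaluate to the old value of a,
                 * i.e. a one-shot exchange.  Used below so that pg_vec, order
                 * and req->tp_block_nr end up holding the *old* ring's values,
                 * which the cleanup path at out_free_pgvec then releases.
                 */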
1683 #define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
1684
1685                 spin_lock_bh(&sk->sk_receive_queue.lock);
1686                 pg_vec = XC(po->pg_vec, pg_vec);
1687                 po->frame_max = req->tp_frame_nr-1;
1688                 po->head = 0;
1689                 po->frame_size = req->tp_frame_size;
1690                 spin_unlock_bh(&sk->sk_receive_queue.lock);
1691
1692                 order = XC(po->pg_vec_order, order);
1693                 req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);
1694
1695                 po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
1696                 po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
1697                 skb_queue_purge(&sk->sk_receive_queue);
1698 #undef XC
1699                 if (atomic_read(&po->mapped))
1700                         printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
1701         }
1702
1703         spin_lock(&po->bind_lock);
1704         if (was_running && !po->running) {
1705                 sock_hold(sk);
1706                 po->running = 1;
1707                 po->num = num;
1708                 dev_add_pack(&po->prot_hook);
1709         }
1710         spin_unlock(&po->bind_lock);
1711
1712         release_sock(sk);
1713
1714 out_free_pgvec:
1715         if (pg_vec)
1716                 free_pg_vec(pg_vec, order, req->tp_block_nr);
1717 out:
1718         return err;
1719 }
1720
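/*
 * Map the ring into userspace.  The mapping must start at file offset 0
 * and cover the ring exactly (tp_block_nr * tp_block_size bytes), or the
 * call fails with -EINVAL.  The blocks are remapped back to back, so
 * userspace sees one flat region even though the kernel holds the ring
 * as separate higher-order allocations.  Userspace then polls the socket
 * and scans the frame headers, handing each consumed frame back by
 * setting its tp_status to TP_STATUS_KERNEL.
 */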
1721 static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1722 {
1723         struct sock *sk = sock->sk;
1724         struct packet_opt *po = pkt_sk(sk);
1725         unsigned long size;
1726         unsigned long start;
1727         int err = -EINVAL;
1728         int i;
1729
1730         if (vma->vm_pgoff)
1731                 return -EINVAL;
1732
1733         size = vma->vm_end - vma->vm_start;
1734
1735         lock_sock(sk);
1736         if (po->pg_vec == NULL)
1737                 goto out;
1738         if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
1739                 goto out;
1740
1741         atomic_inc(&po->mapped);
1742         start = vma->vm_start;
1743         err = -EAGAIN;
1744         for (i=0; i<po->pg_vec_len; i++) {
1745                 if (remap_page_range(vma, start, __pa(po->pg_vec[i]),
1746                                      po->pg_vec_pages*PAGE_SIZE,
1747                                      vma->vm_page_prot))
1748                         goto out;
1749                 start += po->pg_vec_pages*PAGE_SIZE;
1750         }
1751         vma->vm_ops = &packet_mmap_ops;
1752         err = 0;
1753
1754 out:
1755         release_sock(sk);
1756         return err;
1757 }
1758 #endif
1759
1760
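/*
 * Two ops tables: packet_ops_spkt backs the obsolete SOCK_PACKET sockets
 * (kept only under CONFIG_SOCK_PACKET for old binaries), while packet_ops
 * below serves SOCK_RAW and SOCK_DGRAM PF_PACKET sockets, including the
 * mmap()ed ring.  Unsupported operations point at the sock_no_* stubs.
 */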
1761 #ifdef CONFIG_SOCK_PACKET
1762 struct proto_ops packet_ops_spkt = {
1763         .family =       PF_PACKET,
1764         .owner =        THIS_MODULE,
1765         .release =      packet_release,
1766         .bind =         packet_bind_spkt,
1767         .connect =      sock_no_connect,
1768         .socketpair =   sock_no_socketpair,
1769         .accept =       sock_no_accept,
1770         .getname =      packet_getname_spkt,
1771         .poll =         datagram_poll,
1772         .ioctl =        packet_ioctl,
1773         .listen =       sock_no_listen,
1774         .shutdown =     sock_no_shutdown,
1775         .setsockopt =   sock_no_setsockopt,
1776         .getsockopt =   sock_no_getsockopt,
1777         .sendmsg =      packet_sendmsg_spkt,
1778         .recvmsg =      packet_recvmsg,
1779         .mmap =         sock_no_mmap,
1780         .sendpage =     sock_no_sendpage,
1781 };
1782 #endif
1783
1784 struct proto_ops packet_ops = {
1785         .family =       PF_PACKET,
1786         .owner =        THIS_MODULE,
1787         .release =      packet_release,
1788         .bind =         packet_bind,
1789         .connect =      sock_no_connect,
1790         .socketpair =   sock_no_socketpair,
1791         .accept =       sock_no_accept,
1792         .getname =      packet_getname, 
1793         .poll =         packet_poll,
1794         .ioctl =        packet_ioctl,
1795         .listen =       sock_no_listen,
1796         .shutdown =     sock_no_shutdown,
1797         .setsockopt =   packet_setsockopt,
1798         .getsockopt =   packet_getsockopt,
1799         .sendmsg =      packet_sendmsg,
1800         .recvmsg =      packet_recvmsg,
1801         .mmap =         packet_mmap,
1802         .sendpage =     sock_no_sendpage,
1803 };
1804 EXPORT_SYMBOL(packet_ops);
1805
1806 struct net_proto_family packet_family_ops = {
1807         .family =       PF_PACKET,
1808         .create =       packet_create,
1809         .owner  =       THIS_MODULE,
1810 };
1811 EXPORT_SYMBOL(packet_family_ops);
1812
1813 static struct notifier_block packet_netdev_notifier = {
1814         .notifier_call = packet_notifier,
1815 };
1816
1817 #ifdef CONFIG_PROC_FS
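/*
 * /proc/net/packet: a seq_file walk of packet_sklist.  The list stays
 * read-locked from ->start to ->stop, and SEQ_START_TOKEN is used to
 * emit the header line before the first socket.
 */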
1818 static inline struct sock *packet_seq_idx(loff_t off)
1819 {
1820         struct sock *s;
1821         struct hlist_node *node;
1822
1823         sk_for_each(s, node, &packet_sklist) {
1824                 if (!off--)
1825                         return s;
1826         }
1827         return NULL;
1828 }
1829
1830 static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
1831 {
1832         read_lock(&packet_sklist_lock);
1833         return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN;
1834 }
1835
1836 static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1837 {
1838         ++*pos;
1839         return (v == SEQ_START_TOKEN)
1840                 ? sk_head(&packet_sklist)
1841                 : sk_next((struct sock *)v);
1842 }
1843
1844 static void packet_seq_stop(struct seq_file *seq, void *v)
1845 {
1846         read_unlock(&packet_sklist_lock);
1847 }
1848
1849 static int packet_seq_show(struct seq_file *seq, void *v) 
1850 {
1851         if (v == SEQ_START_TOKEN)
1852                 seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
1853         else {
1854                 struct sock *s = v;
1855                 const struct packet_opt *po = pkt_sk(s);
1856
1857                 seq_printf(seq,
1858                            "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
1859                            s,
1860                            atomic_read(&s->sk_refcnt),
1861                            s->sk_type,
1862                            ntohs(po->num),
1863                            po->ifindex,
1864                            po->running,
1865                            atomic_read(&s->sk_rmem_alloc),
1866                            sock_i_uid(s),
1867                            sock_i_ino(s));
1868         }
1869
1870         return 0;
1871 }
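/*
 * Example /proc/net/packet line (values illustrative only): a SOCK_RAW
 * socket bound to ETH_P_ALL (0003) on ifindex 2 with its hook running:
 *
 *	sk       RefCnt Type Proto  Iface R Rmem   User   Inode
 *	c1a2b3c0 3      3    0003   2     1 0      0      7812
 */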
1872
1873 static struct seq_operations packet_seq_ops = {
1874         .start  = packet_seq_start,
1875         .next   = packet_seq_next,
1876         .stop   = packet_seq_stop,
1877         .show   = packet_seq_show,
1878 };
1879
1880 static int packet_seq_open(struct inode *inode, struct file *file)
1881 {
1882         return seq_open(file, &packet_seq_ops);
1883 }
1884
1885 static struct file_operations packet_seq_fops = {
1886         .owner          = THIS_MODULE,
1887         .open           = packet_seq_open,
1888         .read           = seq_read,
1889         .llseek         = seq_lseek,
1890         .release        = seq_release,
1891 };
1892
1893 #endif
1894
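/*
 * Module teardown: undo packet_init() in reverse order (proc entry,
 * netdevice notifier, then the address family).  Note that packet_init()
 * below does not check the registration calls for failure.
 */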
1895 static void __exit packet_exit(void)
1896 {
1897         proc_net_remove("packet");
1898         unregister_netdevice_notifier(&packet_netdev_notifier);
1899         sock_unregister(PF_PACKET);
1900         return;
1901 }
1902
1903 static int __init packet_init(void)
1904 {
1905         sock_register(&packet_family_ops);
1906         register_netdevice_notifier(&packet_netdev_notifier);
1907         proc_net_fops_create("packet", 0, &packet_seq_fops);
1908
1909         return 0;
1910 }
1911
1912 module_init(packet_init);
1913 module_exit(packet_exit);
1914 MODULE_LICENSE("GPL");
1915 MODULE_ALIAS_NETPROTO(PF_PACKET);