Merge to VServer 1.9.2 (patch-2.6.8.1-vs1.9.2.diff)
net/packet/af_packet.c (linux-2.6.git)
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              PACKET - implements raw packet sockets.
 *
 * Version:     $Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $
 *
 * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *              Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *              Alan Cox        :       verify_area() now used correctly
 *              Alan Cox        :       new skbuff lists, look ma no backlogs!
 *              Alan Cox        :       tidied skbuff lists.
 *              Alan Cox        :       Now uses generic datagram routines I
 *                                      added. Also fixed the peek/read crash
 *                                      from all old Linux datagram code.
 *              Alan Cox        :       Uses the improved datagram code.
 *              Alan Cox        :       Added NULL's for socket options.
 *              Alan Cox        :       Re-commented the code.
 *              Alan Cox        :       Use new kernel side addressing
 *              Rob Janssen     :       Correct MTU usage.
 *              Dave Platt      :       Counter leaks caused by incorrect
 *                                      interrupt locking and some slightly
 *                                      dubious gcc output. Can you read
 *                                      compiler: it said _VOLATILE_
 *      Richard Kooijman        :       Timestamp fixes.
 *              Alan Cox        :       New buffers. Use sk->mac.raw.
 *              Alan Cox        :       sendmsg/recvmsg support.
 *              Alan Cox        :       Protocol setting support
 *      Alexey Kuznetsov        :       Untied from IPv4 stack.
 *      Cyrus Durgin            :       Fixed kerneld for kmod.
 *      Michal Ostrowski        :       Module initialization cleanup.
 *         Ulises Alonso        :       Frame number limit removal and
 *                                      packet_set_ring memory leak.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kmod.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>

#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif

#define CONFIG_SOCK_PACKET      1

/*
   Proposed replacement for SIOC{ADD,DEL}MULTI and the
   IFF_PROMISC, IFF_ALLMULTI flags.

   It is more expensive, but I believe it is the really correct
   solution: reentrant, safe and fault tolerant.

   IFF_PROMISC/IFF_ALLMULTI/SIOC{ADD/DEL}MULTI are faked by keeping a
   reference count and a global flag, so that the real status is
   (gflag|(count != 0)); this way we can keep the obsolete, faulty
   interface without harming its cleverer users.
 */
#define CONFIG_PACKET_MULTICAST 1
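
/*
 * Editor's illustration (not part of the original file): a minimal
 * userspace sketch of the refcounted interface described above,
 * assuming a Linux host with <linux/if_packet.h>.  Instead of
 * toggling IFF_PROMISC via SIOCSIFFLAGS, a capture tool requests
 * promiscuous mode through a membership; the kernel keeps the
 * reference count and undoes it when the socket closes.
 */
#if 0 /* example only */
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <net/if.h>
#include <string.h>

static int enable_promisc(int fd, const char *ifname)
{
        struct packet_mreq mr;

        memset(&mr, 0, sizeof(mr));
        mr.mr_ifindex = if_nametoindex(ifname);
        mr.mr_type = PACKET_MR_PROMISC;
        /* Dropped automatically when the socket is closed. */
        return setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
                          &mr, sizeof(mr));
}
#endif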

/*
   Assumptions:
   - if a device has no dev->hard_header routine, it adds and removes
     the ll header inside itself. In this case the ll header is invisible
     outside of the device, but higher levels still should reserve
     dev->hard_header_len.  Some devices are clever enough to reallocate
     the skb when the header will not fit into the reserved space
     (tunnel devices), others are silly (PPP).
   - a packet socket receives packets with the ll header pulled,
     so SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header!=NULL
   mac.raw -> ll header
   data    -> data

Outgoing, dev->hard_header!=NULL
   mac.raw -> ll header
   data    -> ll header

Incoming, dev->hard_header==NULL
   mac.raw -> UNKNOWN position. It is very likely that it points to the
              ll header.  PPP does this, which is wrong, because it
              introduces asymmetry between the rx and tx paths.
   data    -> data

Outgoing, dev->hard_header==NULL
   mac.raw -> data. ll header is still not built!
   data    -> data

Summary
  If dev->hard_header==NULL we are unlikely to restore a sensible ll header.


On transmit:
------------

dev->hard_header != NULL
   mac.raw -> ll header
   data    -> ll header

dev->hard_header == NULL (ll header is added by the device, we cannot control it)
   mac.raw -> data
   data    -> data

   We should set nh.raw on output to the correct position,
   the packet classifier depends on it.
 */
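
/*
 * Editor's illustration (not part of the original file): a hedged
 * userspace sketch of the two receive flavours described above.
 * With SOCK_RAW the ll header is pushed back and read() returns it;
 * with SOCK_DGRAM the packet starts at the network header and the
 * link-level information arrives in the sockaddr_ll instead.
 */
#if 0 /* example only */
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>
#include <arpa/inet.h>

static int open_capture(int want_ll_header)
{
        int type = want_ll_header ? SOCK_RAW : SOCK_DGRAM;

        /* ETH_P_ALL: every protocol; requires CAP_NET_RAW. */
        return socket(PF_PACKET, type, htons(ETH_P_ALL));
}
#endif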

/* List of all packet sockets. */
HLIST_HEAD(packet_sklist);
static rwlock_t packet_sklist_lock = RW_LOCK_UNLOCKED;

atomic_t packet_socks_nr;


/* Private packet socket structures. */

#ifdef CONFIG_PACKET_MULTICAST
struct packet_mclist
{
        struct packet_mclist    *next;
        int                     ifindex;
        int                     count;
        unsigned short          type;
        unsigned short          alen;
        unsigned char           addr[8];
};
#endif
#ifdef CONFIG_PACKET_MMAP
static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
#endif

static void packet_flush_mclist(struct sock *sk);

struct packet_opt
{
        struct tpacket_stats    stats;
#ifdef CONFIG_PACKET_MMAP
        unsigned long           *pg_vec;
        unsigned int            head;
        unsigned int            frames_per_block;
        unsigned int            frame_size;
        unsigned int            frame_max;
        int                     copy_thresh;
#endif
        struct packet_type      prot_hook;
        spinlock_t              bind_lock;
        char                    running;        /* prot_hook is attached */
        int                     ifindex;        /* bound device         */
        unsigned short          num;
#ifdef CONFIG_PACKET_MULTICAST
        struct packet_mclist    *mclist;
#endif
#ifdef CONFIG_PACKET_MMAP
        atomic_t                mapped;
        unsigned int            pg_vec_order;
        unsigned int            pg_vec_pages;
        unsigned int            pg_vec_len;
#endif
};

#ifdef CONFIG_PACKET_MMAP

static inline unsigned long packet_lookup_frame(struct packet_opt *po, unsigned int position)
{
        unsigned int pg_vec_pos, frame_offset;
        unsigned long frame;

        pg_vec_pos = position / po->frames_per_block;
        frame_offset = position % po->frames_per_block;

        frame = (unsigned long) (po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size));

        return frame;
}
#endif
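
/*
 * Editor's worked example (not part of the original file): with
 * tp_block_size = 8192 and tp_frame_size = 2048 we get
 * frames_per_block = 4.  Looking up frame position 9:
 *
 *     pg_vec_pos   = 9 / 4 = 2   (third block)
 *     frame_offset = 9 % 4 = 1   (second frame in that block)
 *     frame        = pg_vec[2] + 1 * 2048
 */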

#define pkt_sk(__sk) ((struct packet_opt *)(__sk)->sk_protinfo)

void packet_sock_destruct(struct sock *sk)
{
        BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
        BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
        BUG_ON(sk->sk_nx_info);
        BUG_ON(sk->sk_vx_info);

        if (!sock_flag(sk, SOCK_DEAD)) {
                printk("Attempt to release alive packet socket: %p\n", sk);
                return;
        }

        if (pkt_sk(sk))
                kfree(pkt_sk(sk));
        atomic_dec(&packet_socks_nr);
#ifdef PACKET_REFCNT_DEBUG
        printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
#endif
}


extern struct proto_ops packet_ops;

#ifdef CONFIG_SOCK_PACKET
extern struct proto_ops packet_ops_spkt;

static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
{
        struct sock *sk;
        struct sockaddr_pkt *spkt;

        /*
         *      When we registered the protocol we saved the socket in the data
         *      field for just this event.
         */

        sk = pt->af_packet_priv;

        /*
         *      Yank back the headers [hope the device set this
         *      right or kerboom...]
         *
         *      Incoming packets have the ll header pulled,
         *      push it back.
         *
         *      For outgoing ones skb->data == skb->mac.raw,
         *      so this procedure is a no-op.
         */

        if (skb->pkt_type == PACKET_LOOPBACK)
                goto out;

        if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
                goto oom;

        /* drop any routing info */
        dst_release(skb->dst);
        skb->dst = NULL;

        spkt = (struct sockaddr_pkt*)skb->cb;

        skb_push(skb, skb->data-skb->mac.raw);

        /*
         *      The SOCK_PACKET socket receives _all_ frames.
         */

        spkt->spkt_family = dev->type;
        strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
        spkt->spkt_protocol = skb->protocol;

        /*
         *      Charge the memory to the socket. This is done specifically
         *      to prevent sockets using all the memory up.
         */

        if (sock_queue_rcv_skb(sk,skb) == 0)
                return 0;

out:
        kfree_skb(skb);
oom:
        return 0;
}


/*
 *      Output a raw packet to a device layer. This bypasses all the other
 *      protocol layers and you must therefore supply it with a complete frame
 */

static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
                               struct msghdr *msg, size_t len)
{
        struct sock *sk = sock->sk;
        struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
        struct sk_buff *skb;
        struct net_device *dev;
        unsigned short proto=0;
        int err;

        /*
         *      Get and verify the address.
         */

        if (saddr)
        {
                if (msg->msg_namelen < sizeof(struct sockaddr))
                        return(-EINVAL);
                if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
                        proto=saddr->spkt_protocol;
        }
        else
                return(-ENOTCONN);      /* SOCK_PACKET must be sent giving an address */

        /*
         *      Find the device first to size check it
         */

        saddr->spkt_device[13] = 0;
        dev = dev_get_by_name(saddr->spkt_device);
        err = -ENODEV;
        if (dev == NULL)
                goto out_unlock;

        /*
         *      You may not queue a frame bigger than the mtu. This is the lowest level
         *      raw protocol and you must do your own fragmentation at this level.
         */

        err = -EMSGSIZE;
        if(len>dev->mtu+dev->hard_header_len)
                goto out_unlock;

        err = -ENOBUFS;
        skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);

        /*
         *      If the write buffer is full, then tough. At this level the user gets to
         *      deal with the problem - do your own algorithmic backoffs. That's far
         *      more flexible.
         */

        if (skb == NULL)
                goto out_unlock;

        /*
         *      Fill it in
         */

        /* FIXME: Save some space for broken drivers that write a
         * hard header at transmission time by themselves. PPP is the
         * notable one here. This should really be fixed at the driver level.
         */
        skb_reserve(skb, LL_RESERVED_SPACE(dev));
        skb->nh.raw = skb->data;

        /* Try to align data part correctly */
        if (dev->hard_header) {
                skb->data -= dev->hard_header_len;
                skb->tail -= dev->hard_header_len;
                if (len < dev->hard_header_len)
                        skb->nh.raw = skb->data;
        }

        /* Returns -EFAULT on error */
        err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
        skb->protocol = proto;
        skb->dev = dev;
        skb->priority = sk->sk_priority;
        if (err)
                goto out_free;

        err = -ENETDOWN;
        if (!(dev->flags & IFF_UP))
                goto out_free;

        /*
         *      Now send it
         */

        dev_queue_xmit(skb);
        dev_put(dev);
        return(len);

out_free:
        kfree_skb(skb);
out_unlock:
        if (dev)
                dev_put(dev);
        return err;
}
#endif

static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned res)
{
        struct sk_filter *filter;

        bh_lock_sock(sk);
        filter = sk->sk_filter;
        /*
         * Our caller already checked that filter != NULL but we need to
         * verify that under bh_lock_sock() to be safe
         */
        if (likely(filter != NULL))
                res = sk_run_filter(skb, filter->insns, filter->len);
        bh_unlock_sock(sk);

        return res;
}
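
/*
 * Editor's illustration (not part of the original file): a minimal
 * sketch of installing the filter that run_filter() executes,
 * assuming classic BPF via SO_ATTACH_FILTER.  The single-instruction
 * program below accepts every packet whole; a return value of 0
 * from the filter would mean "drop".
 */
#if 0 /* example only */
#include <sys/socket.h>
#include <linux/filter.h>

static int attach_accept_all(int fd)
{
        static struct sock_filter code[] = {
                /* BPF_RET | BPF_K: accept up to 0xffffffff bytes */
                { 0x06, 0, 0, 0xffffffff },
        };
        struct sock_fprog prog = {
                .len    = sizeof(code) / sizeof(code[0]),
                .filter = code,
        };

        return setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
                          &prog, sizeof(prog));
}
#endif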

/*
   This function performs lazy skb cloning in the hope that most
   packets are discarded by BPF.

   Note the tricky part: we DO mangle the shared skb! skb->data,
   skb->len and skb->cb are mangled. It works because (and until)
   packets falling here are owned by the current CPU. Output packets
   are cloned by dev_queue_xmit_nit(), input packets are processed by
   net_bh sequentially, so that if we return the skb to its original
   state on exit, we will not harm anyone.
 */

static int packet_rcv(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
{
        struct sock *sk;
        struct sockaddr_ll *sll;
        struct packet_opt *po;
        u8 * skb_head = skb->data;
        int skb_len = skb->len;
        unsigned snaplen;

        if (skb->pkt_type == PACKET_LOOPBACK)
                goto drop;

        sk = pt->af_packet_priv;
        po = pkt_sk(sk);

        if (sk->sk_xid && sk->sk_xid != skb->xid)
                goto drop;

        skb->dev = dev;

        if (dev->hard_header) {
                /* The device has an explicit notion of ll header,
                   exported to higher levels.

                   Otherwise, the device hides the details of its frame
                   structure, so that the corresponding packet head is
                   never delivered to the user.
                 */
                if (sk->sk_type != SOCK_DGRAM)
                        skb_push(skb, skb->data - skb->mac.raw);
                else if (skb->pkt_type == PACKET_OUTGOING) {
                        /* Special case: outgoing packets have ll header at head */
                        skb_pull(skb, skb->nh.raw - skb->data);
                }
        }

        snaplen = skb->len;

        if (sk->sk_filter) {
                unsigned res = run_filter(skb, sk, snaplen);
                if (res == 0)
                        goto drop_n_restore;
                if (snaplen > res)
                        snaplen = res;
        }

        if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
            (unsigned)sk->sk_rcvbuf)
                goto drop_n_acct;

        if (skb_shared(skb)) {
                struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
                if (nskb == NULL)
                        goto drop_n_acct;

                if (skb_head != skb->data) {
                        skb->data = skb_head;
                        skb->len = skb_len;
                }
                kfree_skb(skb);
                skb = nskb;
        }

        sll = (struct sockaddr_ll*)skb->cb;
        sll->sll_family = AF_PACKET;
        sll->sll_hatype = dev->type;
        sll->sll_protocol = skb->protocol;
        sll->sll_pkttype = skb->pkt_type;
        sll->sll_ifindex = dev->ifindex;
        sll->sll_halen = 0;

        if (dev->hard_header_parse)
                sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);

        if (pskb_trim(skb, snaplen))
                goto drop_n_acct;

        skb_set_owner_r(skb, sk);
        skb->dev = NULL;
        dst_release(skb->dst);
        skb->dst = NULL;

        spin_lock(&sk->sk_receive_queue.lock);
        po->stats.tp_packets++;
        __skb_queue_tail(&sk->sk_receive_queue, skb);
        spin_unlock(&sk->sk_receive_queue.lock);
        sk->sk_data_ready(sk, skb->len);
        return 0;

drop_n_acct:
        spin_lock(&sk->sk_receive_queue.lock);
        po->stats.tp_drops++;
        spin_unlock(&sk->sk_receive_queue.lock);

drop_n_restore:
        if (skb_head != skb->data && skb_shared(skb)) {
                skb->data = skb_head;
                skb->len = skb_len;
        }
drop:
        kfree_skb(skb);
        return 0;
}

#ifdef CONFIG_PACKET_MMAP
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
{
        struct sock *sk;
        struct packet_opt *po;
        struct sockaddr_ll *sll;
        struct tpacket_hdr *h;
        u8 * skb_head = skb->data;
        int skb_len = skb->len;
        unsigned snaplen;
        unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
        unsigned short macoff, netoff;
        struct sk_buff *copy_skb = NULL;

        if (skb->pkt_type == PACKET_LOOPBACK)
                goto drop;

        sk = pt->af_packet_priv;
        po = pkt_sk(sk);

        if (dev->hard_header) {
                if (sk->sk_type != SOCK_DGRAM)
                        skb_push(skb, skb->data - skb->mac.raw);
                else if (skb->pkt_type == PACKET_OUTGOING) {
                        /* Special case: outgoing packets have ll header at head */
                        skb_pull(skb, skb->nh.raw - skb->data);
                        if (skb->ip_summed == CHECKSUM_HW)
                                status |= TP_STATUS_CSUMNOTREADY;
                }
        }

        snaplen = skb->len;

        if (sk->sk_filter) {
                unsigned res = run_filter(skb, sk, snaplen);
                if (res == 0)
                        goto drop_n_restore;
                if (snaplen > res)
                        snaplen = res;
        }

        if (sk->sk_type == SOCK_DGRAM) {
                macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
        } else {
                unsigned maclen = skb->nh.raw - skb->data;
                netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
                macoff = netoff - maclen;
        }

        if (macoff + snaplen > po->frame_size) {
                if (po->copy_thresh &&
                    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
                    (unsigned)sk->sk_rcvbuf) {
                        if (skb_shared(skb)) {
                                copy_skb = skb_clone(skb, GFP_ATOMIC);
                        } else {
                                copy_skb = skb_get(skb);
                                skb_head = skb->data;
                        }
                        if (copy_skb)
                                skb_set_owner_r(copy_skb, sk);
                }
                snaplen = po->frame_size - macoff;
                if ((int)snaplen < 0)
                        snaplen = 0;
        }
        if (snaplen > skb->len-skb->data_len)
                snaplen = skb->len-skb->data_len;

        spin_lock(&sk->sk_receive_queue.lock);
        h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head);

        if (h->tp_status)
                goto ring_is_full;
        po->head = po->head != po->frame_max ? po->head+1 : 0;
        po->stats.tp_packets++;
        if (copy_skb) {
                status |= TP_STATUS_COPY;
                __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
        }
        if (!po->stats.tp_drops)
                status &= ~TP_STATUS_LOSING;
        spin_unlock(&sk->sk_receive_queue.lock);

        memcpy((u8*)h + macoff, skb->data, snaplen);

        h->tp_len = skb->len;
        h->tp_snaplen = snaplen;
        h->tp_mac = macoff;
        h->tp_net = netoff;
        if (skb->stamp.tv_sec == 0) {
                do_gettimeofday(&skb->stamp);
                sock_enable_timestamp(sk);
        }
        h->tp_sec = skb->stamp.tv_sec;
        h->tp_usec = skb->stamp.tv_usec;

        sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
        sll->sll_halen = 0;
        if (dev->hard_header_parse)
                sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
        sll->sll_family = AF_PACKET;
        sll->sll_hatype = dev->type;
        sll->sll_protocol = skb->protocol;
        sll->sll_pkttype = skb->pkt_type;
        sll->sll_ifindex = dev->ifindex;

        h->tp_status = status;
        mb();

        {
                struct page *p_start, *p_end;
                u8 *h_end = (u8 *)h + macoff + snaplen - 1;

                p_start = virt_to_page(h);
                p_end = virt_to_page(h_end);
                while (p_start <= p_end) {
                        flush_dcache_page(p_start);
                        p_start++;
                }
        }

        sk->sk_data_ready(sk, 0);

drop_n_restore:
        if (skb_head != skb->data && skb_shared(skb)) {
                skb->data = skb_head;
                skb->len = skb_len;
        }
drop:
        kfree_skb(skb);
        return 0;

ring_is_full:
        po->stats.tp_drops++;
        spin_unlock(&sk->sk_receive_queue.lock);

        sk->sk_data_ready(sk, 0);
        if (copy_skb)
                kfree_skb(copy_skb);
        goto drop_n_restore;
}

#endif
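
/*
 * Editor's illustration (not part of the original file): what the
 * handshake in tpacket_rcv() looks like from the user side, assuming
 * a ring set up with PACKET_RX_RING and mmap() (see packet_set_ring()
 * and packet_mmap() below).  The kernel fills a frame and flips
 * tp_status to TP_STATUS_USER behind the mb(); the reader consumes it
 * and hands the slot back with TP_STATUS_KERNEL.
 */
#if 0 /* example only */
#include <linux/if_packet.h>

static void consume_frame(volatile struct tpacket_hdr *h)
{
        while (!(h->tp_status & TP_STATUS_USER))
                ;       /* or poll() the socket instead of spinning */

        /* Frame data lives at (u8 *)h + h->tp_mac, h->tp_snaplen bytes. */

        h->tp_status = TP_STATUS_KERNEL;        /* give the slot back */
}
#endif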


static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
                          struct msghdr *msg, size_t len)
{
        struct sock *sk = sock->sk;
        struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
        struct sk_buff *skb;
        struct net_device *dev;
        unsigned short proto;
        unsigned char *addr;
        int ifindex, err, reserve = 0;

        /*
         *      Get and verify the address.
         */

        if (saddr == NULL) {
                struct packet_opt *po = pkt_sk(sk);

                ifindex = po->ifindex;
                proto   = po->num;
                addr    = NULL;
        } else {
                err = -EINVAL;
                if (msg->msg_namelen < sizeof(struct sockaddr_ll))
                        goto out;
                ifindex = saddr->sll_ifindex;
                proto   = saddr->sll_protocol;
                addr    = saddr->sll_addr;
        }


        dev = dev_get_by_index(ifindex);
        err = -ENXIO;
        if (dev == NULL)
                goto out_unlock;
        if (sock->type == SOCK_RAW)
                reserve = dev->hard_header_len;

        err = -EMSGSIZE;
        if (len > dev->mtu+reserve)
                goto out_unlock;

        skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
                                msg->msg_flags & MSG_DONTWAIT, &err);
        if (skb==NULL)
                goto out_unlock;

        skb_reserve(skb, LL_RESERVED_SPACE(dev));
        skb->nh.raw = skb->data;

        if (dev->hard_header) {
                int res;
                err = -EINVAL;
                res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
                if (sock->type != SOCK_DGRAM) {
                        skb->tail = skb->data;
                        skb->len = 0;
                } else if (res < 0)
                        goto out_free;
        }

        /* Returns -EFAULT on error */
        err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
        if (err)
                goto out_free;

        skb->protocol = proto;
        skb->dev = dev;
        skb->priority = sk->sk_priority;

        err = -ENETDOWN;
        if (!(dev->flags & IFF_UP))
                goto out_free;

        /*
         *      Now send it
         */

        err = dev_queue_xmit(skb);
        if (err > 0 && (err = net_xmit_errno(err)) != 0)
                goto out_unlock;

        dev_put(dev);

        return(len);

out_free:
        kfree_skb(skb);
out_unlock:
        if (dev)
                dev_put(dev);
out:
        return err;
}
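
/*
 * Editor's illustration (not part of the original file): a hedged
 * userspace sketch of the sendmsg() path above.  sll_ifindex selects
 * the device, and sll_protocol/sll_addr feed dev->hard_header(); on
 * SOCK_RAW the caller would supply the full frame instead.  The
 * ETH_P_IP payload type here is hypothetical.
 */
#if 0 /* example only */
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>
#include <arpa/inet.h>
#include <string.h>

static int send_dgram(int fd, int ifindex, const unsigned char dst[ETH_ALEN],
                      const void *payload, size_t len)
{
        struct sockaddr_ll sll;

        memset(&sll, 0, sizeof(sll));
        sll.sll_family   = AF_PACKET;
        sll.sll_ifindex  = ifindex;
        sll.sll_protocol = htons(ETH_P_IP);     /* hypothetical payload type */
        sll.sll_halen    = ETH_ALEN;
        memcpy(sll.sll_addr, dst, ETH_ALEN);

        return sendto(fd, payload, len, 0,
                      (struct sockaddr *)&sll, sizeof(sll));
}
#endif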

/*
 *      Close a PACKET socket. This is fairly simple. We immediately go
 *      to 'closed' state and remove our protocol entry in the device list.
 */

static int packet_release(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct packet_opt *po = pkt_sk(sk);

        if (!sk)
                return 0;

        write_lock_bh(&packet_sklist_lock);
        sk_del_node_init(sk);
        write_unlock_bh(&packet_sklist_lock);

        /*
         *      Unhook packet receive handler.
         */

        if (po->running) {
                /*
                 *      Remove the protocol hook
                 */
                dev_remove_pack(&po->prot_hook);
                po->running = 0;
                po->num = 0;
                __sock_put(sk);
        }

#ifdef CONFIG_PACKET_MULTICAST
        packet_flush_mclist(sk);
#endif

#ifdef CONFIG_PACKET_MMAP
        if (po->pg_vec) {
                struct tpacket_req req;
                memset(&req, 0, sizeof(req));
                packet_set_ring(sk, &req, 1);
        }
#endif

        clr_vx_info(&sk->sk_vx_info);
        clr_nx_info(&sk->sk_nx_info);

        /*
         *      Now the socket is dead. No more input will appear.
         */

        sock_orphan(sk);
        sock->sk = NULL;

        /* Purge queues */

        skb_queue_purge(&sk->sk_receive_queue);

        sock_put(sk);
        return 0;
}

/*
 *      Attach a packet hook.
 */

static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
{
        struct packet_opt *po = pkt_sk(sk);
        /*
         *      Detach an existing hook if present.
         */

        lock_sock(sk);

        spin_lock(&po->bind_lock);
        if (po->running) {
                __sock_put(sk);
                po->running = 0;
                po->num = 0;
                spin_unlock(&po->bind_lock);
                dev_remove_pack(&po->prot_hook);
                spin_lock(&po->bind_lock);
        }

        po->num = protocol;
        po->prot_hook.type = protocol;
        po->prot_hook.dev = dev;

        po->ifindex = dev ? dev->ifindex : 0;

        if (protocol == 0)
                goto out_unlock;

        if (dev) {
                if (dev->flags&IFF_UP) {
                        dev_add_pack(&po->prot_hook);
                        sock_hold(sk);
                        po->running = 1;
                } else {
                        sk->sk_err = ENETDOWN;
                        if (!sock_flag(sk, SOCK_DEAD))
                                sk->sk_error_report(sk);
                }
        } else {
                dev_add_pack(&po->prot_hook);
                sock_hold(sk);
                po->running = 1;
        }

out_unlock:
        spin_unlock(&po->bind_lock);
        release_sock(sk);
        return 0;
}

/*
 *      Bind a packet socket to a device
 */

#ifdef CONFIG_SOCK_PACKET

static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
        struct sock *sk=sock->sk;
        char name[15];
        struct net_device *dev;
        int err = -ENODEV;

        /*
         *      Check legality
         */

        if(addr_len!=sizeof(struct sockaddr))
                return -EINVAL;
        strlcpy(name,uaddr->sa_data,sizeof(name));

        dev = dev_get_by_name(name);
        if (dev) {
                err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
                dev_put(dev);
        }
        return err;
}
#endif

static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
        struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
        struct sock *sk=sock->sk;
        struct net_device *dev = NULL;
        int err;


        /*
         *      Check legality
         */

        if (addr_len < sizeof(struct sockaddr_ll))
                return -EINVAL;
        if (sll->sll_family != AF_PACKET)
                return -EINVAL;

        if (sll->sll_ifindex) {
                err = -ENODEV;
                dev = dev_get_by_index(sll->sll_ifindex);
                if (dev == NULL)
                        goto out;
        }
        err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
        if (dev)
                dev_put(dev);

out:
        return err;
}
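
/*
 * Editor's illustration (not part of the original file): binding a
 * packet socket to one interface and protocol, i.e. the userspace
 * view of packet_do_bind() above.
 */
#if 0 /* example only */
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <string.h>

static int bind_to(int fd, const char *ifname)
{
        struct sockaddr_ll sll;

        memset(&sll, 0, sizeof(sll));
        sll.sll_family   = AF_PACKET;
        sll.sll_protocol = htons(ETH_P_ALL);
        sll.sll_ifindex  = if_nametoindex(ifname);

        return bind(fd, (struct sockaddr *)&sll, sizeof(sll));
}
#endif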


/*
 *      Create a packet socket.
 */

static int packet_create(struct socket *sock, int protocol)
{
        struct sock *sk;
        struct packet_opt *po;
        int err;

        if (!capable(CAP_NET_RAW))
                return -EPERM;
        if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW
#ifdef CONFIG_SOCK_PACKET
            && sock->type != SOCK_PACKET
#endif
            )
                return -ESOCKTNOSUPPORT;

        sock->state = SS_UNCONNECTED;

        err = -ENOBUFS;
        sk = sk_alloc(PF_PACKET, GFP_KERNEL, 1, NULL);
        if (sk == NULL)
                goto out;

        sock->ops = &packet_ops;
#ifdef CONFIG_SOCK_PACKET
        if (sock->type == SOCK_PACKET)
                sock->ops = &packet_ops_spkt;
#endif
        sock_init_data(sock,sk);
        sk_set_owner(sk, THIS_MODULE);

        po = sk->sk_protinfo = kmalloc(sizeof(*po), GFP_KERNEL);
        if (!po)
                goto out_free;
        memset(po, 0, sizeof(*po));
        sk->sk_family = PF_PACKET;
        po->num = protocol;

        sk->sk_destruct = packet_sock_destruct;
        atomic_inc(&packet_socks_nr);

        set_vx_info(&sk->sk_vx_info, current->vx_info);
        sk->sk_xid = vx_current_xid();
        set_nx_info(&sk->sk_nx_info, current->nx_info);
        sk->sk_nid = nx_current_nid();

        /*
         *      Attach a protocol block
         */

        spin_lock_init(&po->bind_lock);
        po->prot_hook.func = packet_rcv;
#ifdef CONFIG_SOCK_PACKET
        if (sock->type == SOCK_PACKET)
                po->prot_hook.func = packet_rcv_spkt;
#endif
        po->prot_hook.af_packet_priv = sk;

        if (protocol) {
                po->prot_hook.type = protocol;
                dev_add_pack(&po->prot_hook);
                sock_hold(sk);
                po->running = 1;
        }

        write_lock_bh(&packet_sklist_lock);
        sk_add_node(sk, &packet_sklist);
        write_unlock_bh(&packet_sklist_lock);
        return(0);

out_free:
        sk_free(sk);
out:
        return err;
}

/*
 *      Pull a packet from our receive queue and hand it to the user.
 *      If necessary we block.
 */

static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
                          struct msghdr *msg, size_t len, int flags)
{
        struct sock *sk = sock->sk;
        struct sk_buff *skb;
        int copied, err;

        err = -EINVAL;
        if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
                goto out;

#if 0
        /* What error should we return now? EUNATTACH? */
        if (pkt_sk(sk)->ifindex < 0)
                return -ENODEV;
#endif

        /*
         *      If the address length field is there to be filled in, we fill
         *      it in now.
         */

        if (sock->type == SOCK_PACKET)
                msg->msg_namelen = sizeof(struct sockaddr_pkt);
        else
                msg->msg_namelen = sizeof(struct sockaddr_ll);

        /*
         *      Call the generic datagram receiver. This handles all sorts
         *      of horrible races and re-entrancy so we can forget about it
         *      in the protocol layers.
         *
         *      Now it will return ENETDOWN if the device has just gone down,
         *      but then it will block.
         */

        skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);

        /*
         *      An error occurred so return it. Because skb_recv_datagram()
         *      handles the blocking we don't need to see or worry about
         *      blocking retries.
         */

        if(skb==NULL)
                goto out;

        /*
         *      You lose any data beyond the buffer you gave. If it worries a
         *      user program they can ask the device for its MTU anyway.
         */

        copied = skb->len;
        if (copied > len)
        {
                copied=len;
                msg->msg_flags|=MSG_TRUNC;
        }

        err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
        if (err)
                goto out_free;

        sock_recv_timestamp(msg, sk, skb);

        if (msg->msg_name)
                memcpy(msg->msg_name, skb->cb, msg->msg_namelen);

        /*
         *      Free or return the buffer as appropriate. Again this
         *      hides all the races and re-entrancy issues from us.
         */
        err = (flags&MSG_TRUNC) ? skb->len : copied;

out_free:
        skb_free_datagram(sk, skb);
out:
        return err;
}
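
/*
 * Editor's illustration (not part of the original file): the receive
 * side as seen from userspace.  The sockaddr_ll filled from skb->cb
 * in packet_rcv() comes back through the recvfrom() address.
 */
#if 0 /* example only */
#include <sys/types.h>
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <stdio.h>

static void read_one(int fd)
{
        unsigned char buf[2048];
        struct sockaddr_ll from;
        socklen_t fromlen = sizeof(from);
        ssize_t n;

        n = recvfrom(fd, buf, sizeof(buf), 0,
                     (struct sockaddr *)&from, &fromlen);
        if (n >= 0)
                printf("ifindex %d pkttype %d len %zd\n",
                       from.sll_ifindex, from.sll_pkttype, n);
}
#endif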

#ifdef CONFIG_SOCK_PACKET
static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
                               int *uaddr_len, int peer)
{
        struct net_device *dev;
        struct sock *sk = sock->sk;

        if (peer)
                return -EOPNOTSUPP;

        uaddr->sa_family = AF_PACKET;
        dev = dev_get_by_index(pkt_sk(sk)->ifindex);
        if (dev) {
                strlcpy(uaddr->sa_data, dev->name, 15);
                dev_put(dev);
        } else
                memset(uaddr->sa_data, 0, 14);
        *uaddr_len = sizeof(*uaddr);

        return 0;
}
#endif

static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
                          int *uaddr_len, int peer)
{
        struct net_device *dev;
        struct sock *sk = sock->sk;
        struct packet_opt *po = pkt_sk(sk);
        struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;

        if (peer)
                return -EOPNOTSUPP;

        sll->sll_family = AF_PACKET;
        sll->sll_ifindex = po->ifindex;
        sll->sll_protocol = po->num;
        dev = dev_get_by_index(po->ifindex);
        if (dev) {
                sll->sll_hatype = dev->type;
                sll->sll_halen = dev->addr_len;
                memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
                dev_put(dev);
        } else {
                sll->sll_hatype = 0;    /* Bad: we have no ARPHRD_UNSPEC */
                sll->sll_halen = 0;
        }
        *uaddr_len = sizeof(*sll);

        return 0;
}

#ifdef CONFIG_PACKET_MULTICAST
static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
{
        switch (i->type) {
        case PACKET_MR_MULTICAST:
                if (what > 0)
                        dev_mc_add(dev, i->addr, i->alen, 0);
                else
                        dev_mc_delete(dev, i->addr, i->alen, 0);
                break;
        case PACKET_MR_PROMISC:
                dev_set_promiscuity(dev, what);
                break;
        case PACKET_MR_ALLMULTI:
                dev_set_allmulti(dev, what);
                break;
        default:;
        }
}

static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
{
        for ( ; i; i=i->next) {
                if (i->ifindex == dev->ifindex)
                        packet_dev_mc(dev, i, what);
        }
}

static int packet_mc_add(struct sock *sk, struct packet_mreq *mreq)
{
        struct packet_opt *po = pkt_sk(sk);
        struct packet_mclist *ml, *i;
        struct net_device *dev;
        int err;

        rtnl_lock();

        err = -ENODEV;
        dev = __dev_get_by_index(mreq->mr_ifindex);
        if (!dev)
                goto done;

        err = -EINVAL;
        if (mreq->mr_alen > dev->addr_len)
                goto done;

        err = -ENOBUFS;
        i = (struct packet_mclist *)kmalloc(sizeof(*i), GFP_KERNEL);
        if (i == NULL)
                goto done;

        err = 0;
        for (ml = po->mclist; ml; ml = ml->next) {
                if (ml->ifindex == mreq->mr_ifindex &&
                    ml->type == mreq->mr_type &&
                    ml->alen == mreq->mr_alen &&
                    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
                        ml->count++;
                        /* Free the new element ... */
                        kfree(i);
                        goto done;
                }
        }

        i->type = mreq->mr_type;
        i->ifindex = mreq->mr_ifindex;
        i->alen = mreq->mr_alen;
        memcpy(i->addr, mreq->mr_address, i->alen);
        i->count = 1;
        i->next = po->mclist;
        po->mclist = i;
        packet_dev_mc(dev, i, +1);

done:
        rtnl_unlock();
        return err;
}

static int packet_mc_drop(struct sock *sk, struct packet_mreq *mreq)
{
        struct packet_mclist *ml, **mlp;

        rtnl_lock();

        for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
                if (ml->ifindex == mreq->mr_ifindex &&
                    ml->type == mreq->mr_type &&
                    ml->alen == mreq->mr_alen &&
                    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
                        if (--ml->count == 0) {
                                struct net_device *dev;
                                *mlp = ml->next;
                                dev = dev_get_by_index(ml->ifindex);
                                if (dev) {
                                        packet_dev_mc(dev, ml, -1);
                                        dev_put(dev);
                                }
                                kfree(ml);
                        }
                        rtnl_unlock();
                        return 0;
                }
        }
        rtnl_unlock();
        return -EADDRNOTAVAIL;
}

static void packet_flush_mclist(struct sock *sk)
{
        struct packet_opt *po = pkt_sk(sk);
        struct packet_mclist *ml;

        if (!po->mclist)
                return;

        rtnl_lock();
        while ((ml = po->mclist) != NULL) {
                struct net_device *dev;

                po->mclist = ml->next;
                if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
                        packet_dev_mc(dev, ml, -1);
                        dev_put(dev);
                }
                kfree(ml);
        }
        rtnl_unlock();
}
#endif

static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
{
        struct sock *sk = sock->sk;
        int ret;

        if (level != SOL_PACKET)
                return -ENOPROTOOPT;

        switch(optname) {
#ifdef CONFIG_PACKET_MULTICAST
        case PACKET_ADD_MEMBERSHIP:
        case PACKET_DROP_MEMBERSHIP:
        {
                struct packet_mreq mreq;
                if (optlen<sizeof(mreq))
                        return -EINVAL;
                if (copy_from_user(&mreq,optval,sizeof(mreq)))
                        return -EFAULT;
                if (optname == PACKET_ADD_MEMBERSHIP)
                        ret = packet_mc_add(sk, &mreq);
                else
                        ret = packet_mc_drop(sk, &mreq);
                return ret;
        }
#endif
#ifdef CONFIG_PACKET_MMAP
        case PACKET_RX_RING:
        {
                struct tpacket_req req;

                if (optlen<sizeof(req))
                        return -EINVAL;
                if (copy_from_user(&req,optval,sizeof(req)))
                        return -EFAULT;
                return packet_set_ring(sk, &req, 0);
        }
        case PACKET_COPY_THRESH:
        {
                int val;

                if (optlen!=sizeof(val))
                        return -EINVAL;
                if (copy_from_user(&val,optval,sizeof(val)))
                        return -EFAULT;

                pkt_sk(sk)->copy_thresh = val;
                return 0;
        }
#endif
        default:
                return -ENOPROTOOPT;
        }
}
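
/*
 * Editor's illustration (not part of the original file): the
 * userspace half of PACKET_RX_RING above, assuming the classic
 * tpacket_req layout.  Sizes must satisfy the sanity checks in
 * packet_set_ring(): block size a multiple of PAGE_SIZE, frame size
 * TPACKET_ALIGNMENT-aligned, and frames dividing blocks exactly.
 */
#if 0 /* example only */
#include <sys/socket.h>
#include <sys/mman.h>
#include <linux/if_packet.h>
#include <stddef.h>

static void *setup_rx_ring(int fd, struct tpacket_req *req)
{
        req->tp_block_size = 8192;      /* multiple of PAGE_SIZE */
        req->tp_frame_size = 2048;      /* TPACKET_ALIGNMENT aligned */
        req->tp_block_nr   = 32;
        req->tp_frame_nr   = 32 * 4;    /* frames_per_block * block_nr */

        if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING,
                       req, sizeof(*req)) < 0)
                return NULL;

        return mmap(NULL, (size_t)req->tp_block_nr * req->tp_block_size,
                    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
}
#endif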

int packet_getsockopt(struct socket *sock, int level, int optname,
                      char __user *optval, int __user *optlen)
{
        int len;
        struct sock *sk = sock->sk;
        struct packet_opt *po = pkt_sk(sk);

        if (level != SOL_PACKET)
                return -ENOPROTOOPT;

        if (get_user(len,optlen))
                return -EFAULT;

        if (len < 0)
                return -EINVAL;

        switch(optname) {
        case PACKET_STATISTICS:
        {
                struct tpacket_stats st;

                if (len > sizeof(struct tpacket_stats))
                        len = sizeof(struct tpacket_stats);
                spin_lock_bh(&sk->sk_receive_queue.lock);
                st = po->stats;
                memset(&po->stats, 0, sizeof(st));
                spin_unlock_bh(&sk->sk_receive_queue.lock);
                st.tp_packets += st.tp_drops;

                if (copy_to_user(optval, &st, len))
                        return -EFAULT;
                break;
        }
        default:
                return -ENOPROTOOPT;
        }

        if (put_user(len, optlen))
                return -EFAULT;
        return 0;
}
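
/*
 * Editor's illustration (not part of the original file): reading the
 * counters above.  Note the kernel zeroes them on each read and folds
 * the drops into tp_packets.
 */
#if 0 /* example only */
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <stdio.h>

static void print_stats(int fd)
{
        struct tpacket_stats st;
        socklen_t len = sizeof(st);

        if (getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len) == 0)
                printf("seen %u dropped %u\n", st.tp_packets, st.tp_drops);
}
#endif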


static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
        struct sock *sk;
        struct hlist_node *node;
        struct net_device *dev = (struct net_device*)data;

        read_lock(&packet_sklist_lock);
        sk_for_each(sk, node, &packet_sklist) {
                struct packet_opt *po = pkt_sk(sk);

                switch (msg) {
                case NETDEV_UNREGISTER:
#ifdef CONFIG_PACKET_MULTICAST
                        if (po->mclist)
                                packet_dev_mclist(dev, po->mclist, -1);
                        /* fallthrough */
#endif
                case NETDEV_DOWN:
                        if (dev->ifindex == po->ifindex) {
                                spin_lock(&po->bind_lock);
                                if (po->running) {
                                        __dev_remove_pack(&po->prot_hook);
                                        __sock_put(sk);
                                        po->running = 0;
                                        sk->sk_err = ENETDOWN;
                                        if (!sock_flag(sk, SOCK_DEAD))
                                                sk->sk_error_report(sk);
                                }
                                if (msg == NETDEV_UNREGISTER) {
                                        po->ifindex = -1;
                                        po->prot_hook.dev = NULL;
                                }
                                spin_unlock(&po->bind_lock);
                        }
                        break;
                case NETDEV_UP:
                        spin_lock(&po->bind_lock);
                        if (dev->ifindex == po->ifindex && po->num &&
                            !po->running) {
                                dev_add_pack(&po->prot_hook);
                                sock_hold(sk);
                                po->running = 1;
                        }
                        spin_unlock(&po->bind_lock);
                        break;
                }
        }
        read_unlock(&packet_sklist_lock);
        return NOTIFY_DONE;
}


static int packet_ioctl(struct socket *sock, unsigned int cmd,
                        unsigned long arg)
{
        struct sock *sk = sock->sk;

        switch(cmd) {
                case SIOCOUTQ:
                {
                        int amount = atomic_read(&sk->sk_wmem_alloc);
                        return put_user(amount, (int __user *)arg);
                }
                case SIOCINQ:
                {
                        struct sk_buff *skb;
                        int amount = 0;

                        spin_lock_bh(&sk->sk_receive_queue.lock);
                        skb = skb_peek(&sk->sk_receive_queue);
                        if (skb)
                                amount = skb->len;
                        spin_unlock_bh(&sk->sk_receive_queue.lock);
                        return put_user(amount, (int __user *)arg);
                }
                case SIOCGSTAMP:
                        return sock_get_timestamp(sk, (struct timeval __user *)arg);

#ifdef CONFIG_INET
                case SIOCADDRT:
                case SIOCDELRT:
                case SIOCDARP:
                case SIOCGARP:
                case SIOCSARP:
                case SIOCGIFADDR:
                case SIOCSIFADDR:
                case SIOCGIFBRDADDR:
                case SIOCSIFBRDADDR:
                case SIOCGIFNETMASK:
                case SIOCSIFNETMASK:
                case SIOCGIFDSTADDR:
                case SIOCSIFDSTADDR:
                case SIOCSIFFLAGS:
                        return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif

                default:
                        return dev_ioctl(cmd, (void __user *)arg);
        }
        return 0;
}

#ifndef CONFIG_PACKET_MMAP
#define packet_mmap sock_no_mmap
#define packet_poll datagram_poll
#else

unsigned int packet_poll(struct file * file, struct socket *sock, poll_table *wait)
{
        struct sock *sk = sock->sk;
        struct packet_opt *po = pkt_sk(sk);
        unsigned int mask = datagram_poll(file, sock, wait);

        spin_lock_bh(&sk->sk_receive_queue.lock);
        if (po->pg_vec) {
                unsigned last = po->head ? po->head-1 : po->frame_max;
                struct tpacket_hdr *h;

                h = (struct tpacket_hdr *)packet_lookup_frame(po, last);

                if (h->tp_status)
                        mask |= POLLIN | POLLRDNORM;
        }
        spin_unlock_bh(&sk->sk_receive_queue.lock);
        return mask;
}


/* Dirty? Well, I still have not learned a better way to account
 * for user mmaps.
 */

static void packet_mm_open(struct vm_area_struct *vma)
{
        struct file *file = vma->vm_file;
        struct inode *inode = file->f_dentry->d_inode;
        struct socket * sock = SOCKET_I(inode);
        struct sock *sk = sock->sk;

        if (sk)
                atomic_inc(&pkt_sk(sk)->mapped);
}

static void packet_mm_close(struct vm_area_struct *vma)
{
        struct file *file = vma->vm_file;
        struct inode *inode = file->f_dentry->d_inode;
        struct socket * sock = SOCKET_I(inode);
        struct sock *sk = sock->sk;

        if (sk)
                atomic_dec(&pkt_sk(sk)->mapped);
}

static struct vm_operations_struct packet_mmap_ops = {
        .open  = packet_mm_open,
        .close = packet_mm_close,
};

static void free_pg_vec(unsigned long *pg_vec, unsigned order, unsigned len)
{
        int i;

        for (i=0; i<len; i++) {
                if (pg_vec[i]) {
                        struct page *page, *pend;

                        pend = virt_to_page(pg_vec[i] + (PAGE_SIZE << order) - 1);
                        for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
                                ClearPageReserved(page);
                        free_pages(pg_vec[i], order);
                }
        }
        kfree(pg_vec);
}


static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
{
        unsigned long *pg_vec = NULL;
        struct packet_opt *po = pkt_sk(sk);
        int was_running, num, order = 0;
        int err = 0;

        if (req->tp_block_nr) {
                int i, l;

                /* Sanity tests and some calculations */

                if (po->pg_vec)
                        return -EBUSY;

                if ((int)req->tp_block_size <= 0)
                        return -EINVAL;
                if (req->tp_block_size&(PAGE_SIZE-1))
                        return -EINVAL;
                if (req->tp_frame_size < TPACKET_HDRLEN)
                        return -EINVAL;
                if (req->tp_frame_size&(TPACKET_ALIGNMENT-1))
                        return -EINVAL;

                po->frames_per_block = req->tp_block_size/req->tp_frame_size;
                if (po->frames_per_block <= 0)
                        return -EINVAL;
                if (po->frames_per_block*req->tp_block_nr != req->tp_frame_nr)
                        return -EINVAL;
                /* OK! */

                /* Allocate page vector */
                while ((PAGE_SIZE<<order) < req->tp_block_size)
                        order++;

                err = -ENOMEM;

                pg_vec = kmalloc(req->tp_block_nr*sizeof(unsigned long*), GFP_KERNEL);
                if (pg_vec == NULL)
                        goto out;
                memset(pg_vec, 0, req->tp_block_nr*sizeof(unsigned long*));

                for (i=0; i<req->tp_block_nr; i++) {
                        struct page *page, *pend;
                        pg_vec[i] = __get_free_pages(GFP_KERNEL, order);
                        if (!pg_vec[i])
                                goto out_free_pgvec;

                        pend = virt_to_page(pg_vec[i] + (PAGE_SIZE << order) - 1);
                        for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
                                SetPageReserved(page);
                }
                /* Page vector is allocated */

                l = 0;
                for (i=0; i<req->tp_block_nr; i++) {
                        unsigned long ptr = pg_vec[i];
                        struct tpacket_hdr *header;
                        int k;

                        for (k=0; k<po->frames_per_block; k++) {

                                header = (struct tpacket_hdr*)ptr;
                                header->tp_status = TP_STATUS_KERNEL;
                                ptr += req->tp_frame_size;
                        }
                }
                /* Done */
        } else {
                if (req->tp_frame_nr)
                        return -EINVAL;
        }

        lock_sock(sk);

        /* Detach socket from network */
        spin_lock(&po->bind_lock);
        was_running = po->running;
        num = po->num;
        if (was_running) {
                __dev_remove_pack(&po->prot_hook);
                po->num = 0;
                po->running = 0;
                __sock_put(sk);
        }
        spin_unlock(&po->bind_lock);

        synchronize_net();

        err = -EBUSY;
        if (closing || atomic_read(&po->mapped) == 0) {
                err = 0;
#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })

                spin_lock_bh(&sk->sk_receive_queue.lock);
                pg_vec = XC(po->pg_vec, pg_vec);
                po->frame_max = req->tp_frame_nr-1;
                po->head = 0;
                po->frame_size = req->tp_frame_size;
                spin_unlock_bh(&sk->sk_receive_queue.lock);

                order = XC(po->pg_vec_order, order);
                req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);

                po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
                po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
                skb_queue_purge(&sk->sk_receive_queue);
#undef XC
                if (atomic_read(&po->mapped))
                        printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
        }

        spin_lock(&po->bind_lock);
        if (was_running && !po->running) {
                sock_hold(sk);
                po->running = 1;
                po->num = num;
                dev_add_pack(&po->prot_hook);
        }
        spin_unlock(&po->bind_lock);

        release_sock(sk);

out_free_pgvec:
        if (pg_vec)
                free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
        return err;
}
1711
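/*
 * mmap() handler for PF_PACKET sockets.  User space must map the whole
 * ring in one go: a non-zero page offset is rejected, and the requested
 * length has to match pg_vec_len * pg_vec_pages * PAGE_SIZE exactly.
 * Each high-order block is then remapped contiguously into the vma.
 */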
static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
        struct sock *sk = sock->sk;
        struct packet_opt *po = pkt_sk(sk);
        unsigned long size;
        unsigned long start;
        int err = -EINVAL;
        int i;

        if (vma->vm_pgoff)
                return -EINVAL;

        size = vma->vm_end - vma->vm_start;

        lock_sock(sk);
        if (po->pg_vec == NULL)
                goto out;
        if (size != po->pg_vec_len * po->pg_vec_pages * PAGE_SIZE)
                goto out;

        start = vma->vm_start;
        err = -EAGAIN;
        for (i = 0; i < po->pg_vec_len; i++) {
                if (remap_page_range(vma, start, __pa(po->pg_vec[i]),
                                     po->pg_vec_pages * PAGE_SIZE,
                                     vma->vm_page_prot))
                        goto out;
                start += po->pg_vec_pages * PAGE_SIZE;
        }
        /* Bump the mapped count only once the whole ring is remapped,
         * so a failed mmap cannot leave po->mapped elevated and make
         * later packet_set_ring() calls fail with -EBUSY. */
        atomic_inc(&po->mapped);
        vma->vm_ops = &packet_mmap_ops;
        err = 0;

out:
        release_sock(sk);
        return err;
}
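/*
 * Minimal user-space sketch of the ring interface implemented above
 * (illustrative only, not part of this file; error handling omitted,
 * needs <sys/socket.h>, <sys/mman.h>, <poll.h>, <arpa/inet.h>,
 * <linux/if_packet.h> and <linux/if_ether.h>, plus CAP_NET_RAW):
 *
 *	int fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 *	struct tpacket_req req = {
 *		.tp_block_size = 4096,
 *		.tp_block_nr   = 64,
 *		.tp_frame_size = 2048,
 *		.tp_frame_nr   = 128,	(= tp_block_nr * frames per block)
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 *
 *	(map the whole ring, offset 0, as packet_mmap() requires)
 *	char *ring = mmap(NULL, req.tp_block_nr * req.tp_block_size,
 *			  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *
 *	(wait for the kernel to hand over the first frame slot)
 *	struct tpacket_hdr *hdr = (struct tpacket_hdr *)ring;
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *	while (!(hdr->tp_status & TP_STATUS_USER))
 *		poll(&pfd, 1, -1);
 *	(consume the frame, then return the slot to the kernel)
 *	hdr->tp_status = TP_STATUS_KERNEL;
 */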
#endif

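/*
 * Two operation tables: packet_ops_spkt serves the obsolete
 * SOCK_PACKET compatibility sockets, packet_ops the PF_PACKET
 * family proper (with mmap, setsockopt etc. wired up).
 */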
#ifdef CONFIG_SOCK_PACKET
struct proto_ops packet_ops_spkt = {
        .family =       PF_PACKET,
        .owner =        THIS_MODULE,
        .release =      packet_release,
        .bind =         packet_bind_spkt,
        .connect =      sock_no_connect,
        .socketpair =   sock_no_socketpair,
        .accept =       sock_no_accept,
        .getname =      packet_getname_spkt,
        .poll =         datagram_poll,
        .ioctl =        packet_ioctl,
        .listen =       sock_no_listen,
        .shutdown =     sock_no_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      packet_sendmsg_spkt,
        .recvmsg =      packet_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
};
#endif

struct proto_ops packet_ops = {
        .family =       PF_PACKET,
        .owner =        THIS_MODULE,
        .release =      packet_release,
        .bind =         packet_bind,
        .connect =      sock_no_connect,
        .socketpair =   sock_no_socketpair,
        .accept =       sock_no_accept,
        .getname =      packet_getname,
        .poll =         packet_poll,
        .ioctl =        packet_ioctl,
        .listen =       sock_no_listen,
        .shutdown =     sock_no_shutdown,
        .setsockopt =   packet_setsockopt,
        .getsockopt =   packet_getsockopt,
        .sendmsg =      packet_sendmsg,
        .recvmsg =      packet_recvmsg,
        .mmap =         packet_mmap,
        .sendpage =     sock_no_sendpage,
};
EXPORT_SYMBOL(packet_ops);

struct net_proto_family packet_family_ops = {
        .family =       PF_PACKET,
        .create =       packet_create,
        .owner  =       THIS_MODULE,
};
EXPORT_SYMBOL(packet_family_ops);

static struct notifier_block packet_netdev_notifier = {
        .notifier_call = packet_notifier,
};

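/*
 * /proc/net/packet: a seq_file walk of packet_sklist under
 * packet_sklist_lock.  SEQ_START_TOKEN yields the header line;
 * every subsequent entry is one packet socket.
 */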
#ifdef CONFIG_PROC_FS
static inline struct sock *packet_seq_idx(loff_t off)
{
        struct sock *s;
        struct hlist_node *node;

        sk_for_each(s, node, &packet_sklist) {
                if (!off--)
                        return s;
        }
        return NULL;
}

static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
{
        read_lock(&packet_sklist_lock);
        return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN;
}

static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        ++*pos;
        return (v == SEQ_START_TOKEN)
                ? sk_head(&packet_sklist)
                : sk_next((struct sock *)v);
}

static void packet_seq_stop(struct seq_file *seq, void *v)
{
        read_unlock(&packet_sklist_lock);
}

static int packet_seq_show(struct seq_file *seq, void *v)
{
        if (v == SEQ_START_TOKEN)
                seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
        else {
                struct sock *s = v;
                const struct packet_opt *po = pkt_sk(s);

                seq_printf(seq,
                           "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
                           s,
                           atomic_read(&s->sk_refcnt),
                           s->sk_type,
                           ntohs(po->num),
                           po->ifindex,
                           po->running,
                           atomic_read(&s->sk_rmem_alloc),
                           sock_i_uid(s),
                           sock_i_ino(s));
        }

        return 0;
}

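/*
 * A bound SOCK_RAW socket sniffing all protocols shows up roughly as
 * follows (illustrative values only):
 *
 *	sk       RefCnt Type Proto  Iface R Rmem   User   Inode
 *	c1a2b000 2      3    0003   2     1 0      0      5813
 */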
static struct seq_operations packet_seq_ops = {
        .start  = packet_seq_start,
        .next   = packet_seq_next,
        .stop   = packet_seq_stop,
        .show   = packet_seq_show,
};

static int packet_seq_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &packet_seq_ops);
}

static struct file_operations packet_seq_fops = {
        .owner          = THIS_MODULE,
        .open           = packet_seq_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
};

#endif

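/*
 * Module plumbing: register the address family, hook netdevice
 * notifications, create the proc entry; tear everything down in
 * the reverse order on unload.
 */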
static void __exit packet_exit(void)
{
        proc_net_remove("packet");
        unregister_netdevice_notifier(&packet_netdev_notifier);
        sock_unregister(PF_PACKET);
}

static int __init packet_init(void)
{
        sock_register(&packet_family_ops);
        register_netdevice_notifier(&packet_netdev_notifier);
        proc_net_fops_create("packet", 0, &packet_seq_fops);

        return 0;
}

module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);