/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              PACKET - implements raw packet sockets.
 *
 * Version:     $Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $
 *
 * Authors:     Ross Biro
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *              Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *              Alan Cox        :       verify_area() now used correctly
 *              Alan Cox        :       new skbuff lists, look ma no backlogs!
 *              Alan Cox        :       tidied skbuff lists.
 *              Alan Cox        :       Now uses generic datagram routines I
 *                                      added. Also fixed the peek/read crash
 *                                      from all old Linux datagram code.
 *              Alan Cox        :       Uses the improved datagram code.
 *              Alan Cox        :       Added NULL's for socket options.
 *              Alan Cox        :       Re-commented the code.
 *              Alan Cox        :       Use new kernel side addressing
 *              Rob Janssen     :       Correct MTU usage.
 *              Dave Platt      :       Counter leaks caused by incorrect
 *                                      interrupt locking and some slightly
 *                                      dubious gcc output. Can you read
 *                                      compiler: it said _VOLATILE_
 *      Richard Kooijman        :       Timestamp fixes.
 *              Alan Cox        :       New buffers. Use sk->mac.raw.
 *              Alan Cox        :       sendmsg/recvmsg support.
 *              Alan Cox        :       Protocol setting support
 *      Alexey Kuznetsov        :       Untied from IPv4 stack.
 *      Cyrus Durgin            :       Fixed kerneld for kmod.
 *      Michal Ostrowski        :       Module initialization cleanup.
 *         Ulises Alonso        :       Frame number limit removal and
 *                                      packet_set_ring memory leak.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kmod.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>

#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif

#define CONFIG_SOCK_PACKET      1

/*
   Proposed replacement for SIOC{ADD,DEL}MULTI and the
   IFF_PROMISC, IFF_ALLMULTI flags.

   It is more expensive, but I believe it is the really correct
   solution: reentrant, safe and fault tolerant.

   IFF_PROMISC/IFF_ALLMULTI/SIOC{ADD/DEL}MULTI are faked by keeping a
   reference count and a global flag, so that the real status is
   (gflag|(count != 0)); this way we can keep using the obsolete,
   faulty interface without harming clever users.
 */
#define CONFIG_PACKET_MULTICAST 1
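
/*
 * Worked example of the status rule above (hypothetical values): if one
 * socket holds a PACKET_MR_PROMISC reference (count == 1) while the
 * global IFF_PROMISC flag is clear (gflag == 0), the effective status
 * is (0 | (1 != 0)) == 1, i.e. promiscuous mode stays on until the last
 * reference is dropped, regardless of what the legacy flag says.
 */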

/*
   Assumptions:
   - if a device has no dev->hard_header routine, it adds and removes the ll
     header inside itself. In this case the ll header is invisible outside
     of the device, but higher levels should still reserve
     dev->hard_header_len.  Some devices are clever enough to reallocate
     the skb when the header will not fit into the reserved space (tunnel),
     others are silly (PPP).
   - a packet socket receives packets with the ll header pulled,
     so SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header!=NULL
   mac.raw -> ll header
   data    -> data

Outgoing, dev->hard_header!=NULL
   mac.raw -> ll header
   data    -> ll header

Incoming, dev->hard_header==NULL
   mac.raw -> UNKNOWN position. It is very likely that it points to the ll
              header.  PPP does this, which is wrong, because it introduces
              asymmetry between the rx and tx paths.
   data    -> data

Outgoing, dev->hard_header==NULL
   mac.raw -> data. The ll header is still not built!
   data    -> data

Summary
  If dev->hard_header==NULL we are unlikely to restore a sensible ll header.


On transmit:
------------

dev->hard_header != NULL
   mac.raw -> ll header
   data    -> ll header

dev->hard_header == NULL (ll header is added by the device, we cannot control it)
   mac.raw -> data
   data -> data

   We should set nh.raw on output to the correct position,
   the packet classifier depends on it.
 */
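
/*
 * A minimal user-space sketch of the SOCK_RAW receive case described
 * above (illustrative only: "eth0" is a placeholder, error handling is
 * omitted, headers <sys/socket.h>, <linux/if_packet.h>, <net/ethernet.h>
 * and <net/if.h> assumed).  The received buffer starts with the ll
 * header, exactly as the table says:
 *
 *      char buf[2048];
 *      int fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 *      struct sockaddr_ll sll;
 *
 *      memset(&sll, 0, sizeof(sll));
 *      sll.sll_family   = AF_PACKET;
 *      sll.sll_protocol = htons(ETH_P_ALL);
 *      sll.sll_ifindex  = if_nametoindex("eth0");
 *      bind(fd, (struct sockaddr *)&sll, sizeof(sll));
 *      recv(fd, buf, sizeof(buf), 0);    buf now begins with the ll header
 */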

/* List of all packet sockets. */
static HLIST_HEAD(packet_sklist);
static DEFINE_RWLOCK(packet_sklist_lock);

static atomic_t packet_socks_nr;


/* Private packet socket structures. */

#ifdef CONFIG_PACKET_MULTICAST
struct packet_mclist
{
        struct packet_mclist    *next;
        int                     ifindex;
        int                     count;
        unsigned short          type;
        unsigned short          alen;
        unsigned char           addr[8];
};
#endif
#ifdef CONFIG_PACKET_MMAP
static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
#endif

static void packet_flush_mclist(struct sock *sk);

struct packet_sock {
        /* struct sock has to be the first member of packet_sock */
        struct sock             sk;
        struct tpacket_stats    stats;
#ifdef CONFIG_PACKET_MMAP
        char *                  *pg_vec;
        unsigned int            head;
        unsigned int            frames_per_block;
        unsigned int            frame_size;
        unsigned int            frame_max;
        int                     copy_thresh;
#endif
        struct packet_type      prot_hook;
        spinlock_t              bind_lock;
        char                    running;        /* prot_hook is attached*/
        int                     ifindex;        /* bound device         */
        unsigned short          num;
#ifdef CONFIG_PACKET_MULTICAST
        struct packet_mclist    *mclist;
#endif
#ifdef CONFIG_PACKET_MMAP
        atomic_t                mapped;
        unsigned int            pg_vec_order;
        unsigned int            pg_vec_pages;
        unsigned int            pg_vec_len;
#endif
};

#ifdef CONFIG_PACKET_MMAP

static inline char *packet_lookup_frame(struct packet_sock *po, unsigned int position)
{
        unsigned int pg_vec_pos, frame_offset;
        char *frame;

        pg_vec_pos = position / po->frames_per_block;
        frame_offset = position % po->frames_per_block;

        frame = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size);

        return frame;
}
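
/*
 * Worked example of the lookup above (hypothetical ring geometry): with
 * tp_block_size = 4096 and tp_frame_size = 2048 we get frames_per_block
 * = 2, so frame number 5 lives in block 5/2 = 2, at byte offset
 * (5%2) * 2048 = 2048 into that block.
 */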
#endif

static inline struct packet_sock *pkt_sk(struct sock *sk)
{
        return (struct packet_sock *)sk;
}

static void packet_sock_destruct(struct sock *sk)
{
        BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
        BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));

        if (!sock_flag(sk, SOCK_DEAD)) {
                printk("Attempt to release alive packet socket: %p\n", sk);
                return;
        }

        atomic_dec(&packet_socks_nr);
#ifdef PACKET_REFCNT_DEBUG
        printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
#endif
}

static struct proto_ops packet_ops;

#ifdef CONFIG_SOCK_PACKET
static struct proto_ops packet_ops_spkt;

static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
{
        struct sock *sk;
        struct sockaddr_pkt *spkt;

        /*
         *      When we registered the protocol we saved the socket in the data
         *      field for just this event.
         */

        sk = pt->af_packet_priv;

        /*
         *      Yank back the headers [hope the device set this
         *      right or kerboom...]
         *
         *      Incoming packets have the ll header pulled,
         *      push it back.
         *
         *      For outgoing ones skb->data == skb->mac.raw,
         *      so this procedure is a no-op.
         */

        if (skb->pkt_type == PACKET_LOOPBACK)
                goto out;

        if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
                goto oom;

        /* drop any routing info */
        dst_release(skb->dst);
        skb->dst = NULL;

        /* drop conntrack reference */
        nf_reset(skb);

        spkt = (struct sockaddr_pkt*)skb->cb;

        skb_push(skb, skb->data-skb->mac.raw);

        /*
         *      The SOCK_PACKET socket receives _all_ frames.
         */

        spkt->spkt_family = dev->type;
        strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
        spkt->spkt_protocol = skb->protocol;

        /*
         *      Charge the memory to the socket. This is done specifically
         *      to prevent sockets using all the memory up.
         */

        if (sock_queue_rcv_skb(sk,skb) == 0)
                return 0;

out:
        kfree_skb(skb);
oom:
        return 0;
}


/*
 *      Output a raw packet to a device layer. This bypasses all the other
 *      protocol layers and you must therefore supply it with a complete frame
 */

static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
                               struct msghdr *msg, size_t len)
{
        struct sock *sk = sock->sk;
        struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
        struct sk_buff *skb;
        struct net_device *dev;
        unsigned short proto=0;
        int err;

        /*
         *      Get and verify the address.
         */

        if (saddr)
        {
                if (msg->msg_namelen < sizeof(struct sockaddr))
                        return(-EINVAL);
                if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
                        proto=saddr->spkt_protocol;
        }
        else
                return(-ENOTCONN);      /* SOCK_PACKET must be sent giving an address */

        /*
         *      Find the device first to size check it
         */

        saddr->spkt_device[13] = 0;
        dev = dev_get_by_name(saddr->spkt_device);
        err = -ENODEV;
        if (dev == NULL)
                goto out_unlock;

        /*
         *      You may not queue a frame bigger than the mtu. This is the lowest level
         *      raw protocol and you must do your own fragmentation at this level.
         */

        err = -EMSGSIZE;
        if(len>dev->mtu+dev->hard_header_len)
                goto out_unlock;

        err = -ENOBUFS;
        skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);

        /*
         *      If the write buffer is full, then tough. At this level the user gets to
         *      deal with the problem - do your own algorithmic backoffs. That's far
         *      more flexible.
         */

        if (skb == NULL)
                goto out_unlock;

        /*
         *      Fill it in
         */

        /* FIXME: Save some space for broken drivers that write a
         * hard header at transmission time by themselves. PPP is the
         * notable one here. This should really be fixed at the driver level.
         */
        skb_reserve(skb, LL_RESERVED_SPACE(dev));
        skb->nh.raw = skb->data;

        /* Try to align data part correctly */
        if (dev->hard_header) {
                skb->data -= dev->hard_header_len;
                skb->tail -= dev->hard_header_len;
                if (len < dev->hard_header_len)
                        skb->nh.raw = skb->data;
        }

        /* Returns -EFAULT on error */
        err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
        skb->protocol = proto;
        skb->dev = dev;
        skb->priority = sk->sk_priority;
        if (err)
                goto out_free;

        err = -ENETDOWN;
        if (!(dev->flags & IFF_UP))
                goto out_free;

        /*
         *      Now send it
         */

        dev_queue_xmit(skb);
        dev_put(dev);
        return(len);

out_free:
        kfree_skb(skb);
out_unlock:
        if (dev)
                dev_put(dev);
        return err;
}
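
/*
 * User-space sketch of this legacy SOCK_PACKET transmit path (obsolete
 * interface; "eth0", frame and frame_len are placeholders, error
 * handling omitted).  The caller supplies a complete ll frame:
 *
 *      int fd = socket(AF_INET, SOCK_PACKET, htons(ETH_P_ALL));
 *      struct sockaddr_pkt spkt;
 *
 *      memset(&spkt, 0, sizeof(spkt));
 *      strncpy(spkt.spkt_device, "eth0", sizeof(spkt.spkt_device));
 *      spkt.spkt_protocol = htons(ETH_P_IP);
 *      sendto(fd, frame, frame_len, 0,
 *             (struct sockaddr *)&spkt, sizeof(spkt));
 */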
#endif

static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned res)
{
        struct sk_filter *filter;

        bh_lock_sock(sk);
        filter = sk->sk_filter;
        /*
         * Our caller already checked that filter != NULL but we need to
         * verify that under bh_lock_sock() to be safe
         */
        if (likely(filter != NULL))
                res = sk_run_filter(skb, filter->insns, filter->len);
        bh_unlock_sock(sk);

        return res;
}
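
/*
 * For reference, sk->sk_filter is normally installed from user space
 * with setsockopt(SO_ATTACH_FILTER).  A minimal sketch (a classic BPF
 * program that accepts every packet in full; illustrative only):
 *
 *      struct sock_filter code[] = {
 *              { BPF_RET | BPF_K, 0, 0, 0xffffffff },
 *      };
 *      struct sock_fprog prog = { .len = 1, .filter = code };
 *
 *      setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
 *
 * run_filter() then returns the length the program asks to keep, which
 * the receive paths below use to cap the snaplen.
 */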

/*
   This function does lazy skb cloning in the hope that most packets
   are discarded by BPF.

   Note the tricky part: we DO mangle the shared skb! skb->data, skb->len
   and skb->cb are mangled. It works because (and until) packets
   falling here are owned by the current CPU. Output packets are cloned
   by dev_queue_xmit_nit(), input packets are processed by net_bh
   sequentially, so if we return the skb to its original state on exit,
   we will not harm anyone.
 */

static int packet_rcv(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
{
        struct sock *sk;
        struct sockaddr_ll *sll;
        struct packet_sock *po;
        u8 * skb_head = skb->data;
        int skb_len = skb->len;
        unsigned snaplen;

        if (skb->pkt_type == PACKET_LOOPBACK)
                goto drop;

        sk = pt->af_packet_priv;
        po = pkt_sk(sk);

#if defined(CONFIG_VNET) || defined(CONFIG_VNET_MODULE)
        if (vnet_active &&
            (int) sk->sk_xid > 0 && sk->sk_xid != skb->xid)
                goto drop;
#endif

        skb->dev = dev;

        if (dev->hard_header) {
                /* The device has an explicit notion of ll header,
                   exported to higher levels.

                   Otherwise, the device hides the details of its frame
                   structure, so that the corresponding packet head is
                   never delivered to the user.
                 */
                if (sk->sk_type != SOCK_DGRAM)
                        skb_push(skb, skb->data - skb->mac.raw);
                else if (skb->pkt_type == PACKET_OUTGOING) {
                        /* Special case: outgoing packets have ll header at head */
                        skb_pull(skb, skb->nh.raw - skb->data);
                }
        }

        snaplen = skb->len;

        if (sk->sk_filter) {
                unsigned res = run_filter(skb, sk, snaplen);
                if (res == 0)
                        goto drop_n_restore;
                if (snaplen > res)
                        snaplen = res;
        }

        if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
            (unsigned)sk->sk_rcvbuf)
                goto drop_n_acct;

        if (skb_shared(skb)) {
                struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
                if (nskb == NULL)
                        goto drop_n_acct;

                if (skb_head != skb->data) {
                        skb->data = skb_head;
                        skb->len = skb_len;
                }
                kfree_skb(skb);
                skb = nskb;
        }

        sll = (struct sockaddr_ll*)skb->cb;
        sll->sll_family = AF_PACKET;
        sll->sll_hatype = dev->type;
        sll->sll_protocol = skb->protocol;
        sll->sll_pkttype = skb->pkt_type;
        sll->sll_ifindex = dev->ifindex;
        sll->sll_halen = 0;

        if (dev->hard_header_parse)
                sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);

        if (pskb_trim(skb, snaplen))
                goto drop_n_acct;

        skb_set_owner_r(skb, sk);
        skb->dev = NULL;
        dst_release(skb->dst);
        skb->dst = NULL;

        /* drop conntrack reference */
        nf_reset(skb);

        spin_lock(&sk->sk_receive_queue.lock);
        po->stats.tp_packets++;
        __skb_queue_tail(&sk->sk_receive_queue, skb);
        spin_unlock(&sk->sk_receive_queue.lock);
        sk->sk_data_ready(sk, skb->len);
        return 0;

drop_n_acct:
        spin_lock(&sk->sk_receive_queue.lock);
        po->stats.tp_drops++;
        spin_unlock(&sk->sk_receive_queue.lock);

drop_n_restore:
        if (skb_head != skb->data && skb_shared(skb)) {
                skb->data = skb_head;
                skb->len = skb_len;
        }
drop:
        kfree_skb(skb);
        return 0;
}

#ifdef CONFIG_PACKET_MMAP
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
{
        struct sock *sk;
        struct packet_sock *po;
        struct sockaddr_ll *sll;
        struct tpacket_hdr *h;
        u8 * skb_head = skb->data;
        int skb_len = skb->len;
        unsigned snaplen;
        unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
        unsigned short macoff, netoff;
        struct sk_buff *copy_skb = NULL;

        if (skb->pkt_type == PACKET_LOOPBACK)
                goto drop;

        sk = pt->af_packet_priv;
        po = pkt_sk(sk);

        if (dev->hard_header) {
                if (sk->sk_type != SOCK_DGRAM)
                        skb_push(skb, skb->data - skb->mac.raw);
                else if (skb->pkt_type == PACKET_OUTGOING) {
                        /* Special case: outgoing packets have ll header at head */
                        skb_pull(skb, skb->nh.raw - skb->data);
                        if (skb->ip_summed == CHECKSUM_HW)
                                status |= TP_STATUS_CSUMNOTREADY;
                }
        }

        snaplen = skb->len;

        if (sk->sk_filter) {
                unsigned res = run_filter(skb, sk, snaplen);
                if (res == 0)
                        goto drop_n_restore;
                if (snaplen > res)
                        snaplen = res;
        }

        if (sk->sk_type == SOCK_DGRAM) {
                macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
        } else {
                unsigned maclen = skb->nh.raw - skb->data;
                netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
                macoff = netoff - maclen;
        }

        if (macoff + snaplen > po->frame_size) {
                if (po->copy_thresh &&
                    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
                    (unsigned)sk->sk_rcvbuf) {
                        if (skb_shared(skb)) {
                                copy_skb = skb_clone(skb, GFP_ATOMIC);
                        } else {
                                copy_skb = skb_get(skb);
                                skb_head = skb->data;
                        }
                        if (copy_skb)
                                skb_set_owner_r(copy_skb, sk);
                }
                snaplen = po->frame_size - macoff;
                if ((int)snaplen < 0)
                        snaplen = 0;
        }
        if (snaplen > skb->len-skb->data_len)
                snaplen = skb->len-skb->data_len;

        spin_lock(&sk->sk_receive_queue.lock);
        h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head);

        if (h->tp_status)
                goto ring_is_full;
        po->head = po->head != po->frame_max ? po->head+1 : 0;
        po->stats.tp_packets++;
        if (copy_skb) {
                status |= TP_STATUS_COPY;
                __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
        }
        if (!po->stats.tp_drops)
                status &= ~TP_STATUS_LOSING;
        spin_unlock(&sk->sk_receive_queue.lock);

        memcpy((u8*)h + macoff, skb->data, snaplen);

        h->tp_len = skb->len;
        h->tp_snaplen = snaplen;
        h->tp_mac = macoff;
        h->tp_net = netoff;
        if (skb->stamp.tv_sec == 0) {
                do_gettimeofday(&skb->stamp);
                sock_enable_timestamp(sk);
        }
        h->tp_sec = skb->stamp.tv_sec;
        h->tp_usec = skb->stamp.tv_usec;

        sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
        sll->sll_halen = 0;
        if (dev->hard_header_parse)
                sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
        sll->sll_family = AF_PACKET;
        sll->sll_hatype = dev->type;
        sll->sll_protocol = skb->protocol;
        sll->sll_pkttype = skb->pkt_type;
        sll->sll_ifindex = dev->ifindex;

        h->tp_status = status;
        mb();

        {
                struct page *p_start, *p_end;
                u8 *h_end = (u8 *)h + macoff + snaplen - 1;

                p_start = virt_to_page(h);
                p_end = virt_to_page(h_end);
                while (p_start <= p_end) {
                        flush_dcache_page(p_start);
                        p_start++;
                }
        }

        sk->sk_data_ready(sk, 0);

drop_n_restore:
        if (skb_head != skb->data && skb_shared(skb)) {
                skb->data = skb_head;
                skb->len = skb_len;
        }
drop:
        kfree_skb(skb);
        return 0;

ring_is_full:
        po->stats.tp_drops++;
        spin_unlock(&sk->sk_receive_queue.lock);

        sk->sk_data_ready(sk, 0);
        if (copy_skb)
                kfree_skb(copy_skb);
        goto drop_n_restore;
}

#endif
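
/*
 * User-space sketch of consuming the RX ring that tpacket_rcv() fills
 * (illustrative only; fd and req are set up as for the PACKET_RX_RING
 * setsockopt below, error handling omitted):
 *
 *      setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 *      void *ring = mmap(NULL, req.tp_block_size * req.tp_block_nr,
 *                        PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *
 *      struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *      struct tpacket_hdr *h = ring;     first frame of the ring
 *      while (!(h->tp_status & TP_STATUS_USER))
 *              poll(&pfd, 1, -1);        wait for tpacket_rcv()
 *      ... read tp_snaplen bytes at (char *)h + h->tp_mac ...
 *      h->tp_status = TP_STATUS_KERNEL;  hand the frame back
 *
 * Frames are consumed in the same circular order in which po->head
 * advances above.
 */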


static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
                          struct msghdr *msg, size_t len)
{
        struct sock *sk = sock->sk;
        struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
        struct sk_buff *skb;
        struct net_device *dev;
        unsigned short proto;
        unsigned char *addr;
        int ifindex, err, reserve = 0;

        /*
         *      Get and verify the address.
         */

        if (saddr == NULL) {
                struct packet_sock *po = pkt_sk(sk);

                ifindex = po->ifindex;
                proto   = po->num;
                addr    = NULL;
        } else {
                err = -EINVAL;
                if (msg->msg_namelen < sizeof(struct sockaddr_ll))
                        goto out;
                ifindex = saddr->sll_ifindex;
                proto   = saddr->sll_protocol;
                addr    = saddr->sll_addr;
        }


        dev = dev_get_by_index(ifindex);
        err = -ENXIO;
        if (dev == NULL)
                goto out_unlock;
        if (sock->type == SOCK_RAW)
                reserve = dev->hard_header_len;

        err = -EMSGSIZE;
        if (len > dev->mtu+reserve)
                goto out_unlock;

        skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
                                msg->msg_flags & MSG_DONTWAIT, &err);
        if (skb==NULL)
                goto out_unlock;

        skb_reserve(skb, LL_RESERVED_SPACE(dev));
        skb->nh.raw = skb->data;

        if (dev->hard_header) {
                int res;
                err = -EINVAL;
                res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
                if (sock->type != SOCK_DGRAM) {
                        skb->tail = skb->data;
                        skb->len = 0;
                } else if (res < 0)
                        goto out_free;
        }

        /* Returns -EFAULT on error */
        err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
        if (err)
                goto out_free;

        skb->protocol = proto;
        skb->dev = dev;
        skb->priority = sk->sk_priority;

        err = -ENETDOWN;
        if (!(dev->flags & IFF_UP))
                goto out_free;

        /*
         *      Now send it
         */

        err = dev_queue_xmit(skb);
        if (err > 0 && (err = net_xmit_errno(err)) != 0)
                goto out_unlock;

        dev_put(dev);

        return(len);

out_free:
        kfree_skb(skb);
out_unlock:
        if (dev)
                dev_put(dev);
out:
        return err;
}
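
/*
 * User-space sketch of a SOCK_DGRAM send through this path ("eth0",
 * dest_mac, payload and payload_len are placeholders, error handling
 * omitted).  The kernel builds the ll header from sll_addr via
 * dev->hard_header:
 *
 *      struct sockaddr_ll sll;
 *
 *      memset(&sll, 0, sizeof(sll));
 *      sll.sll_family   = AF_PACKET;
 *      sll.sll_ifindex  = if_nametoindex("eth0");
 *      sll.sll_protocol = htons(ETH_P_IP);
 *      sll.sll_halen    = ETH_ALEN;
 *      memcpy(sll.sll_addr, dest_mac, ETH_ALEN);
 *      sendto(fd, payload, payload_len, 0,
 *             (struct sockaddr *)&sll, sizeof(sll));
 */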

/*
 *      Close a PACKET socket. This is fairly simple. We immediately go
 *      to 'closed' state and remove our protocol entry in the device list.
 */

static int packet_release(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct packet_sock *po;

        if (!sk)
                return 0;

        po = pkt_sk(sk);

        write_lock_bh(&packet_sklist_lock);
        sk_del_node_init(sk);
        write_unlock_bh(&packet_sklist_lock);

        /*
         *      Unhook packet receive handler.
         */

        if (po->running) {
                /*
                 *      Remove the protocol hook
                 */
                dev_remove_pack(&po->prot_hook);
                po->running = 0;
                po->num = 0;
                __sock_put(sk);
        }

#ifdef CONFIG_PACKET_MULTICAST
        packet_flush_mclist(sk);
#endif

#ifdef CONFIG_PACKET_MMAP
        if (po->pg_vec) {
                struct tpacket_req req;
                memset(&req, 0, sizeof(req));
                packet_set_ring(sk, &req, 1);
        }
#endif

        /*
         *      Now the socket is dead. No more input will appear.
         */

        sock_orphan(sk);
        sock->sk = NULL;

        /* Purge queues */

        skb_queue_purge(&sk->sk_receive_queue);

        sock_put(sk);
        return 0;
}

/*
 *      Attach a packet hook.
 */

static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
{
        struct packet_sock *po = pkt_sk(sk);
        /*
         *      Detach an existing hook if present.
         */

        lock_sock(sk);

        spin_lock(&po->bind_lock);
        if (po->running) {
                __sock_put(sk);
                po->running = 0;
                po->num = 0;
                spin_unlock(&po->bind_lock);
                dev_remove_pack(&po->prot_hook);
                spin_lock(&po->bind_lock);
        }

        po->num = protocol;
        po->prot_hook.type = protocol;
        po->prot_hook.dev = dev;

        po->ifindex = dev ? dev->ifindex : 0;

        if (protocol == 0)
                goto out_unlock;

        if (dev) {
                if (dev->flags&IFF_UP) {
                        dev_add_pack(&po->prot_hook);
                        sock_hold(sk);
                        po->running = 1;
                } else {
                        sk->sk_err = ENETDOWN;
                        if (!sock_flag(sk, SOCK_DEAD))
                                sk->sk_error_report(sk);
                }
        } else {
                dev_add_pack(&po->prot_hook);
                sock_hold(sk);
                po->running = 1;
        }

out_unlock:
        spin_unlock(&po->bind_lock);
        release_sock(sk);
        return 0;
}

/*
 *      Bind a packet socket to a device
 */

#ifdef CONFIG_SOCK_PACKET

static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
        struct sock *sk=sock->sk;
        char name[15];
        struct net_device *dev;
        int err = -ENODEV;

        /*
         *      Check legality
         */

        if(addr_len!=sizeof(struct sockaddr))
                return -EINVAL;
        strlcpy(name,uaddr->sa_data,sizeof(name));

        dev = dev_get_by_name(name);
        if (dev) {
                err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
                dev_put(dev);
        }
        return err;
}
#endif

static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
        struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
        struct sock *sk=sock->sk;
        struct net_device *dev = NULL;
        int err;


        /*
         *      Check legality
         */

        if (addr_len < sizeof(struct sockaddr_ll))
                return -EINVAL;
        if (sll->sll_family != AF_PACKET)
                return -EINVAL;

        if (sll->sll_ifindex) {
                err = -ENODEV;
                dev = dev_get_by_index(sll->sll_ifindex);
                if (dev == NULL)
                        goto out;
        }
        err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
        if (dev)
                dev_put(dev);

out:
        return err;
}

static struct proto packet_proto = {
        .name     = "PACKET",
        .owner    = THIS_MODULE,
        .obj_size = sizeof(struct packet_sock),
};

/*
 *      Create a packet socket.
 */

static int packet_create(struct socket *sock, int protocol)
{
        struct sock *sk;
        struct packet_sock *po;
        int err;

        if (!capable(CAP_NET_RAW))
                return -EPERM;
        if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW
#ifdef CONFIG_SOCK_PACKET
            && sock->type != SOCK_PACKET
#endif
            )
                return -ESOCKTNOSUPPORT;

        sock->state = SS_UNCONNECTED;

        err = -ENOBUFS;
        sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1);
        if (sk == NULL)
                goto out;

        sock->ops = &packet_ops;
#ifdef CONFIG_SOCK_PACKET
        if (sock->type == SOCK_PACKET)
                sock->ops = &packet_ops_spkt;
#endif
        sock_init_data(sock, sk);

        po = pkt_sk(sk);
        sk->sk_family = PF_PACKET;
        po->num = protocol;

        sk->sk_destruct = packet_sock_destruct;
        atomic_inc(&packet_socks_nr);

        /*
         *      Attach a protocol block
         */

        spin_lock_init(&po->bind_lock);
        po->prot_hook.func = packet_rcv;
#ifdef CONFIG_SOCK_PACKET
        if (sock->type == SOCK_PACKET)
                po->prot_hook.func = packet_rcv_spkt;
#endif
        po->prot_hook.af_packet_priv = sk;

        if (protocol) {
                po->prot_hook.type = protocol;
                dev_add_pack(&po->prot_hook);
                sock_hold(sk);
                po->running = 1;
        }

        write_lock_bh(&packet_sklist_lock);
        sk_add_node(sk, &packet_sklist);
        write_unlock_bh(&packet_sklist_lock);
        return(0);
out:
        return err;
}

/*
 *      Pull a packet from our receive queue and hand it to the user.
 *      If necessary we block.
 */

static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
                          struct msghdr *msg, size_t len, int flags)
{
        struct sock *sk = sock->sk;
        struct sk_buff *skb;
        int copied, err;

        err = -EINVAL;
        if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
                goto out;

#if 0
        /* What error should we return now? EUNATTACH? */
        if (pkt_sk(sk)->ifindex < 0)
                return -ENODEV;
#endif

        /*
         *      If the address length field is there to be filled in, we fill
         *      it in now.
         */

        if (sock->type == SOCK_PACKET)
                msg->msg_namelen = sizeof(struct sockaddr_pkt);
        else
                msg->msg_namelen = sizeof(struct sockaddr_ll);

        /*
         *      Call the generic datagram receiver. This handles all sorts
         *      of horrible races and re-entrancy so we can forget about it
         *      in the protocol layers.
         *
         *      Now it will return ENETDOWN if the device has just gone down,
         *      but then it will block.
         */

        skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);

        /*
         *      An error occurred, so return it. Because skb_recv_datagram()
         *      handles the blocking, we don't need to see or worry about
         *      blocking retries.
         */

        if(skb==NULL)
                goto out;

        /*
         *      You lose any data beyond the buffer you gave. If it worries a
         *      user program they can ask the device for its MTU anyway.
         */

        copied = skb->len;
        if (copied > len)
        {
                copied=len;
                msg->msg_flags|=MSG_TRUNC;
        }

        err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
        if (err)
                goto out_free;

        sock_recv_timestamp(msg, sk, skb);

        if (msg->msg_name)
                memcpy(msg->msg_name, skb->cb, msg->msg_namelen);

        /*
         *      Free or return the buffer as appropriate. Again this
         *      hides all the races and re-entrancy issues from us.
         */
        err = (flags&MSG_TRUNC) ? skb->len : copied;

out_free:
        skb_free_datagram(sk, skb);
out:
        return err;
}

#ifdef CONFIG_SOCK_PACKET
static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
                               int *uaddr_len, int peer)
{
        struct net_device *dev;
        struct sock *sk = sock->sk;

        if (peer)
                return -EOPNOTSUPP;

        uaddr->sa_family = AF_PACKET;
        dev = dev_get_by_index(pkt_sk(sk)->ifindex);
        if (dev) {
                strlcpy(uaddr->sa_data, dev->name, 15);
                dev_put(dev);
        } else
                memset(uaddr->sa_data, 0, 14);
        *uaddr_len = sizeof(*uaddr);

        return 0;
}
#endif

static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
                          int *uaddr_len, int peer)
{
        struct net_device *dev;
        struct sock *sk = sock->sk;
        struct packet_sock *po = pkt_sk(sk);
        struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;

        if (peer)
                return -EOPNOTSUPP;

        sll->sll_family = AF_PACKET;
        sll->sll_ifindex = po->ifindex;
        sll->sll_protocol = po->num;
        dev = dev_get_by_index(po->ifindex);
        if (dev) {
                sll->sll_hatype = dev->type;
                sll->sll_halen = dev->addr_len;
                memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
                dev_put(dev);
        } else {
                sll->sll_hatype = 0;    /* Bad: we have no ARPHRD_UNSPEC */
                sll->sll_halen = 0;
        }
        *uaddr_len = sizeof(*sll);

        return 0;
}

#ifdef CONFIG_PACKET_MULTICAST
static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
{
        switch (i->type) {
        case PACKET_MR_MULTICAST:
                if (what > 0)
                        dev_mc_add(dev, i->addr, i->alen, 0);
                else
                        dev_mc_delete(dev, i->addr, i->alen, 0);
                break;
        case PACKET_MR_PROMISC:
                dev_set_promiscuity(dev, what);
                break;
        case PACKET_MR_ALLMULTI:
                dev_set_allmulti(dev, what);
                break;
        default:;
        }
}

static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
{
        for ( ; i; i=i->next) {
                if (i->ifindex == dev->ifindex)
                        packet_dev_mc(dev, i, what);
        }
}

static int packet_mc_add(struct sock *sk, struct packet_mreq *mreq)
{
        struct packet_sock *po = pkt_sk(sk);
        struct packet_mclist *ml, *i;
        struct net_device *dev;
        int err;

        rtnl_lock();

        err = -ENODEV;
        dev = __dev_get_by_index(mreq->mr_ifindex);
        if (!dev)
                goto done;

        err = -EINVAL;
        if (mreq->mr_alen > dev->addr_len)
                goto done;

        err = -ENOBUFS;
        i = (struct packet_mclist *)kmalloc(sizeof(*i), GFP_KERNEL);
        if (i == NULL)
                goto done;

        err = 0;
        for (ml = po->mclist; ml; ml = ml->next) {
                if (ml->ifindex == mreq->mr_ifindex &&
                    ml->type == mreq->mr_type &&
                    ml->alen == mreq->mr_alen &&
                    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
                        ml->count++;
                        /* Free the new element ... */
                        kfree(i);
                        goto done;
                }
        }

        i->type = mreq->mr_type;
        i->ifindex = mreq->mr_ifindex;
        i->alen = mreq->mr_alen;
        memcpy(i->addr, mreq->mr_address, i->alen);
        i->count = 1;
        i->next = po->mclist;
        po->mclist = i;
        packet_dev_mc(dev, i, +1);

done:
        rtnl_unlock();
        return err;
}

static int packet_mc_drop(struct sock *sk, struct packet_mreq *mreq)
{
        struct packet_mclist *ml, **mlp;

        rtnl_lock();

        for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
                if (ml->ifindex == mreq->mr_ifindex &&
                    ml->type == mreq->mr_type &&
                    ml->alen == mreq->mr_alen &&
                    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
                        if (--ml->count == 0) {
                                struct net_device *dev;
                                *mlp = ml->next;
                                dev = dev_get_by_index(ml->ifindex);
                                if (dev) {
                                        packet_dev_mc(dev, ml, -1);
                                        dev_put(dev);
                                }
                                kfree(ml);
                        }
                        rtnl_unlock();
                        return 0;
                }
        }
        rtnl_unlock();
        return -EADDRNOTAVAIL;
}

static void packet_flush_mclist(struct sock *sk)
{
        struct packet_sock *po = pkt_sk(sk);
        struct packet_mclist *ml;

        if (!po->mclist)
                return;

        rtnl_lock();
        while ((ml = po->mclist) != NULL) {
                struct net_device *dev;

                po->mclist = ml->next;
                if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
                        packet_dev_mc(dev, ml, -1);
                        dev_put(dev);
                }
                kfree(ml);
        }
        rtnl_unlock();
}
#endif
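
/*
 * User-space sketch of the membership interface above (illustrative
 * only; "eth0" is a placeholder).  This is the refcounted replacement
 * for toggling IFF_PROMISC directly:
 *
 *      struct packet_mreq mreq;
 *
 *      memset(&mreq, 0, sizeof(mreq));
 *      mreq.mr_ifindex = if_nametoindex("eth0");
 *      mreq.mr_type    = PACKET_MR_PROMISC;
 *      setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
 *                 &mreq, sizeof(mreq));
 */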

static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
{
        struct sock *sk = sock->sk;
        int ret;

        if (level != SOL_PACKET)
                return -ENOPROTOOPT;

        switch(optname) {
#ifdef CONFIG_PACKET_MULTICAST
        case PACKET_ADD_MEMBERSHIP:
        case PACKET_DROP_MEMBERSHIP:
        {
                struct packet_mreq mreq;
                if (optlen<sizeof(mreq))
                        return -EINVAL;
                if (copy_from_user(&mreq,optval,sizeof(mreq)))
                        return -EFAULT;
                if (optname == PACKET_ADD_MEMBERSHIP)
                        ret = packet_mc_add(sk, &mreq);
                else
                        ret = packet_mc_drop(sk, &mreq);
                return ret;
        }
#endif
#ifdef CONFIG_PACKET_MMAP
        case PACKET_RX_RING:
        {
                struct tpacket_req req;

                if (optlen<sizeof(req))
                        return -EINVAL;
                if (copy_from_user(&req,optval,sizeof(req)))
                        return -EFAULT;
                return packet_set_ring(sk, &req, 0);
        }
        case PACKET_COPY_THRESH:
        {
                int val;

                if (optlen!=sizeof(val))
                        return -EINVAL;
                if (copy_from_user(&val,optval,sizeof(val)))
                        return -EFAULT;

                pkt_sk(sk)->copy_thresh = val;
                return 0;
        }
#endif
        default:
                return -ENOPROTOOPT;
        }
}

static int packet_getsockopt(struct socket *sock, int level, int optname,
                             char __user *optval, int __user *optlen)
{
        int len;
        struct sock *sk = sock->sk;
        struct packet_sock *po = pkt_sk(sk);

        if (level != SOL_PACKET)
                return -ENOPROTOOPT;

        if (get_user(len,optlen))
                return -EFAULT;

        if (len < 0)
                return -EINVAL;

        switch(optname) {
        case PACKET_STATISTICS:
        {
                struct tpacket_stats st;

                if (len > sizeof(struct tpacket_stats))
                        len = sizeof(struct tpacket_stats);
                spin_lock_bh(&sk->sk_receive_queue.lock);
                st = po->stats;
                memset(&po->stats, 0, sizeof(st));
                spin_unlock_bh(&sk->sk_receive_queue.lock);
                st.tp_packets += st.tp_drops;

                if (copy_to_user(optval, &st, len))
                        return -EFAULT;
                break;
        }
        default:
                return -ENOPROTOOPT;
        }

        if (put_user(len, optlen))
                return -EFAULT;
        return 0;
}
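
/*
 * User-space sketch of reading the statistics above (illustrative
 * only).  Note the counters are reset on each read, and tp_packets is
 * returned inclusive of tp_drops:
 *
 *      struct tpacket_stats st;
 *      socklen_t len = sizeof(st);
 *
 *      getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len);
 *      printf("%u packets, %u drops\n", st.tp_packets, st.tp_drops);
 */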


static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
        struct sock *sk;
        struct hlist_node *node;
        struct net_device *dev = (struct net_device*)data;

        read_lock(&packet_sklist_lock);
        sk_for_each(sk, node, &packet_sklist) {
                struct packet_sock *po = pkt_sk(sk);

                switch (msg) {
                case NETDEV_UNREGISTER:
#ifdef CONFIG_PACKET_MULTICAST
                        if (po->mclist)
                                packet_dev_mclist(dev, po->mclist, -1);
                        // fallthrough
#endif
                case NETDEV_DOWN:
                        if (dev->ifindex == po->ifindex) {
                                spin_lock(&po->bind_lock);
                                if (po->running) {
                                        __dev_remove_pack(&po->prot_hook);
                                        __sock_put(sk);
                                        po->running = 0;
                                        sk->sk_err = ENETDOWN;
                                        if (!sock_flag(sk, SOCK_DEAD))
                                                sk->sk_error_report(sk);
                                }
                                if (msg == NETDEV_UNREGISTER) {
                                        po->ifindex = -1;
                                        po->prot_hook.dev = NULL;
                                }
                                spin_unlock(&po->bind_lock);
                        }
                        break;
                case NETDEV_UP:
                        spin_lock(&po->bind_lock);
                        if (dev->ifindex == po->ifindex && po->num &&
                            !po->running) {
                                dev_add_pack(&po->prot_hook);
                                sock_hold(sk);
                                po->running = 1;
                        }
                        spin_unlock(&po->bind_lock);
                        break;
                }
        }
        read_unlock(&packet_sklist_lock);
        return NOTIFY_DONE;
}


static int packet_ioctl(struct socket *sock, unsigned int cmd,
                        unsigned long arg)
{
        struct sock *sk = sock->sk;

        switch(cmd) {
                case SIOCOUTQ:
                {
                        int amount = atomic_read(&sk->sk_wmem_alloc);
                        return put_user(amount, (int __user *)arg);
                }
                case SIOCINQ:
                {
                        struct sk_buff *skb;
                        int amount = 0;

                        spin_lock_bh(&sk->sk_receive_queue.lock);
                        skb = skb_peek(&sk->sk_receive_queue);
                        if (skb)
                                amount = skb->len;
                        spin_unlock_bh(&sk->sk_receive_queue.lock);
                        return put_user(amount, (int __user *)arg);
                }
                case SIOCGSTAMP:
                        return sock_get_timestamp(sk, (struct timeval __user *)arg);

#ifdef CONFIG_INET
                case SIOCADDRT:
                case SIOCDELRT:
                case SIOCDARP:
                case SIOCGARP:
                case SIOCSARP:
                case SIOCGIFADDR:
                case SIOCSIFADDR:
                case SIOCGIFBRDADDR:
                case SIOCSIFBRDADDR:
                case SIOCGIFNETMASK:
                case SIOCSIFNETMASK:
                case SIOCGIFDSTADDR:
                case SIOCSIFDSTADDR:
                case SIOCSIFFLAGS:
                        return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif

                default:
                        return dev_ioctl(cmd, (void __user *)arg);
        }
        return 0;
}

#ifndef CONFIG_PACKET_MMAP
#define packet_mmap sock_no_mmap
#define packet_poll datagram_poll
#else

static unsigned int packet_poll(struct file * file, struct socket *sock,
                                poll_table *wait)
{
        struct sock *sk = sock->sk;
        struct packet_sock *po = pkt_sk(sk);
        unsigned int mask = datagram_poll(file, sock, wait);

        spin_lock_bh(&sk->sk_receive_queue.lock);
        if (po->pg_vec) {
                unsigned last = po->head ? po->head-1 : po->frame_max;
                struct tpacket_hdr *h;

                h = (struct tpacket_hdr *)packet_lookup_frame(po, last);

                if (h->tp_status)
                        mask |= POLLIN | POLLRDNORM;
        }
        spin_unlock_bh(&sk->sk_receive_queue.lock);
        return mask;
}


/* Dirty? Well, I still did not learn a better way to account
 * for user mmaps.
 */
1540
1541 static void packet_mm_open(struct vm_area_struct *vma)
1542 {
1543         struct file *file = vma->vm_file;
1544         struct inode *inode = file->f_dentry->d_inode;
1545         struct socket * sock = SOCKET_I(inode);
1546         struct sock *sk = sock->sk;
1547         
1548         if (sk)
1549                 atomic_inc(&pkt_sk(sk)->mapped);
1550 }
1551
1552 static void packet_mm_close(struct vm_area_struct *vma)
1553 {
1554         struct file *file = vma->vm_file;
1555         struct inode *inode = file->f_dentry->d_inode;
1556         struct socket * sock = SOCKET_I(inode);
1557         struct sock *sk = sock->sk;
1558         
1559         if (sk)
1560                 atomic_dec(&pkt_sk(sk)->mapped);
1561 }
1562
1563 static struct vm_operations_struct packet_mmap_ops = {
1564         .open = packet_mm_open,
1565         .close =packet_mm_close,
1566 };
1567
1568 static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order)
1569 {
1570         return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1);
1571 }
1572
1573 static void free_pg_vec(char **pg_vec, unsigned order, unsigned len)
1574 {
1575         int i;
1576
1577         for (i=0; i<len; i++) {
1578                 if (pg_vec[i]) {
1579                         struct page *page, *pend;
1580
1581                         pend = pg_vec_endpage(pg_vec[i], order);
1582                         for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
1583                                 ClearPageReserved(page);
1584                         free_pages((unsigned long)pg_vec[i], order);
1585                 }
1586         }
1587         kfree(pg_vec);
1588 }
1589
1590
static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
{
	char **pg_vec = NULL;
	struct packet_sock *po = pkt_sk(sk);
	int was_running, num, order = 0;
	int err = 0;

	if (req->tp_block_nr) {
		int i;

		/* Sanity tests and some calculations */

		if (po->pg_vec)
			return -EBUSY;

		if ((int)req->tp_block_size <= 0)
			return -EINVAL;
		if (req->tp_block_size & (PAGE_SIZE - 1))
			return -EINVAL;
		if (req->tp_frame_size < TPACKET_HDRLEN)
			return -EINVAL;
		if (req->tp_frame_size & (TPACKET_ALIGNMENT - 1))
			return -EINVAL;

		po->frames_per_block = req->tp_block_size / req->tp_frame_size;
		if (po->frames_per_block <= 0)
			return -EINVAL;
		if (po->frames_per_block * req->tp_block_nr != req->tp_frame_nr)
			return -EINVAL;
		/* OK! */

		/* Allocate page vector */
		while ((PAGE_SIZE << order) < req->tp_block_size)
			order++;

		err = -ENOMEM;

		pg_vec = kmalloc(req->tp_block_nr * sizeof(char *), GFP_KERNEL);
		if (pg_vec == NULL)
			goto out;
		memset(pg_vec, 0, req->tp_block_nr * sizeof(char *));

		for (i = 0; i < req->tp_block_nr; i++) {
			struct page *page, *pend;
			pg_vec[i] = (char *)__get_free_pages(GFP_KERNEL, order);
			if (!pg_vec[i])
				goto out_free_pgvec;

			pend = pg_vec_endpage(pg_vec[i], order);
			for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
				SetPageReserved(page);
		}
		/* Page vector is allocated */

		for (i = 0; i < req->tp_block_nr; i++) {
			char *ptr = pg_vec[i];
			struct tpacket_hdr *header;
			int k;

			for (k = 0; k < po->frames_per_block; k++) {
				header = (struct tpacket_hdr *)ptr;
				header->tp_status = TP_STATUS_KERNEL;
				ptr += req->tp_frame_size;
			}
		}
		/* Done */
	} else {
		if (req->tp_frame_nr)
			return -EINVAL;
	}

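	/*
	 * From here on the new ring (if any) is swapped in.  The
	 * protocol hook is removed first so that no CPU can still be
	 * queueing packets into the old ring while the buffers are
	 * exchanged.
	 */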
	lock_sock(sk);

	/* Detach socket from network */
	spin_lock(&po->bind_lock);
	was_running = po->running;
	num = po->num;
	if (was_running) {
		__dev_remove_pack(&po->prot_hook);
		po->num = 0;
		po->running = 0;
		__sock_put(sk);
	}
	spin_unlock(&po->bind_lock);

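	/* Wait for receive handlers still running on other CPUs */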
	synchronize_net();

	err = -EBUSY;
	if (closing || atomic_read(&po->mapped) == 0) {
		err = 0;
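		/*
		 * Exchange the ring under the receive queue lock.  XC()
		 * swaps its arguments, so afterwards the locals pg_vec,
		 * order and req->tp_block_nr hold the old ring's values,
		 * which the common exit path hands to free_pg_vec().
		 */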
#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })

		spin_lock_bh(&sk->sk_receive_queue.lock);
		pg_vec = XC(po->pg_vec, pg_vec);
		po->frame_max = req->tp_frame_nr - 1;
		po->head = 0;
		po->frame_size = req->tp_frame_size;
		spin_unlock_bh(&sk->sk_receive_queue.lock);

		order = XC(po->pg_vec_order, order);
		req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);

		po->pg_vec_pages = req->tp_block_size / PAGE_SIZE;
		po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
		skb_queue_purge(&sk->sk_receive_queue);
#undef XC
		if (atomic_read(&po->mapped))
			printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n",
			       atomic_read(&po->mapped));
	}

	spin_lock(&po->bind_lock);
	if (was_running && !po->running) {
		sock_hold(sk);
		po->running = 1;
		po->num = num;
		dev_add_pack(&po->prot_hook);
	}
	spin_unlock(&po->bind_lock);

	release_sock(sk);

out_free_pgvec:
	if (pg_vec)
		free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
	return err;
}

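/*
 * Map the ring into the caller's address space.  The blocks are mapped
 * back to back, so user space sees one contiguous area of
 * pg_vec_len * pg_vec_pages pages; po->mapped counts live mappings and
 * keeps packet_set_ring() from pulling the buffers out from under them.
 *
 * A rough user-space consumer loop, continuing the sketch above
 * packet_set_ring() (illustrative only; assumes tp_block_size is an
 * exact multiple of tp_frame_size so frames can be indexed linearly):
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *	unsigned int idx = 0;
 *	for (;;) {
 *		volatile struct tpacket_hdr *hdr =
 *			(void *)(ring + idx * req.tp_frame_size);
 *		while (!(hdr->tp_status & TP_STATUS_USER))
 *			poll(&pfd, 1, -1);
 *		... frame data starts at (char *)hdr + hdr->tp_mac ...
 *		hdr->tp_status = TP_STATUS_KERNEL;
 *		idx = (idx + 1) % req.tp_frame_nr;
 *	}
 */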
static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned long size;
	unsigned long start;
	int err = -EINVAL;
	int i;

	if (vma->vm_pgoff)
		return -EINVAL;

	size = vma->vm_end - vma->vm_start;

	lock_sock(sk);
	if (po->pg_vec == NULL)
		goto out;
	if (size != po->pg_vec_len * po->pg_vec_pages * PAGE_SIZE)
		goto out;

	start = vma->vm_start;
	err = -EAGAIN;
	for (i = 0; i < po->pg_vec_len; i++) {
		if (remap_pfn_range(vma, start,
				    __pa(po->pg_vec[i]) >> PAGE_SHIFT,
				    po->pg_vec_pages * PAGE_SIZE,
				    vma->vm_page_prot))
			goto out;
		start += po->pg_vec_pages * PAGE_SIZE;
	}

	/* Count the mapping only once it is fully established, so a
	 * failed remap cannot leave po->mapped permanently elevated. */
	atomic_inc(&po->mapped);
	vma->vm_ops = &packet_mmap_ops;
	err = 0;

out:
	release_sock(sk);
	return err;
}
#endif


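/*
 * Two operation tables: the legacy SOCK_PACKET interface keeps its own
 * bind/getname/sendmsg entry points, while PF_PACKET proper adds ring
 * support (mmap, poll) and the PACKET_* socket options.
 */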
#ifdef CONFIG_SOCK_PACKET
static struct proto_ops packet_ops_spkt = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind_spkt,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname_spkt,
	.poll =		datagram_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	packet_sendmsg_spkt,
	.recvmsg =	packet_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
#endif

static struct proto_ops packet_ops = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname,
	.poll =		packet_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
	.sendpage =	sock_no_sendpage,
};

#if defined(CONFIG_VNET) || defined(CONFIG_VNET_MODULE)
EXPORT_SYMBOL(packet_ops);
struct net_proto_family packet_family_ops;
EXPORT_SYMBOL(packet_family_ops);
#else
static
#endif
struct net_proto_family packet_family_ops = {
	.family =	PF_PACKET,
	.create =	packet_create,
	.owner	=	THIS_MODULE,
};

static struct notifier_block packet_netdev_notifier = {
	.notifier_call = packet_notifier,
};

#ifdef CONFIG_PROC_FS
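/*
 * /proc/net/packet: one line per packet socket, walked under
 * packet_sklist_lock via the seq_file iterator below.
 */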
static inline struct sock *packet_seq_idx(loff_t off)
{
	struct sock *s;
	struct hlist_node *node;

	sk_for_each(s, node, &packet_sklist) {
		if (!off--)
			return s;
	}
	return NULL;
}

static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
{
	read_lock(&packet_sklist_lock);
	return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN;
}

static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return (v == SEQ_START_TOKEN)
		? sk_head(&packet_sklist)
		: sk_next((struct sock *)v);
}

static void packet_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&packet_sklist_lock);
}

static int packet_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
	else {
		struct sock *s = v;
		const struct packet_sock *po = pkt_sk(s);

		seq_printf(seq,
			   "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
			   s,
			   atomic_read(&s->sk_refcnt),
			   s->sk_type,
			   ntohs(po->num),
			   po->ifindex,
			   po->running,
			   atomic_read(&s->sk_rmem_alloc),
			   sock_i_uid(s),
			   sock_i_ino(s));
	}

	return 0;
}

static struct seq_operations packet_seq_ops = {
	.start	= packet_seq_start,
	.next	= packet_seq_next,
	.stop	= packet_seq_stop,
	.show	= packet_seq_show,
};

static int packet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &packet_seq_ops);
}

static struct file_operations packet_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= packet_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

#endif

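/*
 * Module init/exit.  Registration order matters: the proto must be
 * registered before the socket family that creates instances of it,
 * and teardown runs in exactly the reverse order.
 */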
static void __exit packet_exit(void)
{
	proc_net_remove("packet");
	unregister_netdevice_notifier(&packet_netdev_notifier);
	sock_unregister(PF_PACKET);
	proto_unregister(&packet_proto);
}

static int __init packet_init(void)
{
	int rc = proto_register(&packet_proto, 0);

	if (rc != 0)
		goto out;

	sock_register(&packet_family_ops);
	register_netdevice_notifier(&packet_netdev_notifier);
	proc_net_fops_create("packet", 0, &packet_seq_fops);
out:
	return rc;
}

module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);