net/packet/af_packet.c
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              PACKET - implements raw packet sockets.
 *
 * Version:     $Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $
 *
 * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *              Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *              Alan Cox        :       verify_area() now used correctly
 *              Alan Cox        :       new skbuff lists, look ma no backlogs!
 *              Alan Cox        :       tidied skbuff lists.
 *              Alan Cox        :       Now uses generic datagram routines I
 *                                      added. Also fixed the peek/read crash
 *                                      from all old Linux datagram code.
 *              Alan Cox        :       Uses the improved datagram code.
 *              Alan Cox        :       Added NULL's for socket options.
 *              Alan Cox        :       Re-commented the code.
 *              Alan Cox        :       Use new kernel side addressing
 *              Rob Janssen     :       Correct MTU usage.
 *              Dave Platt      :       Counter leaks caused by incorrect
 *                                      interrupt locking and some slightly
 *                                      dubious gcc output. Can you read
 *                                      compiler: it said _VOLATILE_
 *      Richard Kooijman        :       Timestamp fixes.
 *              Alan Cox        :       New buffers. Use sk->mac.raw.
 *              Alan Cox        :       sendmsg/recvmsg support.
 *              Alan Cox        :       Protocol setting support
 *      Alexey Kuznetsov        :       Untied from IPv4 stack.
 *      Cyrus Durgin            :       Fixed kerneld for kmod.
 *      Michal Ostrowski        :       Module initialization cleanup.
 *         Ulises Alonso        :       Frame number limit removal and
 *                                      packet_set_ring memory leak.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kmod.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/vs_base.h>
#include <linux/vs_context.h>
#include <linux/vs_network.h>

#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif

#define CONFIG_SOCK_PACKET      1

/*
   Proposed replacement for SIOC{ADD,DEL}MULTI and the
   IFF_PROMISC, IFF_ALLMULTI flags.

   It is more expensive, but I believe it is the really correct
   solution: reentrant, safe and fault tolerant.

   IFF_PROMISC/IFF_ALLMULTI/SIOC{ADD/DEL}MULTI are faked by keeping a
   reference count and a global flag, so that the real status is
   (gflag|(count != 0)).  This way the obsolete, faulty interface can
   still be used without harming clever users.
 */
#define CONFIG_PACKET_MULTICAST 1
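
/*
 * A minimal sketch of the rule above (effective_promisc() is a
 * hypothetical helper for illustration only, not part of this file):
 * the status a driver actually honours is the legacy global flag
 * OR'ed with "reference count is non-zero".
 *
 *      static int effective_promisc(const struct net_device *dev)
 *      {
 *              // gflag | (count != 0), in the terms used above
 *              return (dev->flags & IFF_PROMISC) || dev->promiscuity != 0;
 *      }
 */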

/*
   Assumptions:
   - if a device has no dev->hard_header routine, it adds and removes
     the ll header itself. In this case the ll header is invisible
     outside of the device, but higher levels still should reserve
     dev->hard_header_len.  Some devices are clever enough to
     reallocate the skb when the header does not fit into the reserved
     space (tunnels); others are silly (PPP).
   - a packet socket receives packets with the ll header pulled,
     so SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header!=NULL
   mac.raw -> ll header
   data    -> data

Outgoing, dev->hard_header!=NULL
   mac.raw -> ll header
   data    -> ll header

Incoming, dev->hard_header==NULL
   mac.raw -> UNKNOWN position. It is very likely that it points to the
              ll header.  PPP does this, which is wrong, because it
              introduces asymmetry between the rx and tx paths.
   data    -> data

Outgoing, dev->hard_header==NULL
   mac.raw -> data. ll header is still not built!
   data    -> data

Summary
  If dev->hard_header==NULL we are unlikely to restore a sensible ll header.


On transmit:
------------

dev->hard_header != NULL
   mac.raw -> ll header
   data    -> ll header

dev->hard_header == NULL (ll header is added by the device, we cannot control it)
   mac.raw -> data
   data    -> data

   We should set nh.raw to the correct position on output; the packet
   classifier depends on it.
 */
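
/*
 * Example (userspace sketch, not part of this file) of what the layout
 * above means for a SOCK_RAW reader on an Ethernet device: since the
 * ll header is pushed back, the frame starts with it.  Error handling
 * omitted; "fd" is assumed to be a bound PF_PACKET/SOCK_RAW socket.
 *
 *      unsigned char buf[2048];
 *      ssize_t n = read(fd, buf, sizeof(buf));
 *      struct ethhdr *eh = (struct ethhdr *)buf;   // ll header first
 *      unsigned char *payload = buf + sizeof(*eh); // then the data
 */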

/* List of all packet sockets. */
HLIST_HEAD(packet_sklist);
static rwlock_t packet_sklist_lock = RW_LOCK_UNLOCKED;

atomic_t packet_socks_nr;


/* Private packet socket structures. */

#ifdef CONFIG_PACKET_MULTICAST
struct packet_mclist
{
        struct packet_mclist    *next;
        int                     ifindex;
        int                     count;
        unsigned short          type;
        unsigned short          alen;
        unsigned char           addr[8];
};
#endif
#ifdef CONFIG_PACKET_MMAP
static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
#endif

static void packet_flush_mclist(struct sock *sk);

struct packet_opt
{
        struct tpacket_stats    stats;
#ifdef CONFIG_PACKET_MMAP
        unsigned long           *pg_vec;
        unsigned int            head;
        unsigned int            frames_per_block;
        unsigned int            frame_size;
        unsigned int            frame_max;
        int                     copy_thresh;
#endif
        struct packet_type      prot_hook;
        spinlock_t              bind_lock;
        char                    running;        /* prot_hook is attached*/
        int                     ifindex;        /* bound device         */
        unsigned short          num;
#ifdef CONFIG_PACKET_MULTICAST
        struct packet_mclist    *mclist;
#endif
#ifdef CONFIG_PACKET_MMAP
        atomic_t                mapped;
        unsigned int            pg_vec_order;
        unsigned int            pg_vec_pages;
        unsigned int            pg_vec_len;
#endif
};

#ifdef CONFIG_PACKET_MMAP

static inline unsigned long packet_lookup_frame(struct packet_opt *po, unsigned int position)
{
        unsigned int pg_vec_pos, frame_offset;
        unsigned long frame;

        pg_vec_pos = position / po->frames_per_block;
        frame_offset = position % po->frames_per_block;

        frame = (unsigned long) (po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size));

        return frame;
}
#endif
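
/*
 * Userspace mirrors this indexing when walking the mmap()ed ring
 * (sketch, with hypothetical variable names): frame i lives at
 *
 *      ring + (i / frames_per_block) * tp_block_size
 *           + (i % frames_per_block) * tp_frame_size
 *
 * A frame belongs to userspace while its tp_status is non-zero
 * (TP_STATUS_USER); writing TP_STATUS_KERNEL hands it back.
 */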

#define pkt_sk(__sk) ((struct packet_opt *)(__sk)->sk_protinfo)

void packet_sock_destruct(struct sock *sk)
{
        BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
        BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
        BUG_ON(sk->sk_nx_info);
        BUG_ON(sk->sk_vx_info);

        if (!sock_flag(sk, SOCK_DEAD)) {
                printk("Attempt to release alive packet socket: %p\n", sk);
                return;
        }

        if (pkt_sk(sk))
                kfree(pkt_sk(sk));
        atomic_dec(&packet_socks_nr);
#ifdef PACKET_REFCNT_DEBUG
        printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
#endif
}


extern struct proto_ops packet_ops;

#ifdef CONFIG_SOCK_PACKET
extern struct proto_ops packet_ops_spkt;

static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
{
        struct sock *sk;
        struct sockaddr_pkt *spkt;

        /*
         *      When we registered the protocol we saved the socket in the data
         *      field for just this event.
         */

        sk = pt->af_packet_priv;

        /*
         *      Yank back the headers [hope the device set this
         *      right or kerboom...]
         *
         *      Incoming packets have the ll header pulled,
         *      push it back.
         *
         *      For outgoing ones skb->data == skb->mac.raw
         *      so this procedure is a no-op.
         */

        if (skb->pkt_type == PACKET_LOOPBACK)
                goto out;

        if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
                goto oom;

        /* drop any routing info */
        dst_release(skb->dst);
        skb->dst = NULL;

        spkt = (struct sockaddr_pkt*)skb->cb;

        skb_push(skb, skb->data-skb->mac.raw);

        /*
         *      The SOCK_PACKET socket receives _all_ frames.
         */

        spkt->spkt_family = dev->type;
        strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
        spkt->spkt_protocol = skb->protocol;

        /*
         *      Charge the memory to the socket. This is done specifically
         *      to prevent sockets using all the memory up.
         */

        if (sock_queue_rcv_skb(sk,skb) == 0)
                return 0;

out:
        kfree_skb(skb);
oom:
        return 0;
}


/*
 *      Output a raw packet to a device layer. This bypasses all the other
 *      protocol layers and you must therefore supply it with a complete frame.
 */

static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
                               struct msghdr *msg, size_t len)
{
        struct sock *sk = sock->sk;
        struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
        struct sk_buff *skb;
        struct net_device *dev;
        unsigned short proto=0;
        int err;

        /*
         *      Get and verify the address.
         */

        if (saddr)
        {
                if (msg->msg_namelen < sizeof(struct sockaddr))
                        return(-EINVAL);
                if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
                        proto=saddr->spkt_protocol;
        }
        else
                return(-ENOTCONN);      /* SOCK_PACKET must be sent giving an address */

        /*
         *      Find the device first to size check it
         */

        saddr->spkt_device[13] = 0;
        dev = dev_get_by_name(saddr->spkt_device);
        err = -ENODEV;
        if (dev == NULL)
                goto out_unlock;

        /*
         *      You may not queue a frame bigger than the mtu. This is the lowest level
         *      raw protocol and you must do your own fragmentation at this level.
         */

        err = -EMSGSIZE;
        if(len>dev->mtu+dev->hard_header_len)
                goto out_unlock;

        err = -ENOBUFS;
        skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);

        /*
         *      If the write buffer is full, then tough. At this level the user gets to
         *      deal with the problem - do your own algorithmic backoffs. That's far
         *      more flexible.
         */

        if (skb == NULL)
                goto out_unlock;

        /*
         *      Fill it in
         */

        /* FIXME: Save some space for broken drivers that write a
         * hard header at transmission time by themselves. PPP is the
         * notable one here. This should really be fixed at the driver level.
         */
        skb_reserve(skb, LL_RESERVED_SPACE(dev));
        skb->nh.raw = skb->data;

        /* Try to align data part correctly */
        if (dev->hard_header) {
                skb->data -= dev->hard_header_len;
                skb->tail -= dev->hard_header_len;
                if (len < dev->hard_header_len)
                        skb->nh.raw = skb->data;
        }

        /* Returns -EFAULT on error */
        err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
        skb->protocol = proto;
        skb->dev = dev;
        skb->priority = sk->sk_priority;
        if (err)
                goto out_free;

        err = -ENETDOWN;
        if (!(dev->flags & IFF_UP))
                goto out_free;

        /*
         *      Now send it
         */

        dev_queue_xmit(skb);
        dev_put(dev);
        return(len);

out_free:
        kfree_skb(skb);
out_unlock:
        if (dev)
                dev_put(dev);
        return err;
}
#endif
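
/*
 * Example (userspace sketch, not part of this file): sending one raw
 * frame through the obsolete SOCK_PACKET interface.  "frame"/"flen"
 * are assumed to hold a complete link-level frame; error handling
 * omitted.  Note that msg_namelen must equal sizeof(struct
 * sockaddr_pkt) for spkt_protocol to be honoured, as checked above.
 *
 *      int fd = socket(PF_PACKET, SOCK_PACKET, htons(ETH_P_ALL));
 *      struct sockaddr_pkt spkt;
 *      memset(&spkt, 0, sizeof(spkt));
 *      strncpy(spkt.spkt_device, "eth0", sizeof(spkt.spkt_device));
 *      spkt.spkt_protocol = htons(ETH_P_ALL);
 *      sendto(fd, frame, flen, 0, (struct sockaddr *)&spkt, sizeof(spkt));
 */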

static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned res)
{
        struct sk_filter *filter;

        bh_lock_sock(sk);
        filter = sk->sk_filter;
        /*
         * Our caller already checked that filter != NULL but we need to
         * verify that under bh_lock_sock() to be safe
         */
        if (likely(filter != NULL))
                res = sk_run_filter(skb, filter->insns, filter->len);
        bh_unlock_sock(sk);

        return res;
}
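
/*
 * Example (userspace sketch, not part of this file): attaching the
 * socket filter that run_filter() executes.  The BPF program's return
 * value caps the snap length, as used by the callers below; returning
 * 0 drops the packet.  Error handling omitted.
 *
 *      struct sock_filter code[] = {
 *              BPF_STMT(BPF_RET|BPF_K, 96),    // accept first 96 bytes
 *      };
 *      struct sock_fprog prog = { .len = 1, .filter = code };
 *      setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
 */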

/*
   This function does lazy skb cloning in the hope that most packets
   are discarded by BPF.

   Note the tricky part: we DO mangle the shared skb! skb->data, skb->len
   and skb->cb are mangled. It works because (and until) packets
   falling here are owned by the current CPU. Output packets are cloned
   by dev_queue_xmit_nit(), input packets are processed by net_bh
   sequentially, so that if we return the skb to its original state on
   exit, we will not harm anyone.
 */

static int packet_rcv(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
{
        struct sock *sk;
        struct sockaddr_ll *sll;
        struct packet_opt *po;
        u8 * skb_head = skb->data;
        int skb_len = skb->len;
        unsigned snaplen;

        if (skb->pkt_type == PACKET_LOOPBACK)
                goto drop;

        sk = pt->af_packet_priv;
        po = pkt_sk(sk);

        if (sk->sk_xid && sk->sk_xid != skb->xid)
                goto drop;

        skb->dev = dev;

        if (dev->hard_header) {
                /* The device has an explicit notion of ll header,
                   exported to higher levels.

                   Otherwise, the device hides the details of its frame
                   structure, so that the corresponding packet header is
                   never delivered to the user.
                 */
                if (sk->sk_type != SOCK_DGRAM)
                        skb_push(skb, skb->data - skb->mac.raw);
                else if (skb->pkt_type == PACKET_OUTGOING) {
                        /* Special case: outgoing packets have ll header at head */
                        skb_pull(skb, skb->nh.raw - skb->data);
                }
        }

        snaplen = skb->len;

        if (sk->sk_filter) {
                unsigned res = run_filter(skb, sk, snaplen);
                if (res == 0)
                        goto drop_n_restore;
                if (snaplen > res)
                        snaplen = res;
        }

        if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
            (unsigned)sk->sk_rcvbuf)
                goto drop_n_acct;

        if (skb_shared(skb)) {
                struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
                if (nskb == NULL)
                        goto drop_n_acct;

                if (skb_head != skb->data) {
                        skb->data = skb_head;
                        skb->len = skb_len;
                }
                kfree_skb(skb);
                skb = nskb;
        }

        sll = (struct sockaddr_ll*)skb->cb;
        sll->sll_family = AF_PACKET;
        sll->sll_hatype = dev->type;
        sll->sll_protocol = skb->protocol;
        sll->sll_pkttype = skb->pkt_type;
        sll->sll_ifindex = dev->ifindex;
        sll->sll_halen = 0;

        if (dev->hard_header_parse)
                sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);

        if (pskb_trim(skb, snaplen))
                goto drop_n_acct;

        skb_set_owner_r(skb, sk);
        skb->dev = NULL;
        dst_release(skb->dst);
        skb->dst = NULL;

        spin_lock(&sk->sk_receive_queue.lock);
        po->stats.tp_packets++;
        __skb_queue_tail(&sk->sk_receive_queue, skb);
        spin_unlock(&sk->sk_receive_queue.lock);
        sk->sk_data_ready(sk, skb->len);
        return 0;

drop_n_acct:
        spin_lock(&sk->sk_receive_queue.lock);
        po->stats.tp_drops++;
        spin_unlock(&sk->sk_receive_queue.lock);

drop_n_restore:
        if (skb_head != skb->data && skb_shared(skb)) {
                skb->data = skb_head;
                skb->len = skb_len;
        }
drop:
        kfree_skb(skb);
        return 0;
}

#ifdef CONFIG_PACKET_MMAP
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
{
        struct sock *sk;
        struct packet_opt *po;
        struct sockaddr_ll *sll;
        struct tpacket_hdr *h;
        u8 * skb_head = skb->data;
        int skb_len = skb->len;
        unsigned snaplen;
        unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
        unsigned short macoff, netoff;
        struct sk_buff *copy_skb = NULL;

        if (skb->pkt_type == PACKET_LOOPBACK)
                goto drop;

        sk = pt->af_packet_priv;
        po = pkt_sk(sk);

        if (dev->hard_header) {
                if (sk->sk_type != SOCK_DGRAM)
                        skb_push(skb, skb->data - skb->mac.raw);
                else if (skb->pkt_type == PACKET_OUTGOING) {
                        /* Special case: outgoing packets have ll header at head */
                        skb_pull(skb, skb->nh.raw - skb->data);
                        if (skb->ip_summed == CHECKSUM_HW)
                                status |= TP_STATUS_CSUMNOTREADY;
                }
        }

        snaplen = skb->len;

        if (sk->sk_filter) {
                unsigned res = run_filter(skb, sk, snaplen);
                if (res == 0)
                        goto drop_n_restore;
                if (snaplen > res)
                        snaplen = res;
        }

        if (sk->sk_type == SOCK_DGRAM) {
                macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
        } else {
                unsigned maclen = skb->nh.raw - skb->data;
                netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
                macoff = netoff - maclen;
        }

        if (macoff + snaplen > po->frame_size) {
                if (po->copy_thresh &&
                    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
                    (unsigned)sk->sk_rcvbuf) {
                        if (skb_shared(skb)) {
                                copy_skb = skb_clone(skb, GFP_ATOMIC);
                        } else {
                                copy_skb = skb_get(skb);
                                skb_head = skb->data;
                        }
                        if (copy_skb)
                                skb_set_owner_r(copy_skb, sk);
                }
                snaplen = po->frame_size - macoff;
                if ((int)snaplen < 0)
                        snaplen = 0;
        }
        if (snaplen > skb->len-skb->data_len)
                snaplen = skb->len-skb->data_len;

        spin_lock(&sk->sk_receive_queue.lock);
        h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head);

        if (h->tp_status)
                goto ring_is_full;
        po->head = po->head != po->frame_max ? po->head+1 : 0;
        po->stats.tp_packets++;
        if (copy_skb) {
                status |= TP_STATUS_COPY;
                __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
        }
        if (!po->stats.tp_drops)
                status &= ~TP_STATUS_LOSING;
        spin_unlock(&sk->sk_receive_queue.lock);

        memcpy((u8*)h + macoff, skb->data, snaplen);

        h->tp_len = skb->len;
        h->tp_snaplen = snaplen;
        h->tp_mac = macoff;
        h->tp_net = netoff;
        if (skb->stamp.tv_sec == 0) {
                do_gettimeofday(&skb->stamp);
                sock_enable_timestamp(sk);
        }
        h->tp_sec = skb->stamp.tv_sec;
        h->tp_usec = skb->stamp.tv_usec;

        sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
        sll->sll_halen = 0;
        if (dev->hard_header_parse)
                sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
        sll->sll_family = AF_PACKET;
        sll->sll_hatype = dev->type;
        sll->sll_protocol = skb->protocol;
        sll->sll_pkttype = skb->pkt_type;
        sll->sll_ifindex = dev->ifindex;

        h->tp_status = status;
        mb();

        {
                struct page *p_start, *p_end;
                u8 *h_end = (u8 *)h + macoff + snaplen - 1;

                p_start = virt_to_page(h);
                p_end = virt_to_page(h_end);
                while (p_start <= p_end) {
                        flush_dcache_page(p_start);
                        p_start++;
                }
        }

        sk->sk_data_ready(sk, 0);

drop_n_restore:
        if (skb_head != skb->data && skb_shared(skb)) {
                skb->data = skb_head;
                skb->len = skb_len;
        }
drop:
        kfree_skb(skb);
        return 0;

ring_is_full:
        po->stats.tp_drops++;
        spin_unlock(&sk->sk_receive_queue.lock);

        sk->sk_data_ready(sk, 0);
        if (copy_skb)
                kfree_skb(copy_skb);
        goto drop_n_restore;
}

#endif


static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
                          struct msghdr *msg, size_t len)
{
        struct sock *sk = sock->sk;
        struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
        struct sk_buff *skb;
        struct net_device *dev;
        unsigned short proto;
        unsigned char *addr;
        int ifindex, err, reserve = 0;

        /*
         *      Get and verify the address.
         */

        if (saddr == NULL) {
                struct packet_opt *po = pkt_sk(sk);

                ifindex = po->ifindex;
                proto   = po->num;
                addr    = NULL;
        } else {
                err = -EINVAL;
                if (msg->msg_namelen < sizeof(struct sockaddr_ll))
                        goto out;
                ifindex = saddr->sll_ifindex;
                proto   = saddr->sll_protocol;
                addr    = saddr->sll_addr;
        }


        dev = dev_get_by_index(ifindex);
        err = -ENXIO;
        if (dev == NULL)
                goto out_unlock;
        if (sock->type == SOCK_RAW)
                reserve = dev->hard_header_len;

        err = -EMSGSIZE;
        if (len > dev->mtu+reserve)
                goto out_unlock;

        skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
                                msg->msg_flags & MSG_DONTWAIT, &err);
        if (skb==NULL)
                goto out_unlock;

        skb_reserve(skb, LL_RESERVED_SPACE(dev));
        skb->nh.raw = skb->data;

        if (dev->hard_header) {
                int res;
                err = -EINVAL;
                res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
                if (sock->type != SOCK_DGRAM) {
                        skb->tail = skb->data;
                        skb->len = 0;
                } else if (res < 0)
                        goto out_free;
        }

        /* Returns -EFAULT on error */
        err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
        if (err)
                goto out_free;

        skb->protocol = proto;
        skb->dev = dev;
        skb->priority = sk->sk_priority;

        err = -ENETDOWN;
        if (!(dev->flags & IFF_UP))
                goto out_free;

        /*
         *      Now send it
         */

        err = dev_queue_xmit(skb);
        if (err > 0 && (err = net_xmit_errno(err)) != 0)
                goto out_unlock;

        dev_put(dev);

        return(len);

out_free:
        kfree_skb(skb);
out_unlock:
        if (dev)
                dev_put(dev);
out:
        return err;
}

/*
 *      Close a PACKET socket. This is fairly simple. We immediately go
 *      to 'closed' state and remove our protocol entry in the device list.
 */

static int packet_release(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct packet_opt *po = pkt_sk(sk);

        if (!sk)
                return 0;

        write_lock_bh(&packet_sklist_lock);
        sk_del_node_init(sk);
        write_unlock_bh(&packet_sklist_lock);

        /*
         *      Unhook packet receive handler.
         */

        if (po->running) {
                /*
                 *      Remove the protocol hook
                 */
                dev_remove_pack(&po->prot_hook);
                po->running = 0;
                po->num = 0;
                __sock_put(sk);
        }

#ifdef CONFIG_PACKET_MULTICAST
        packet_flush_mclist(sk);
#endif

#ifdef CONFIG_PACKET_MMAP
        if (po->pg_vec) {
                struct tpacket_req req;
                memset(&req, 0, sizeof(req));
                packet_set_ring(sk, &req, 1);
        }
#endif

        clr_vx_info(&sk->sk_vx_info);
        clr_nx_info(&sk->sk_nx_info);

        /*
         *      Now the socket is dead. No more input will appear.
         */

        sock_orphan(sk);
        sock->sk = NULL;

        /* Purge queues */

        skb_queue_purge(&sk->sk_receive_queue);

        sock_put(sk);
        return 0;
}

/*
 *      Attach a packet hook.
 */

static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
{
        struct packet_opt *po = pkt_sk(sk);
        /*
         *      Detach an existing hook if present.
         */

        lock_sock(sk);

        spin_lock(&po->bind_lock);
        if (po->running) {
                __sock_put(sk);
                po->running = 0;
                po->num = 0;
                spin_unlock(&po->bind_lock);
                dev_remove_pack(&po->prot_hook);
                spin_lock(&po->bind_lock);
        }

        po->num = protocol;
        po->prot_hook.type = protocol;
        po->prot_hook.dev = dev;

        po->ifindex = dev ? dev->ifindex : 0;

        if (protocol == 0)
                goto out_unlock;

        if (dev) {
                if (dev->flags&IFF_UP) {
                        dev_add_pack(&po->prot_hook);
                        sock_hold(sk);
                        po->running = 1;
                } else {
                        sk->sk_err = ENETDOWN;
                        if (!sock_flag(sk, SOCK_DEAD))
                                sk->sk_error_report(sk);
                }
        } else {
                dev_add_pack(&po->prot_hook);
                sock_hold(sk);
                po->running = 1;
        }

out_unlock:
        spin_unlock(&po->bind_lock);
        release_sock(sk);
        return 0;
}

/*
 *      Bind a packet socket to a device
 */

#ifdef CONFIG_SOCK_PACKET

static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
        struct sock *sk=sock->sk;
        char name[15];
        struct net_device *dev;
        int err = -ENODEV;

        /*
         *      Check legality
         */

        if(addr_len!=sizeof(struct sockaddr))
                return -EINVAL;
        strlcpy(name,uaddr->sa_data,sizeof(name));

        dev = dev_get_by_name(name);
        if (dev) {
                err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
                dev_put(dev);
        }
        return err;
}
#endif

static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
        struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
        struct sock *sk=sock->sk;
        struct net_device *dev = NULL;
        int err;


        /*
         *      Check legality
         */

        if (addr_len < sizeof(struct sockaddr_ll))
                return -EINVAL;
        if (sll->sll_family != AF_PACKET)
                return -EINVAL;

        if (sll->sll_ifindex) {
                err = -ENODEV;
                dev = dev_get_by_index(sll->sll_ifindex);
                if (dev == NULL)
                        goto out;
        }
        err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
        if (dev)
                dev_put(dev);

out:
        return err;
}
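
/*
 * Example (userspace sketch, not part of this file): the typical way a
 * user binds a PF_PACKET socket to one interface and protocol.  Error
 * handling omitted; "eth0" is an assumed interface name.
 *
 *      int fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 *      struct sockaddr_ll sll;
 *      memset(&sll, 0, sizeof(sll));
 *      sll.sll_family   = AF_PACKET;
 *      sll.sll_protocol = htons(ETH_P_ALL);
 *      sll.sll_ifindex  = if_nametoindex("eth0");
 *      bind(fd, (struct sockaddr *)&sll, sizeof(sll));
 */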


/*
 *      Create a packet socket.
 */

static int packet_create(struct socket *sock, int protocol)
{
        struct sock *sk;
        struct packet_opt *po;
        int err;

        if (!capable(CAP_NET_RAW))
                return -EPERM;
        if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW
#ifdef CONFIG_SOCK_PACKET
            && sock->type != SOCK_PACKET
#endif
            )
                return -ESOCKTNOSUPPORT;

        sock->state = SS_UNCONNECTED;

        err = -ENOBUFS;
        sk = sk_alloc(PF_PACKET, GFP_KERNEL, 1, NULL);
        if (sk == NULL)
                goto out;

        sock->ops = &packet_ops;
#ifdef CONFIG_SOCK_PACKET
        if (sock->type == SOCK_PACKET)
                sock->ops = &packet_ops_spkt;
#endif
        sock_init_data(sock,sk);
        sk_set_owner(sk, THIS_MODULE);

        po = sk->sk_protinfo = kmalloc(sizeof(*po), GFP_KERNEL);
        if (!po)
                goto out_free;
        memset(po, 0, sizeof(*po));
        sk->sk_family = PF_PACKET;
        po->num = protocol;

        sk->sk_destruct = packet_sock_destruct;
        atomic_inc(&packet_socks_nr);

        set_vx_info(&sk->sk_vx_info, current->vx_info);
        sk->sk_xid = vx_current_xid();
        set_nx_info(&sk->sk_nx_info, current->nx_info);
        sk->sk_nid = nx_current_nid();

        /*
         *      Attach a protocol block
         */

        spin_lock_init(&po->bind_lock);
        po->prot_hook.func = packet_rcv;
#ifdef CONFIG_SOCK_PACKET
        if (sock->type == SOCK_PACKET)
                po->prot_hook.func = packet_rcv_spkt;
#endif
        po->prot_hook.af_packet_priv = sk;

        if (protocol) {
                po->prot_hook.type = protocol;
                dev_add_pack(&po->prot_hook);
                sock_hold(sk);
                po->running = 1;
        }

        write_lock_bh(&packet_sklist_lock);
        sk_add_node(sk, &packet_sklist);
        write_unlock_bh(&packet_sklist_lock);
        return(0);

out_free:
        sk_free(sk);
out:
        return err;
}

/*
 *      Pull a packet from our receive queue and hand it to the user.
 *      If necessary we block.
 */

static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
                          struct msghdr *msg, size_t len, int flags)
{
        struct sock *sk = sock->sk;
        struct sk_buff *skb;
        int copied, err;

        err = -EINVAL;
        if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
                goto out;

#if 0
        /* What error should we return now? EUNATTACH? */
        if (pkt_sk(sk)->ifindex < 0)
                return -ENODEV;
#endif

        /*
         *      If the address length field is there to be filled in, we fill
         *      it in now.
         */

        if (sock->type == SOCK_PACKET)
                msg->msg_namelen = sizeof(struct sockaddr_pkt);
        else
                msg->msg_namelen = sizeof(struct sockaddr_ll);

        /*
         *      Call the generic datagram receiver. This handles all sorts
         *      of horrible races and re-entrancy so we can forget about it
         *      in the protocol layers.
         *
         *      Now it will return ENETDOWN if the device has just gone down,
         *      but then it will block.
         */

        skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);

        /*
         *      An error occurred so return it. Because skb_recv_datagram()
         *      handles the blocking for us, we don't need to see or worry
         *      about blocking retries.
         */

        if(skb==NULL)
                goto out;

        /*
         *      You lose any data beyond the buffer you gave. If it worries a
         *      user program they can ask the device for its MTU anyway.
         */

        copied = skb->len;
        if (copied > len)
        {
                copied=len;
                msg->msg_flags|=MSG_TRUNC;
        }

        err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
        if (err)
                goto out_free;

        sock_recv_timestamp(msg, sk, skb);

        if (msg->msg_name)
                memcpy(msg->msg_name, skb->cb, msg->msg_namelen);

        /*
         *      Free or return the buffer as appropriate. Again this
         *      hides all the races and re-entrancy issues from us.
         */
        err = (flags&MSG_TRUNC) ? skb->len : copied;

out_free:
        skb_free_datagram(sk, skb);
out:
        return err;
}
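
/*
 * Example (userspace sketch, not part of this file): receiving one
 * frame together with the sockaddr_ll that was filled in from skb->cb
 * above.  Error handling omitted.
 *
 *      unsigned char buf[2048];
 *      struct sockaddr_ll from;
 *      socklen_t fromlen = sizeof(from);
 *      ssize_t n = recvfrom(fd, buf, sizeof(buf), 0,
 *                           (struct sockaddr *)&from, &fromlen);
 *      // from.sll_ifindex and from.sll_pkttype identify the origin;
 *      // MSG_TRUNC in msg_flags (via recvmsg) signals a longer frame.
 */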

#ifdef CONFIG_SOCK_PACKET
static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
                               int *uaddr_len, int peer)
{
        struct net_device *dev;
        struct sock *sk = sock->sk;

        if (peer)
                return -EOPNOTSUPP;

        uaddr->sa_family = AF_PACKET;
        dev = dev_get_by_index(pkt_sk(sk)->ifindex);
        if (dev) {
                strlcpy(uaddr->sa_data, dev->name, 15);
                dev_put(dev);
        } else
                memset(uaddr->sa_data, 0, 14);
        *uaddr_len = sizeof(*uaddr);

        return 0;
}
#endif

static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
                          int *uaddr_len, int peer)
{
        struct net_device *dev;
        struct sock *sk = sock->sk;
        struct packet_opt *po = pkt_sk(sk);
        struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;

        if (peer)
                return -EOPNOTSUPP;

        sll->sll_family = AF_PACKET;
        sll->sll_ifindex = po->ifindex;
        sll->sll_protocol = po->num;
        dev = dev_get_by_index(po->ifindex);
        if (dev) {
                sll->sll_hatype = dev->type;
                sll->sll_halen = dev->addr_len;
                memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
                dev_put(dev);
        } else {
                sll->sll_hatype = 0;    /* Bad: we have no ARPHRD_UNSPEC */
                sll->sll_halen = 0;
        }
        *uaddr_len = sizeof(*sll);

        return 0;
}

#ifdef CONFIG_PACKET_MULTICAST
static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
{
        switch (i->type) {
        case PACKET_MR_MULTICAST:
                if (what > 0)
                        dev_mc_add(dev, i->addr, i->alen, 0);
                else
                        dev_mc_delete(dev, i->addr, i->alen, 0);
                break;
        case PACKET_MR_PROMISC:
                dev_set_promiscuity(dev, what);
                break;
        case PACKET_MR_ALLMULTI:
                dev_set_allmulti(dev, what);
                break;
        default:;
        }
}

static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
{
        for ( ; i; i=i->next) {
                if (i->ifindex == dev->ifindex)
                        packet_dev_mc(dev, i, what);
        }
}

static int packet_mc_add(struct sock *sk, struct packet_mreq *mreq)
{
        struct packet_opt *po = pkt_sk(sk);
        struct packet_mclist *ml, *i;
        struct net_device *dev;
        int err;

        rtnl_lock();

        err = -ENODEV;
        dev = __dev_get_by_index(mreq->mr_ifindex);
        if (!dev)
                goto done;

        err = -EINVAL;
        if (mreq->mr_alen > dev->addr_len)
                goto done;

        err = -ENOBUFS;
        i = (struct packet_mclist *)kmalloc(sizeof(*i), GFP_KERNEL);
        if (i == NULL)
                goto done;

        err = 0;
        for (ml = po->mclist; ml; ml = ml->next) {
                if (ml->ifindex == mreq->mr_ifindex &&
                    ml->type == mreq->mr_type &&
                    ml->alen == mreq->mr_alen &&
                    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
                        ml->count++;
                        /* Free the new element ... */
                        kfree(i);
                        goto done;
                }
        }

        i->type = mreq->mr_type;
        i->ifindex = mreq->mr_ifindex;
        i->alen = mreq->mr_alen;
        memcpy(i->addr, mreq->mr_address, i->alen);
        i->count = 1;
        i->next = po->mclist;
        po->mclist = i;
        packet_dev_mc(dev, i, +1);

done:
        rtnl_unlock();
        return err;
}

static int packet_mc_drop(struct sock *sk, struct packet_mreq *mreq)
{
        struct packet_mclist *ml, **mlp;

        rtnl_lock();

        for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
                if (ml->ifindex == mreq->mr_ifindex &&
                    ml->type == mreq->mr_type &&
                    ml->alen == mreq->mr_alen &&
                    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
                        if (--ml->count == 0) {
                                struct net_device *dev;
                                *mlp = ml->next;
                                dev = dev_get_by_index(ml->ifindex);
                                if (dev) {
                                        packet_dev_mc(dev, ml, -1);
                                        dev_put(dev);
                                }
                                kfree(ml);
                        }
                        rtnl_unlock();
                        return 0;
                }
        }
        rtnl_unlock();
        return -EADDRNOTAVAIL;
}

static void packet_flush_mclist(struct sock *sk)
{
        struct packet_opt *po = pkt_sk(sk);
        struct packet_mclist *ml;

        if (!po->mclist)
                return;

        rtnl_lock();
        while ((ml = po->mclist) != NULL) {
                struct net_device *dev;

                po->mclist = ml->next;
                if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
                        packet_dev_mc(dev, ml, -1);
                        dev_put(dev);
                }
                kfree(ml);
        }
        rtnl_unlock();
}
#endif
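
/*
 * Example (userspace sketch, not part of this file): using the
 * reference-counted interface implemented above instead of toggling
 * IFF_PROMISC directly.  Error handling omitted; "eth0" is an assumed
 * interface name.
 *
 *      struct packet_mreq mr;
 *      memset(&mr, 0, sizeof(mr));
 *      mr.mr_ifindex = if_nametoindex("eth0");
 *      mr.mr_type    = PACKET_MR_PROMISC;
 *      setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mr, sizeof(mr));
 *      // ... and PACKET_DROP_MEMBERSHIP with the same mr to undo it.
 */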

static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
{
        struct sock *sk = sock->sk;
        int ret;

        if (level != SOL_PACKET)
                return -ENOPROTOOPT;

        switch(optname) {
#ifdef CONFIG_PACKET_MULTICAST
        case PACKET_ADD_MEMBERSHIP:
        case PACKET_DROP_MEMBERSHIP:
        {
                struct packet_mreq mreq;
                if (optlen<sizeof(mreq))
                        return -EINVAL;
                if (copy_from_user(&mreq,optval,sizeof(mreq)))
                        return -EFAULT;
                if (optname == PACKET_ADD_MEMBERSHIP)
                        ret = packet_mc_add(sk, &mreq);
                else
                        ret = packet_mc_drop(sk, &mreq);
                return ret;
        }
#endif
#ifdef CONFIG_PACKET_MMAP
        case PACKET_RX_RING:
        {
                struct tpacket_req req;

                if (optlen<sizeof(req))
                        return -EINVAL;
                if (copy_from_user(&req,optval,sizeof(req)))
                        return -EFAULT;
                return packet_set_ring(sk, &req, 0);
        }
        case PACKET_COPY_THRESH:
        {
                int val;

                if (optlen!=sizeof(val))
                        return -EINVAL;
                if (copy_from_user(&val,optval,sizeof(val)))
                        return -EFAULT;

                pkt_sk(sk)->copy_thresh = val;
                return 0;
        }
#endif
        default:
                return -ENOPROTOOPT;
        }
}

int packet_getsockopt(struct socket *sock, int level, int optname,
                      char __user *optval, int __user *optlen)
{
        int len;
        struct sock *sk = sock->sk;
        struct packet_opt *po = pkt_sk(sk);

        if (level != SOL_PACKET)
                return -ENOPROTOOPT;

        if (get_user(len,optlen))
                return -EFAULT;

        if (len < 0)
                return -EINVAL;

        switch(optname) {
        case PACKET_STATISTICS:
        {
                struct tpacket_stats st;

                if (len > sizeof(struct tpacket_stats))
                        len = sizeof(struct tpacket_stats);
                spin_lock_bh(&sk->sk_receive_queue.lock);
                st = po->stats;
                memset(&po->stats, 0, sizeof(st));
                spin_unlock_bh(&sk->sk_receive_queue.lock);
                st.tp_packets += st.tp_drops;

                if (copy_to_user(optval, &st, len))
                        return -EFAULT;
                break;
        }
        default:
                return -ENOPROTOOPT;
        }

        if (put_user(len, optlen))
                return -EFAULT;
        return 0;
}
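
/*
 * Example (userspace sketch, not part of this file): reading the drop
 * counters.  Two properties are visible in the code above: the
 * counters are reset on every read, and tp_packets is returned
 * including tp_drops.
 *
 *      struct tpacket_stats st;
 *      socklen_t slen = sizeof(st);
 *      getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &slen);
 *      printf("%u packets, %u dropped\n", st.tp_packets, st.tp_drops);
 */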


static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
        struct sock *sk;
        struct hlist_node *node;
        struct net_device *dev = (struct net_device*)data;

        read_lock(&packet_sklist_lock);
        sk_for_each(sk, node, &packet_sklist) {
                struct packet_opt *po = pkt_sk(sk);

                switch (msg) {
                case NETDEV_UNREGISTER:
#ifdef CONFIG_PACKET_MULTICAST
                        if (po->mclist)
                                packet_dev_mclist(dev, po->mclist, -1);
                        /* fallthrough */
#endif
                case NETDEV_DOWN:
                        if (dev->ifindex == po->ifindex) {
                                spin_lock(&po->bind_lock);
                                if (po->running) {
                                        __dev_remove_pack(&po->prot_hook);
                                        __sock_put(sk);
                                        po->running = 0;
                                        sk->sk_err = ENETDOWN;
                                        if (!sock_flag(sk, SOCK_DEAD))
                                                sk->sk_error_report(sk);
                                }
                                if (msg == NETDEV_UNREGISTER) {
                                        po->ifindex = -1;
                                        po->prot_hook.dev = NULL;
                                }
                                spin_unlock(&po->bind_lock);
                        }
                        break;
                case NETDEV_UP:
                        spin_lock(&po->bind_lock);
                        if (dev->ifindex == po->ifindex && po->num &&
                            !po->running) {
                                dev_add_pack(&po->prot_hook);
                                sock_hold(sk);
                                po->running = 1;
                        }
                        spin_unlock(&po->bind_lock);
                        break;
                }
        }
        read_unlock(&packet_sklist_lock);
        return NOTIFY_DONE;
}


static int packet_ioctl(struct socket *sock, unsigned int cmd,
                        unsigned long arg)
{
        struct sock *sk = sock->sk;

        switch(cmd) {
                case SIOCOUTQ:
                {
                        int amount = atomic_read(&sk->sk_wmem_alloc);
                        return put_user(amount, (int __user *)arg);
                }
                case SIOCINQ:
                {
                        struct sk_buff *skb;
                        int amount = 0;

                        spin_lock_bh(&sk->sk_receive_queue.lock);
                        skb = skb_peek(&sk->sk_receive_queue);
                        if (skb)
                                amount = skb->len;
                        spin_unlock_bh(&sk->sk_receive_queue.lock);
                        return put_user(amount, (int __user *)arg);
                }
                case SIOCGSTAMP:
                        return sock_get_timestamp(sk, (struct timeval __user *)arg);

#ifdef CONFIG_INET
                case SIOCADDRT:
                case SIOCDELRT:
                case SIOCDARP:
                case SIOCGARP:
                case SIOCSARP:
                case SIOCGIFADDR:
                case SIOCSIFADDR:
                case SIOCGIFBRDADDR:
                case SIOCSIFBRDADDR:
                case SIOCGIFNETMASK:
                case SIOCSIFNETMASK:
                case SIOCGIFDSTADDR:
                case SIOCSIFDSTADDR:
                case SIOCSIFFLAGS:
                        return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif

                default:
                        return dev_ioctl(cmd, (void __user *)arg);
        }
        return 0;
}

#ifndef CONFIG_PACKET_MMAP
#define packet_mmap sock_no_mmap
#define packet_poll datagram_poll
#else

unsigned int packet_poll(struct file * file, struct socket *sock, poll_table *wait)
{
        struct sock *sk = sock->sk;
        struct packet_opt *po = pkt_sk(sk);
        unsigned int mask = datagram_poll(file, sock, wait);

        spin_lock_bh(&sk->sk_receive_queue.lock);
        if (po->pg_vec) {
                unsigned last = po->head ? po->head-1 : po->frame_max;
                struct tpacket_hdr *h;

                h = (struct tpacket_hdr *)packet_lookup_frame(po, last);

                if (h->tp_status)
                        mask |= POLLIN | POLLRDNORM;
        }
        spin_unlock_bh(&sk->sk_receive_queue.lock);
        return mask;
}
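
/*
 * Example (userspace sketch, not part of this file): a ring consumer
 * typically sleeps in poll() and then drains every frame whose
 * tp_status is non-zero, handing each back with TP_STATUS_KERNEL.
 *
 *      struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *      poll(&pfd, 1, -1);      // wakes once a frame has been filled
 */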
1534
1535
1536 /* Dirty? Well, I still did not learn better way to account
1537  * for user mmaps.
1538  */
1539
1540 static void packet_mm_open(struct vm_area_struct *vma)
1541 {
1542         struct file *file = vma->vm_file;
1543         struct inode *inode = file->f_dentry->d_inode;
1544         struct socket * sock = SOCKET_I(inode);
1545         struct sock *sk = sock->sk;
1546         
1547         if (sk)
1548                 atomic_inc(&pkt_sk(sk)->mapped);
1549 }
1550
1551 static void packet_mm_close(struct vm_area_struct *vma)
1552 {
1553         struct file *file = vma->vm_file;
1554         struct inode *inode = file->f_dentry->d_inode;
1555         struct socket * sock = SOCKET_I(inode);
1556         struct sock *sk = sock->sk;
1557         
1558         if (sk)
1559                 atomic_dec(&pkt_sk(sk)->mapped);
1560 }
1561
1562 static struct vm_operations_struct packet_mmap_ops = {
1563         .open = packet_mm_open,
1564         .close =packet_mm_close,
1565 };
1566
1567 static void free_pg_vec(unsigned long *pg_vec, unsigned order, unsigned len)
1568 {
1569         int i;
1570
1571         for (i=0; i<len; i++) {
1572                 if (pg_vec[i]) {
1573                         struct page *page, *pend;
1574
1575                         pend = virt_to_page(pg_vec[i] + (PAGE_SIZE << order) - 1);
1576                         for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
1577                                 ClearPageReserved(page);
1578                         free_pages(pg_vec[i], order);
1579                 }
1580         }
1581         kfree(pg_vec);
1582 }
1583
1584
static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
{
        unsigned long *pg_vec = NULL;
        struct packet_opt *po = pkt_sk(sk);
        int was_running, num, order = 0;
        int err = 0;

        if (req->tp_block_nr) {
                int i;

                /* Sanity tests and some calculations */

                if (po->pg_vec)
                        return -EBUSY;

                if ((int)req->tp_block_size <= 0)
                        return -EINVAL;
                if (req->tp_block_size & (PAGE_SIZE - 1))
                        return -EINVAL;
                if (req->tp_frame_size < TPACKET_HDRLEN)
                        return -EINVAL;
                if (req->tp_frame_size & (TPACKET_ALIGNMENT - 1))
                        return -EINVAL;

                po->frames_per_block = req->tp_block_size / req->tp_frame_size;
                if (po->frames_per_block <= 0)
                        return -EINVAL;
                if (po->frames_per_block * req->tp_block_nr != req->tp_frame_nr)
                        return -EINVAL;
                /* OK! */

                /*
                 * Allocate the page vector: each block is one physically
                 * contiguous allocation of 2^order pages, where order is
                 * the smallest order that covers tp_block_size.
                 */
                while ((PAGE_SIZE << order) < req->tp_block_size)
                        order++;

                err = -ENOMEM;

                pg_vec = kmalloc(req->tp_block_nr * sizeof(unsigned long *), GFP_KERNEL);
                if (pg_vec == NULL)
                        goto out;
                memset(pg_vec, 0, req->tp_block_nr * sizeof(unsigned long *));

                for (i = 0; i < req->tp_block_nr; i++) {
                        struct page *page, *pend;
                        pg_vec[i] = __get_free_pages(GFP_KERNEL, order);
                        if (!pg_vec[i])
                                goto out_free_pgvec;

                        pend = virt_to_page(pg_vec[i] + (PAGE_SIZE << order) - 1);
                        for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
                                SetPageReserved(page);
                }
                /* Page vector is allocated */

                /* Mark every frame header as free for the kernel to fill */
                for (i = 0; i < req->tp_block_nr; i++) {
                        unsigned long ptr = pg_vec[i];
                        struct tpacket_hdr *header;
                        int k;

                        for (k = 0; k < po->frames_per_block; k++) {
                                header = (struct tpacket_hdr *)ptr;
                                header->tp_status = TP_STATUS_KERNEL;
                                ptr += req->tp_frame_size;
                        }
                }
                /* Done */
        } else {
                if (req->tp_frame_nr)
                        return -EINVAL;
        }

        lock_sock(sk);

        /* Detach socket from network */
        spin_lock(&po->bind_lock);
        was_running = po->running;
        num = po->num;
        if (was_running) {
                __dev_remove_pack(&po->prot_hook);
                po->num = 0;
                po->running = 0;
                __sock_put(sk);
        }
        spin_unlock(&po->bind_lock);

        synchronize_net();

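        /*
         * At this point the protocol hook is off and synchronize_net()
         * has flushed any receiver still running inside it, so nothing
         * in the kernel can touch the old ring; only a live user-space
         * mapping (po->mapped) can still forbid the swap below.
         */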
        err = -EBUSY;
        if (closing || atomic_read(&po->mapped) == 0) {
                err = 0;
                /* XC(a, b): store b in a and return the old value of a */
#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })

                spin_lock_bh(&sk->sk_receive_queue.lock);
                pg_vec = XC(po->pg_vec, pg_vec);
                po->frame_max = req->tp_frame_nr - 1;
                po->head = 0;
                po->frame_size = req->tp_frame_size;
                spin_unlock_bh(&sk->sk_receive_queue.lock);

                order = XC(po->pg_vec_order, order);
                req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);

                po->pg_vec_pages = req->tp_block_size / PAGE_SIZE;
                po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
                skb_queue_purge(&sk->sk_receive_queue);
#undef XC
                if (atomic_read(&po->mapped))
                        printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n",
                               atomic_read(&po->mapped));
        }

        spin_lock(&po->bind_lock);
        if (was_running && !po->running) {
                sock_hold(sk);
                po->running = 1;
                po->num = num;
                dev_add_pack(&po->prot_hook);
        }
        spin_unlock(&po->bind_lock);

        release_sock(sk);

out_free_pgvec:
        if (pg_vec)
                free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
        return err;
}
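/*
 * packet_mmap - map the ring set up by packet_set_ring() into the
 * caller's address space.  The whole ring must be mapped in a single
 * call, starting at offset 0.  Illustrative user-space counterpart
 * (error handling omitted; req is the tpacket_req shown above):
 *
 *      size_t len = req.tp_block_nr * req.tp_block_size;
 *      char *ring = mmap(0, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *
 * User space then polls tp_status in each frame's tpacket_hdr, waits
 * for it to leave TP_STATUS_KERNEL, and hands the frame back to the
 * kernel by setting it to TP_STATUS_KERNEL again when done.
 */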
static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
        struct sock *sk = sock->sk;
        struct packet_opt *po = pkt_sk(sk);
        unsigned long size;
        unsigned long start;
        int err = -EINVAL;
        int i;

        if (vma->vm_pgoff)
                return -EINVAL;

        size = vma->vm_end - vma->vm_start;

        lock_sock(sk);
        if (po->pg_vec == NULL)
                goto out;
        if (size != po->pg_vec_len * po->pg_vec_pages * PAGE_SIZE)
                goto out;

        start = vma->vm_start;
        err = -EAGAIN;
        for (i = 0; i < po->pg_vec_len; i++) {
                if (remap_page_range(vma, start, __pa(po->pg_vec[i]),
                                     po->pg_vec_pages * PAGE_SIZE,
                                     vma->vm_page_prot))
                        goto out;
                start += po->pg_vec_pages * PAGE_SIZE;
        }
        /*
         * Count the mapping only once every block is in place, so a
         * failed remap_page_range() above cannot leave po->mapped
         * elevated with no vma close ever dropping it again.
         */
        atomic_inc(&po->mapped);
        vma->vm_ops = &packet_mmap_ops;
        err = 0;

out:
        release_sock(sk);
        return err;
}
#endif

#ifdef CONFIG_SOCK_PACKET
struct proto_ops packet_ops_spkt = {
        .family =       PF_PACKET,
        .owner =        THIS_MODULE,
        .release =      packet_release,
        .bind =         packet_bind_spkt,
        .connect =      sock_no_connect,
        .socketpair =   sock_no_socketpair,
        .accept =       sock_no_accept,
        .getname =      packet_getname_spkt,
        .poll =         datagram_poll,
        .ioctl =        packet_ioctl,
        .listen =       sock_no_listen,
        .shutdown =     sock_no_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      packet_sendmsg_spkt,
        .recvmsg =      packet_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
};
#endif

struct proto_ops packet_ops = {
        .family =       PF_PACKET,
        .owner =        THIS_MODULE,
        .release =      packet_release,
        .bind =         packet_bind,
        .connect =      sock_no_connect,
        .socketpair =   sock_no_socketpair,
        .accept =       sock_no_accept,
        .getname =      packet_getname,
        .poll =         packet_poll,
        .ioctl =        packet_ioctl,
        .listen =       sock_no_listen,
        .shutdown =     sock_no_shutdown,
        .setsockopt =   packet_setsockopt,
        .getsockopt =   packet_getsockopt,
        .sendmsg =      packet_sendmsg,
        .recvmsg =      packet_recvmsg,
        .mmap =         packet_mmap,
        .sendpage =     sock_no_sendpage,
};
EXPORT_SYMBOL(packet_ops);

struct net_proto_family packet_family_ops = {
        .family =       PF_PACKET,
        .create =       packet_create,
        .owner  =       THIS_MODULE,
};
EXPORT_SYMBOL(packet_family_ops);
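
/*
 * packet_family_ops routes socket(2) calls for PF_PACKET to
 * packet_create().  An illustrative user-space call (capturing all
 * protocols on every interface until the socket is bound):
 *
 *      int fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 */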

static struct notifier_block packet_netdev_notifier = {
        .notifier_call = packet_notifier,
};

#ifdef CONFIG_PROC_FS
static inline struct sock *packet_seq_idx(loff_t off)
{
        struct sock *s;
        struct hlist_node *node;

        sk_for_each(s, node, &packet_sklist) {
                if (!off--)
                        return s;
        }
        return NULL;
}

static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
{
        read_lock(&packet_sklist_lock);
        return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN;
}

static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        ++*pos;
        return (v == SEQ_START_TOKEN)
                ? sk_head(&packet_sklist)
                : sk_next((struct sock *)v);
}

static void packet_seq_stop(struct seq_file *seq, void *v)
{
        read_unlock(&packet_sklist_lock);
}

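/*
 * One line of /proc/net/packet per socket.  An illustrative line (the
 * values here are made up) underneath the header would look like:
 *
 *      sk       RefCnt Type Proto  Iface R Rmem   User   Inode
 *      c6a4d500 3      3    0003   2     1 0      0      7614
 *
 * i.e. a SOCK_RAW socket bound to ETH_P_ALL (0x0003) on ifindex 2,
 * with its protocol hook currently running.
 */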
static int packet_seq_show(struct seq_file *seq, void *v)
{
        if (v == SEQ_START_TOKEN)
                seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
        else {
                struct sock *s = v;
                const struct packet_opt *po = pkt_sk(s);

                seq_printf(seq,
                           "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
                           s,
                           atomic_read(&s->sk_refcnt),
                           s->sk_type,
                           ntohs(po->num),
                           po->ifindex,
                           po->running,
                           atomic_read(&s->sk_rmem_alloc),
                           sock_i_uid(s),
                           sock_i_ino(s));
        }

        return 0;
}

static struct seq_operations packet_seq_ops = {
        .start  = packet_seq_start,
        .next   = packet_seq_next,
        .stop   = packet_seq_stop,
        .show   = packet_seq_show,
};

static int packet_seq_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &packet_seq_ops);
}

static struct file_operations packet_seq_fops = {
        .owner          = THIS_MODULE,
        .open           = packet_seq_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
};

#endif

static void __exit packet_exit(void)
{
        proc_net_remove("packet");
        unregister_netdevice_notifier(&packet_netdev_notifier);
        sock_unregister(PF_PACKET);
}

static int __init packet_init(void)
{
        sock_register(&packet_family_ops);
        register_netdevice_notifier(&packet_netdev_notifier);
        proc_net_fops_create("packet", 0, &packet_seq_fops);

        return 0;
}

module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);