2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * PACKET - implements raw packet sockets.
8 * Version: $Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Alan Cox, <gw4pts@gw4pts.ampr.org>
15 * Alan Cox : verify_area() now used correctly
16 * Alan Cox : new skbuff lists, look ma no backlogs!
17 * Alan Cox : tidied skbuff lists.
18 * Alan Cox : Now uses generic datagram routines I
19 * added. Also fixed the peek/read crash
20 * from all old Linux datagram code.
21 * Alan Cox : Uses the improved datagram code.
22 * Alan Cox : Added NULL's for socket options.
23 * Alan Cox : Re-commented the code.
24 * Alan Cox : Use new kernel side addressing
25 * Rob Janssen : Correct MTU usage.
26 * Dave Platt : Counter leaks caused by incorrect
27 * interrupt locking and some slightly
28 * dubious gcc output. Can you read
29 * compiler: it said _VOLATILE_
30 * Richard Kooijman : Timestamp fixes.
31 * Alan Cox : New buffers. Use sk->mac.raw.
32 * Alan Cox : sendmsg/recvmsg support.
33 * Alan Cox : Protocol setting support
34 * Alexey Kuznetsov : Untied from IPv4 stack.
35 * Cyrus Durgin : Fixed kerneld for kmod.
36 * Michal Ostrowski : Module initialization cleanup.
37 * Ulises Alonso : Frame number limit removal and
38 * packet_set_ring memory leak.
40 * This program is free software; you can redistribute it and/or
41 * modify it under the terms of the GNU General Public License
42 * as published by the Free Software Foundation; either version
43 * 2 of the License, or (at your option) any later version.
47 #include <linux/config.h>
48 #include <linux/types.h>
49 #include <linux/sched.h>
51 #include <linux/fcntl.h>
52 #include <linux/socket.h>
54 #include <linux/inet.h>
55 #include <linux/netdevice.h>
56 #include <linux/if_packet.h>
57 #include <linux/wireless.h>
58 #include <linux/kmod.h>
60 #include <net/protocol.h>
61 #include <linux/skbuff.h>
63 #include <linux/errno.h>
64 #include <linux/timer.h>
65 #include <asm/system.h>
66 #include <asm/uaccess.h>
67 #include <asm/ioctls.h>
68 #include <linux/proc_fs.h>
69 #include <linux/seq_file.h>
70 #include <linux/poll.h>
71 #include <linux/module.h>
72 #include <linux/init.h>
75 #include <net/inet_common.h>
78 #define CONFIG_SOCK_PACKET 1
81 Proposed replacement for SIOC{ADD,DEL}MULTI and
82 IFF_PROMISC, IFF_ALLMULTI flags.
84 It is more expensive, but I believe,
85 it is the really correct solution: reenterable, safe and fault tolerant.
87 IFF_PROMISC/IFF_ALLMULTI/SIOC{ADD/DEL}MULTI are faked by keeping
88 reference count and global flag, so that real status is
89 (gflag|(count != 0)), so that we can use obsolete faulty interface
90 not harming clever users.
92 #define CONFIG_PACKET_MULTICAST 1
96 - if device has no dev->hard_header routine, it adds and removes ll header
97 inside itself. In this case ll header is invisible outside of device,
98 but higher levels still should reserve dev->hard_header_len.
99 Some devices are clever enough to reallocate the skb when the header
100 will not fit in the reserved space (tunnel); other ones are silly
102 - packet socket receives packets with pulled ll header,
103 so that SOCK_RAW should push it back.
108 Incoming, dev->hard_header!=NULL
112 Outgoing, dev->hard_header!=NULL
116 Incoming, dev->hard_header==NULL
117 mac.raw -> UNKNOWN position. It is very likely, that it points to ll header.
118 PPP does this, which is wrong, because it introduces asymmetry
119 between rx and tx paths.
122 Outgoing, dev->hard_header==NULL
123 mac.raw -> data. ll header is still not built!
127 If dev->hard_header==NULL we are unlikely to restore sensible ll header.
133 dev->hard_header != NULL
137 dev->hard_header == NULL (ll header is added by device, we cannot control it)
141 We should set nh.raw on output to the correct position,
142 packet classifier depends on it.
145 /* List of all packet sockets. */
146 HLIST_HEAD(packet_sklist);
/* Protects packet_sklist: write-locked on add/remove, read-locked on walk. */
147 static rwlock_t packet_sklist_lock = RW_LOCK_UNLOCKED;
/* Count of live packet sockets (inc in packet_create, dec in destructor). */
149 atomic_t packet_socks_nr;
152 /* Private packet socket structures. */
/*
 * NOTE(review): the struct bodies below are incomplete in this view of the
 * source — the embedded line numbers jump, so interior fields and the
 * struct headers/closing braces are elided.
 */
154 #ifdef CONFIG_PACKET_MULTICAST
/* One multicast/promisc/allmulti membership held by a socket (singly linked). */
157 struct packet_mclist *next;
162 unsigned char addr[8];
165 #ifdef CONFIG_PACKET_MMAP
166 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
169 static void packet_flush_mclist(struct sock *sk);
/* Per-socket private state; reached via sk->sk_protinfo (see pkt_sk()). */
173 struct tpacket_stats stats;
174 #ifdef CONFIG_PACKET_MMAP
/* Ring buffer: vector of block addresses, carved into fixed-size frames. */
175 unsigned long *pg_vec;
177 unsigned int frames_per_block;
178 unsigned int frame_size;
179 unsigned int frame_max;
182 struct packet_type prot_hook;
/* Serializes attach/detach of prot_hook and the running flag. */
183 spinlock_t bind_lock;
184 char running; /* prot_hook is attached*/
185 int ifindex; /* bound device */
187 #ifdef CONFIG_PACKET_MULTICAST
188 struct packet_mclist *mclist;
190 #ifdef CONFIG_PACKET_MMAP
192 unsigned int pg_vec_order;
193 unsigned int pg_vec_pages;
194 unsigned int pg_vec_len;
198 #ifdef CONFIG_PACKET_MMAP
/*
 * Map a ring frame index to its kernel virtual address:
 * position -> (block, offset within block) via frames_per_block.
 * NOTE(review): the declaration of 'frame' and the return statement are
 * elided in this view of the source.
 */
200 static inline unsigned long packet_lookup_frame(struct packet_opt *po, unsigned int position)
202 unsigned int pg_vec_pos, frame_offset;
205 pg_vec_pos = position / po->frames_per_block;
206 frame_offset = position % po->frames_per_block;
208 frame = (unsigned long) (po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size));
/* Accessor for the per-socket private state stored in sk->sk_protinfo. */
214 #define pkt_sk(__sk) ((struct packet_opt *)(__sk)->sk_protinfo)
/*
 * Socket destructor, run when the last reference to sk is dropped.
 * Sanity-checks that no receive/send memory is still charged and that the
 * vserver context/network info was already released, then drops the
 * global live-socket count.
 */
216 void packet_sock_destruct(struct sock *sk)
218 BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
219 BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
220 BUG_ON(sk->sk_nx_info);
221 BUG_ON(sk->sk_vx_info);
/* A live (non-DEAD) socket reaching its destructor is a refcount bug. */
223 if (!sock_flag(sk, SOCK_DEAD)) {
224 printk("Attempt to release alive packet socket: %p\n", sk);
230 atomic_dec(&packet_socks_nr);
231 #ifdef PACKET_REFCNT_DEBUG
232 printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
/* Ops table for SOCK_RAW/SOCK_DGRAM packet sockets (defined at end of file). */
237 extern struct proto_ops packet_ops;
239 #ifdef CONFIG_SOCK_PACKET
/* Ops table for the obsolete SOCK_PACKET socket type (defined at end of file). */
240 extern struct proto_ops packet_ops_spkt;
/*
 * Receive hook for SOCK_PACKET sockets: restore the pulled link-level
 * header, fill a sockaddr_pkt into skb->cb and queue the skb on the
 * socket's receive queue.  Loopback copies are skipped; shared skbs are
 * un-shared first.
 * NOTE(review): braces, error paths and the final return are elided in
 * this view of the source.
 */
242 static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
245 struct sockaddr_pkt *spkt;
248 * When we registered the protocol we saved the socket in the data
249 * field for just this event.
252 sk = pt->af_packet_priv;
255 * Yank back the headers [hope the device set this
256 * right or kerboom...]
258 * Incoming packets have ll header pulled,
261 * For outgoing ones skb->data == skb->mac.raw
262 * so that this procedure is noop.
/* Never deliver looped-back copies of our own transmissions. */
265 if (skb->pkt_type == PACKET_LOOPBACK)
268 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
271 /* drop any routing info */
272 dst_release(skb->dst)
275 spkt = (struct sockaddr_pkt*)skb->cb;
/* Put the ll header back in front of skb->data. */
277 skb_push(skb, skb->data-skb->mac.raw);
280 * The SOCK_PACKET socket receives _all_ frames.
283 spkt->spkt_family = dev->type;
284 strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
285 spkt->spkt_protocol = skb->protocol;
288 * Charge the memory to the socket. This is done specifically
289 * to prevent sockets using all the memory up.
292 if (sock_queue_rcv_skb(sk,skb) == 0)
303 * Output a raw packet to a device layer. This bypasses all the other
304 * protocol layers and you must therefore supply it with a complete frame
/*
 * Transmit path for SOCK_PACKET: look the target device up by name from
 * the supplied sockaddr_pkt, size-check against MTU + hard header, build
 * the skb from the user iovec and hand it to the device.
 * NOTE(review): error paths, kfree_skb/dev_put cleanup and the final
 * dev_queue_xmit call are elided in this view of the source.
 */
307 static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
308 struct msghdr *msg, size_t len)
310 struct sock *sk = sock->sk;
311 struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
313 struct net_device *dev;
314 unsigned short proto=0;
318 * Get and verify the address.
323 if (msg->msg_namelen < sizeof(struct sockaddr))
325 if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
326 proto=saddr->spkt_protocol;
329 return(-ENOTCONN); /* SOCK_PACKET must be sent giving an address */
332 * Find the device first to size check it
/* Force NUL termination of the copied-from-user device name. */
335 saddr->spkt_device[13] = 0;
336 dev = dev_get_by_name(saddr->spkt_device);
342 * You may not queue a frame bigger than the mtu. This is the lowest level
343 * raw protocol and you must do your own fragmentation at this level.
347 if(len>dev->mtu+dev->hard_header_len)
351 skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);
354 * If the write buffer is full, then tough. At this level the user gets to
355 * deal with the problem - do your own algorithmic backoffs. That's far
366 /* FIXME: Save some space for broken drivers that write a
367 * hard header at transmission time by themselves. PPP is the
368 * notable one here. This should really be fixed at the driver level.
370 skb_reserve(skb, LL_RESERVED_SPACE(dev));
371 skb->nh.raw = skb->data;
373 /* Try to align data part correctly */
374 if (dev->hard_header) {
375 skb->data -= dev->hard_header_len;
376 skb->tail -= dev->hard_header_len;
377 if (len < dev->hard_header_len)
378 skb->nh.raw = skb->data;
381 /* Returns -EFAULT on error */
382 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
383 skb->protocol = proto;
385 skb->priority = sk->sk_priority;
/* Refuse to transmit through a downed interface. */
390 if (!(dev->flags & IFF_UP))
/*
 * Run the socket's attached BPF filter over the skb and return its
 * verdict (snap length; 0 means drop).  'res' is returned unchanged when
 * no filter turns out to be attached.
 */
410 static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned res)
412 struct sk_filter *filter;
415 filter = sk->sk_filter;
417 * Our caller already checked that filter != NULL but we need to
418 * verify that under bh_lock_sock() to be safe
420 if (likely(filter != NULL))
421 res = sk_run_filter(skb, filter->insns, filter->len);
428 This function makes lazy skb cloning in hope that most of packets
429 are discarded by BPF.
431 Note tricky part: we DO mangle shared skb! skb->data, skb->len
432 and skb->cb are mangled. It works because (and until) packets
433 falling here are owned by current CPU. Output packets are cloned
434 by dev_queue_xmit_nit(), input packets are processed by net_bh
435 sequentially, so that if we return skb to original state on exit,
436 we will not harm anyone.
/*
 * Default receive hook for PACKET sockets (non-mmap path): restore or
 * strip the ll header depending on socket type, run the BPF filter,
 * fill a sockaddr_ll into skb->cb and queue the skb.  The skb is
 * mangled in place and restored before returning (see the comment
 * block above about lazy cloning).
 * NOTE(review): labels, error paths and several statements are elided
 * in this view of the source.
 */
439 static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
442 struct sockaddr_ll *sll;
443 struct packet_opt *po;
/* Remember original data/len so a shared skb can be restored on exit. */
444 u8 * skb_head = skb->data;
445 int skb_len = skb->len;
448 if (skb->pkt_type == PACKET_LOOPBACK)
451 sk = pt->af_packet_priv;
/* vserver isolation: only deliver to sockets in the matching context. */
454 if (sk->sk_xid && sk->sk_xid != skb->xid)
459 if (dev->hard_header) {
460 /* The device has an explicit notion of ll header,
461 exported to higher levels.
463 Otherwise, the device hides details of its frame
464 structure, so that corresponding packet head
465 never delivered to user.
467 if (sk->sk_type != SOCK_DGRAM)
468 skb_push(skb, skb->data - skb->mac.raw);
469 else if (skb->pkt_type == PACKET_OUTGOING) {
470 /* Special case: outgoing packets have ll header at head */
471 skb_pull(skb, skb->nh.raw - skb->data);
478 unsigned res = run_filter(skb, sk, snaplen);
/* Drop if accepting would overflow the socket receive buffer. */
485 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
486 (unsigned)sk->sk_rcvbuf)
489 if (skb_shared(skb)) {
490 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
/* Restore the shared original before continuing with the clone. */
494 if (skb_head != skb->data) {
495 skb->data = skb_head;
/* Build the link-level address recvmsg() will copy from skb->cb. */
502 sll = (struct sockaddr_ll*)skb->cb;
503 sll->sll_family = AF_PACKET;
504 sll->sll_hatype = dev->type;
505 sll->sll_protocol = skb->protocol;
506 sll->sll_pkttype = skb->pkt_type;
507 sll->sll_ifindex = dev->ifindex;
510 if (dev->hard_header_parse)
511 sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
/* Trim to the filter's snap length before queueing. */
513 if (pskb_trim(skb, snaplen))
516 skb_set_owner_r(skb, sk);
518 dst_release(skb->dst);
521 spin_lock(&sk->sk_receive_queue.lock);
522 po->stats.tp_packets++;
523 __skb_queue_tail(&sk->sk_receive_queue, skb);
524 spin_unlock(&sk->sk_receive_queue.lock);
525 sk->sk_data_ready(sk, skb->len);
/* Drop path: count the loss under the receive-queue lock. */
529 spin_lock(&sk->sk_receive_queue.lock);
530 po->stats.tp_drops++;
531 spin_unlock(&sk->sk_receive_queue.lock);
/* Undo our in-place mangling if the skb is still shared with others. */
534 if (skb_head != skb->data && skb_shared(skb)) {
535 skb->data = skb_head;
543 #ifdef CONFIG_PACKET_MMAP
/*
 * Receive hook for sockets with a PACKET_RX_RING mapped: copy the
 * (filtered, possibly snapped) packet into the next free ring frame,
 * stamp the tpacket_hdr and sockaddr_ll, publish tp_status to
 * userspace and wake the socket.  Optionally also clones the skb onto
 * the receive queue when it exceeds the frame (copy_thresh path).
 * NOTE(review): labels, the ring-full branch and several statements are
 * elided in this view of the source.
 */
544 static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
547 struct packet_opt *po;
548 struct sockaddr_ll *sll;
549 struct tpacket_hdr *h;
550 u8 * skb_head = skb->data;
551 int skb_len = skb->len;
/* Assume losing until we verify no drops are pending (see below). */
553 unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
554 unsigned short macoff, netoff;
555 struct sk_buff *copy_skb = NULL;
557 if (skb->pkt_type == PACKET_LOOPBACK)
560 sk = pt->af_packet_priv;
563 if (dev->hard_header) {
564 if (sk->sk_type != SOCK_DGRAM)
565 skb_push(skb, skb->data - skb->mac.raw);
566 else if (skb->pkt_type == PACKET_OUTGOING) {
567 /* Special case: outgoing packets have ll header at head */
568 skb_pull(skb, skb->nh.raw - skb->data);
/* Outgoing hw-checksum packets have no checksum computed yet. */
569 if (skb->ip_summed == CHECKSUM_HW)
570 status |= TP_STATUS_CSUMNOTREADY;
577 unsigned res = run_filter(skb, sk, snaplen);
/* Work out where mac/net headers land inside the ring frame. */
584 if (sk->sk_type == SOCK_DGRAM) {
585 macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
587 unsigned maclen = skb->nh.raw - skb->data;
588 netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
589 macoff = netoff - maclen;
/* Frame too small for the packet: optionally keep a full clone on the
   regular receive queue, then snap what fits into the frame. */
592 if (macoff + snaplen > po->frame_size) {
593 if (po->copy_thresh &&
594 atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
595 (unsigned)sk->sk_rcvbuf) {
596 if (skb_shared(skb)) {
597 copy_skb = skb_clone(skb, GFP_ATOMIC);
599 copy_skb = skb_get(skb);
600 skb_head = skb->data;
603 skb_set_owner_r(copy_skb, sk);
605 snaplen = po->frame_size - macoff;
606 if ((int)snaplen < 0)
/* Never copy beyond the linear part of the skb. */
609 if (snaplen > skb->len-skb->data_len)
610 snaplen = skb->len-skb->data_len;
612 spin_lock(&sk->sk_receive_queue.lock);
613 h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head);
/* Advance the ring head (wraps at frame_max). */
617 po->head = po->head != po->frame_max ? po->head+1 : 0;
618 po->stats.tp_packets++;
620 status |= TP_STATUS_COPY;
621 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
623 if (!po->stats.tp_drops)
624 status &= ~TP_STATUS_LOSING;
625 spin_unlock(&sk->sk_receive_queue.lock);
627 memcpy((u8*)h + macoff, skb->data, snaplen);
629 h->tp_len = skb->len;
630 h->tp_snaplen = snaplen;
/* Fill in a timestamp if the driver did not; enable them for later. */
633 if (skb->stamp.tv_sec == 0) {
634 do_gettimeofday(&skb->stamp);
635 sock_enable_timestamp(sk);
637 h->tp_sec = skb->stamp.tv_sec;
638 h->tp_usec = skb->stamp.tv_usec;
640 sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
642 if (dev->hard_header_parse)
643 sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
644 sll->sll_family = AF_PACKET;
645 sll->sll_hatype = dev->type;
646 sll->sll_protocol = skb->protocol;
647 sll->sll_pkttype = skb->pkt_type;
648 sll->sll_ifindex = dev->ifindex;
/* Publish the frame to userspace by writing tp_status last. */
650 h->tp_status = status;
/* Flush the cache over the written frame so mmap readers see it. */
654 struct page *p_start, *p_end;
655 u8 *h_end = (u8 *)h + macoff + snaplen - 1;
657 p_start = virt_to_page(h);
658 p_end = virt_to_page(h_end);
659 while (p_start <= p_end) {
660 flush_dcache_page(p_start);
665 sk->sk_data_ready(sk, 0);
668 if (skb_head != skb->data && skb_shared(skb)) {
669 skb->data = skb_head;
/* Ring-full drop path: still wake the reader so it can drain. */
677 po->stats.tp_drops++;
678 spin_unlock(&sk->sk_receive_queue.lock);
680 sk->sk_data_ready(sk, 0);
/*
 * Transmit path for SOCK_RAW/SOCK_DGRAM packet sockets: resolve the
 * target device (bound ifindex or from sockaddr_ll), size-check, build
 * the skb — including the hard header for SOCK_DGRAM — copy the user
 * iovec and queue it for transmission.
 * NOTE(review): error/cleanup paths and some statements are elided in
 * this view of the source.
 */
689 static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
690 struct msghdr *msg, size_t len)
692 struct sock *sk = sock->sk;
693 struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
695 struct net_device *dev;
696 unsigned short proto;
698 int ifindex, err, reserve = 0;
701 * Get and verify the address.
/* No address given: use the interface/protocol the socket is bound to. */
705 struct packet_opt *po = pkt_sk(sk);
707 ifindex = po->ifindex;
712 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
714 ifindex = saddr->sll_ifindex;
715 proto = saddr->sll_protocol;
716 addr = saddr->sll_addr;
720 dev = dev_get_by_index(ifindex);
/* SOCK_RAW frames carry their own ll header inside 'len'. */
724 if (sock->type == SOCK_RAW)
725 reserve = dev->hard_header_len;
728 if (len > dev->mtu+reserve)
731 skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
732 msg->msg_flags & MSG_DONTWAIT, &err);
736 skb_reserve(skb, LL_RESERVED_SPACE(dev));
737 skb->nh.raw = skb->data;
/* For SOCK_DGRAM, let the device build the ll header from 'addr'. */
739 if (dev->hard_header) {
742 res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
743 if (sock->type != SOCK_DGRAM) {
744 skb->tail = skb->data;
750 /* Returns -EFAULT on error */
751 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
755 skb->protocol = proto;
757 skb->priority = sk->sk_priority;
760 if (!(dev->flags & IFF_UP))
767 err = dev_queue_xmit(skb);
768 if (err > 0 && (err = net_xmit_errno(err)) != 0)
785 * Close a PACKET socket. This is fairly simple. We immediately go
786 * to 'closed' state and remove our protocol entry in the device list.
/*
 * Unlink the socket from packet_sklist, detach the protocol hook, tear
 * down multicast memberships and the mmap ring, release vserver context
 * info and purge any queued skbs.
 * NOTE(review): the final sock_put/cleanup lines are elided in this view.
 */
789 static int packet_release(struct socket *sock)
791 struct sock *sk = sock->sk;
792 struct packet_opt *po = pkt_sk(sk);
797 write_lock_bh(&packet_sklist_lock);
798 sk_del_node_init(sk);
799 write_unlock_bh(&packet_sklist_lock);
802 * Unhook packet receive handler.
807 * Remove the protocol hook
809 dev_remove_pack(&po->prot_hook);
815 #ifdef CONFIG_PACKET_MULTICAST
816 packet_flush_mclist(sk);
/* A zeroed tpacket_req with closing=1 frees the rx ring, if any. */
819 #ifdef CONFIG_PACKET_MMAP
821 struct tpacket_req req;
822 memset(&req, 0, sizeof(req));
823 packet_set_ring(sk, &req, 1);
827 clr_vx_info(&sk->sk_vx_info);
828 clr_nx_info(&sk->sk_nx_info);
831 * Now the socket is dead. No more input will appear.
839 skb_queue_purge(&sk->sk_receive_queue);
846 * Attach a packet hook.
/*
 * (Re)bind the socket's prot_hook to a protocol and (optionally) a
 * device, under po->bind_lock.  An existing hook is detached first with
 * the lock dropped around dev_remove_pack().  If the device is down the
 * socket is flagged with ENETDOWN but the bind still records the state.
 * NOTE(review): several statements (unregister check, return) are elided
 * in this view of the source.
 */
849 static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
851 struct packet_opt *po = pkt_sk(sk);
853 * Detach an existing hook if present.
858 spin_lock(&po->bind_lock);
/* dev_remove_pack() can sleep/sync; call it outside the spinlock. */
863 spin_unlock(&po->bind_lock);
864 dev_remove_pack(&po->prot_hook);
865 spin_lock(&po->bind_lock);
869 po->prot_hook.type = protocol;
870 po->prot_hook.dev = dev;
872 po->ifindex = dev ? dev->ifindex : 0;
878 if (dev->flags&IFF_UP) {
879 dev_add_pack(&po->prot_hook);
/* Bound device is down: report ENETDOWN asynchronously. */
883 sk->sk_err = ENETDOWN;
884 if (!sock_flag(sk, SOCK_DEAD))
885 sk->sk_error_report(sk);
888 dev_add_pack(&po->prot_hook);
894 spin_unlock(&po->bind_lock);
900 * Bind a packet socket to a device
903 #ifdef CONFIG_SOCK_PACKET
/*
 * bind() for SOCK_PACKET: the address is a plain sockaddr whose sa_data
 * carries a device name; resolve it and delegate to packet_do_bind().
 */
905 static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
907 struct sock *sk=sock->sk;
909 struct net_device *dev;
916 if(addr_len!=sizeof(struct sockaddr))
918 strlcpy(name,uaddr->sa_data,sizeof(name));
920 dev = dev_get_by_name(name);
922 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
/*
 * bind() for AF_PACKET sockets proper: validate the sockaddr_ll, resolve
 * the optional ifindex, and delegate to packet_do_bind() with the
 * requested protocol (falling back to the socket's current one).
 */
929 static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
931 struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
932 struct sock *sk=sock->sk;
933 struct net_device *dev = NULL;
941 if (addr_len < sizeof(struct sockaddr_ll))
943 if (sll->sll_family != AF_PACKET)
/* ifindex 0 means "any device" — leave dev NULL in that case. */
946 if (sll->sll_ifindex) {
948 dev = dev_get_by_index(sll->sll_ifindex);
952 err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
962 * Create a packet of type SOCK_PACKET.
/*
 * socket(AF_PACKET, ...) entry point: requires CAP_NET_RAW, allocates
 * the sock and its packet_opt, picks the ops table by socket type,
 * records vserver context, attaches the protocol hook (when a protocol
 * was requested) and links the socket into packet_sklist.
 * NOTE(review): error labels and the kmalloc-failure path are elided in
 * this view of the source.
 */
965 static int packet_create(struct socket *sock, int protocol)
968 struct packet_opt *po;
971 if (!capable(CAP_NET_RAW))
973 if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW
974 #ifdef CONFIG_SOCK_PACKET
975 && sock->type != SOCK_PACKET
978 return -ESOCKTNOSUPPORT;
980 sock->state = SS_UNCONNECTED;
983 sk = sk_alloc(PF_PACKET, GFP_KERNEL, 1, NULL);
987 sock->ops = &packet_ops;
988 #ifdef CONFIG_SOCK_PACKET
989 if (sock->type == SOCK_PACKET)
990 sock->ops = &packet_ops_spkt;
992 sock_init_data(sock,sk);
993 sk_set_owner(sk, THIS_MODULE);
995 po = sk->sk_protinfo = kmalloc(sizeof(*po), GFP_KERNEL);
998 memset(po, 0, sizeof(*po));
999 sk->sk_family = PF_PACKET;
1002 sk->sk_destruct = packet_sock_destruct;
1003 atomic_inc(&packet_socks_nr);
/* Record the creating vserver context on the socket. */
1005 set_vx_info(&sk->sk_vx_info, current->vx_info);
1006 sk->sk_xid = vx_current_xid();
1007 set_nx_info(&sk->sk_nx_info, current->nx_info);
1008 sk->sk_nid = nx_current_nid();
1011 * Attach a protocol block
1014 spin_lock_init(&po->bind_lock);
1015 po->prot_hook.func = packet_rcv;
1016 #ifdef CONFIG_SOCK_PACKET
1017 if (sock->type == SOCK_PACKET)
1018 po->prot_hook.func = packet_rcv_spkt;
1020 po->prot_hook.af_packet_priv = sk;
1023 po->prot_hook.type = protocol;
1024 dev_add_pack(&po->prot_hook);
1029 write_lock_bh(&packet_sklist_lock);
1030 sk_add_node(sk, &packet_sklist);
1031 write_unlock_bh(&packet_sklist_lock);
1041 * Pull a packet from our receive queue and hand it to the user.
1042 * If necessary we block.
/*
 * recvmsg() for all packet socket flavours: dequeue via the generic
 * datagram receiver, copy out the payload (flagging MSG_TRUNC when the
 * user buffer is short), the timestamp, and the address saved in
 * skb->cb by the receive hooks.
 */
1045 static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1046 struct msghdr *msg, size_t len, int flags)
1048 struct sock *sk = sock->sk;
1049 struct sk_buff *skb;
1053 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
1057 /* What error should we return now? EUNATTACH? */
1058 if (pkt_sk(sk)->ifindex < 0)
1063 * If the address length field is there to be filled in, we fill
/* SOCK_PACKET uses sockaddr_pkt; everything else uses sockaddr_ll. */
1067 if (sock->type == SOCK_PACKET)
1068 msg->msg_namelen = sizeof(struct sockaddr_pkt);
1070 msg->msg_namelen = sizeof(struct sockaddr_ll);
1073 * Call the generic datagram receiver. This handles all sorts
1074 * of horrible races and re-entrancy so we can forget about it
1075 * in the protocol layers.
1077 * Now it will return ENETDOWN, if device have just gone down,
1078 * but then it will block.
1081 skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);
1084 * An error occurred so return it. Because skb_recv_datagram()
1085 * handles the blocking we don't see and worry about blocking
1093 * You lose any data beyond the buffer you gave. If it worries a
1094 * user program they can ask the device for its MTU anyway.
1101 msg->msg_flags|=MSG_TRUNC;
1104 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1108 sock_recv_timestamp(msg, sk, skb);
/* The receive hook stashed the peer address in skb->cb. */
1111 memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
1114 * Free or return the buffer as appropriate. Again this
1115 * hides all the races and re-entrancy issues from us.
/* MSG_TRUNC asks for the full on-wire length, not the copied length. */
1117 err = (flags&MSG_TRUNC) ? skb->len : copied;
1120 skb_free_datagram(sk, skb);
1125 #ifdef CONFIG_SOCK_PACKET
/*
 * getname() for SOCK_PACKET: report AF_PACKET plus the bound device's
 * name (or zeros when the device no longer exists).
 */
1126 static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1127 int *uaddr_len, int peer)
1129 struct net_device *dev;
1130 struct sock *sk = sock->sk;
1135 uaddr->sa_family = AF_PACKET;
1136 dev = dev_get_by_index(pkt_sk(sk)->ifindex);
1138 strlcpy(uaddr->sa_data, dev->name, 15);
1141 memset(uaddr->sa_data, 0, 14);
1142 *uaddr_len = sizeof(*uaddr);
/*
 * getname() for AF_PACKET sockets: fill a sockaddr_ll with the bound
 * ifindex/protocol and, when the device still exists, its hardware type
 * and address.
 */
1148 static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1149 int *uaddr_len, int peer)
1151 struct net_device *dev;
1152 struct sock *sk = sock->sk;
1153 struct packet_opt *po = pkt_sk(sk);
1154 struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
1159 sll->sll_family = AF_PACKET;
1160 sll->sll_ifindex = po->ifindex;
1161 sll->sll_protocol = po->num;
1162 dev = dev_get_by_index(po->ifindex);
1164 sll->sll_hatype = dev->type;
1165 sll->sll_halen = dev->addr_len;
1166 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
1169 sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */
1172 *uaddr_len = sizeof(*sll);
1177 #ifdef CONFIG_PACKET_MULTICAST
/*
 * Apply ('what' = +1) or remove ('what' = -1) one membership entry on a
 * device, dispatching on the membership type.
 */
1178 static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
1181 case PACKET_MR_MULTICAST:
1183 dev_mc_add(dev, i->addr, i->alen, 0);
1185 dev_mc_delete(dev, i->addr, i->alen, 0);
1187 case PACKET_MR_PROMISC:
1188 dev_set_promiscuity(dev, what);
1190 case PACKET_MR_ALLMULTI:
1191 dev_set_allmulti(dev, what);
/* Apply/remove every membership in list 'i' that targets 'dev'. */
1197 static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1199 for ( ; i; i=i->next) {
1200 if (i->ifindex == dev->ifindex)
1201 packet_dev_mc(dev, i, what);
/*
 * PACKET_ADD_MEMBERSHIP: add (or refcount an existing) multicast/
 * promisc/allmulti membership for the socket and apply it to the device.
 * NOTE(review): locking, refcount bump and error returns are elided in
 * this view of the source.
 */
1205 static int packet_mc_add(struct sock *sk, struct packet_mreq *mreq)
1207 struct packet_opt *po = pkt_sk(sk);
1208 struct packet_mclist *ml, *i;
1209 struct net_device *dev;
1215 dev = __dev_get_by_index(mreq->mr_ifindex);
1220 if (mreq->mr_alen > dev->addr_len)
/* Allocate before the list scan so the scan stays simple. */
1224 i = (struct packet_mclist *)kmalloc(sizeof(*i), GFP_KERNEL);
/* Existing identical entry: just bump its count and discard 'i'. */
1229 for (ml = po->mclist; ml; ml = ml->next) {
1230 if (ml->ifindex == mreq->mr_ifindex &&
1231 ml->type == mreq->mr_type &&
1232 ml->alen == mreq->mr_alen &&
1233 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1235 /* Free the new element ... */
1241 i->type = mreq->mr_type;
1242 i->ifindex = mreq->mr_ifindex;
1243 i->alen = mreq->mr_alen;
1244 memcpy(i->addr, mreq->mr_address, i->alen);
1246 i->next = po->mclist;
1248 packet_dev_mc(dev, i, +1);
/*
 * PACKET_DROP_MEMBERSHIP: find the matching membership, drop one
 * reference, and when the count hits zero unlink it and undo its effect
 * on the device.  Returns -EADDRNOTAVAIL when no entry matches.
 */
1255 static int packet_mc_drop(struct sock *sk, struct packet_mreq *mreq)
1257 struct packet_mclist *ml, **mlp;
1261 for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
1262 if (ml->ifindex == mreq->mr_ifindex &&
1263 ml->type == mreq->mr_type &&
1264 ml->alen == mreq->mr_alen &&
1265 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1266 if (--ml->count == 0) {
1267 struct net_device *dev;
1269 dev = dev_get_by_index(ml->ifindex);
1271 packet_dev_mc(dev, ml, -1);
1281 return -EADDRNOTAVAIL;
/*
 * Release every membership held by the socket (socket close path),
 * undoing each one on its device when the device still exists.
 */
1284 static void packet_flush_mclist(struct sock *sk)
1286 struct packet_opt *po = pkt_sk(sk);
1287 struct packet_mclist *ml;
1293 while ((ml = po->mclist) != NULL) {
1294 struct net_device *dev;
1296 po->mclist = ml->next;
1297 if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
1298 packet_dev_mc(dev, ml, -1);
/*
 * setsockopt(SOL_PACKET): multicast membership add/drop, RX ring setup
 * (PACKET_RX_RING) and copy threshold.
 * NOTE(review): the switch statement itself, some returns and the EINVAL/
 * EFAULT error values are elided in this view of the source.
 */
1308 packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
1310 struct sock *sk = sock->sk;
1313 if (level != SOL_PACKET)
1314 return -ENOPROTOOPT;
1317 #ifdef CONFIG_PACKET_MULTICAST
1318 case PACKET_ADD_MEMBERSHIP:
1319 case PACKET_DROP_MEMBERSHIP:
1321 struct packet_mreq mreq;
1322 if (optlen<sizeof(mreq))
1324 if (copy_from_user(&mreq,optval,sizeof(mreq)))
1326 if (optname == PACKET_ADD_MEMBERSHIP)
1327 ret = packet_mc_add(sk, &mreq);
1329 ret = packet_mc_drop(sk, &mreq);
1333 #ifdef CONFIG_PACKET_MMAP
1334 case PACKET_RX_RING:
1336 struct tpacket_req req;
1338 if (optlen<sizeof(req))
1340 if (copy_from_user(&req,optval,sizeof(req)))
1342 return packet_set_ring(sk, &req, 0);
1344 case PACKET_COPY_THRESH:
1348 if (optlen!=sizeof(val))
1350 if (copy_from_user(&val,optval,sizeof(val)))
1353 pkt_sk(sk)->copy_thresh = val;
1358 return -ENOPROTOOPT;
/*
 * getsockopt(SOL_PACKET): currently only PACKET_STATISTICS, which
 * returns-and-resets the packet/drop counters under the receive-queue
 * lock.  tp_packets is reported inclusive of drops.
 * NOTE(review): the switch statement and the copy of po->stats into the
 * local 'st' are elided in this view of the source.
 */
1362 int packet_getsockopt(struct socket *sock, int level, int optname,
1363 char __user *optval, int __user *optlen)
1366 struct sock *sk = sock->sk;
1367 struct packet_opt *po = pkt_sk(sk);
1369 if (level != SOL_PACKET)
1370 return -ENOPROTOOPT;
1372 if (get_user(len,optlen))
1379 case PACKET_STATISTICS:
1381 struct tpacket_stats st;
1383 if (len > sizeof(struct tpacket_stats))
1384 len = sizeof(struct tpacket_stats);
1385 spin_lock_bh(&sk->sk_receive_queue.lock);
/* Reading the stats resets them. */
1387 memset(&po->stats, 0, sizeof(st));
1388 spin_unlock_bh(&sk->sk_receive_queue.lock);
1389 st.tp_packets += st.tp_drops;
1391 if (copy_to_user(optval, &st, len))
1396 return -ENOPROTOOPT;
1399 if (put_user(len, optlen))
/*
 * netdevice notifier: walk all packet sockets and react to device
 * events — tear down memberships and detach hooks on NETDEV_UNREGISTER/
 * down (flagging ENETDOWN), re-attach the hook when a bound device
 * comes back up.
 * NOTE(review): the switch/case structure and some statements are elided
 * in this view of the source.
 */
1405 static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
1408 struct hlist_node *node;
1409 struct net_device *dev = (struct net_device*)data;
1411 read_lock(&packet_sklist_lock);
1412 sk_for_each(sk, node, &packet_sklist) {
1413 struct packet_opt *po = pkt_sk(sk);
1416 case NETDEV_UNREGISTER:
1417 #ifdef CONFIG_PACKET_MULTICAST
1419 packet_dev_mclist(dev, po->mclist, -1);
1423 if (dev->ifindex == po->ifindex) {
1424 spin_lock(&po->bind_lock);
1426 __dev_remove_pack(&po->prot_hook);
1429 sk->sk_err = ENETDOWN;
1430 if (!sock_flag(sk, SOCK_DEAD))
1431 sk->sk_error_report(sk);
/* Device is going away entirely: forget the binding. */
1433 if (msg == NETDEV_UNREGISTER) {
1435 po->prot_hook.dev = NULL;
1437 spin_unlock(&po->bind_lock);
/* Device came (back) up: re-attach the hook for bound sockets. */
1441 spin_lock(&po->bind_lock);
1442 if (dev->ifindex == po->ifindex && po->num &&
1444 dev_add_pack(&po->prot_hook);
1448 spin_unlock(&po->bind_lock);
1452 read_unlock(&packet_sklist_lock);
/*
 * ioctl() handler: queued-byte queries (outq/inq), timestamp retrieval,
 * inet address ioctls delegated to inet_dgram_ops, and everything else
 * to the generic device ioctl.
 * NOTE(review): the switch/case labels for the first branches are elided
 * in this view of the source.
 */
1457 static int packet_ioctl(struct socket *sock, unsigned int cmd,
1460 struct sock *sk = sock->sk;
/* Bytes pending in the send queue. */
1465 int amount = atomic_read(&sk->sk_wmem_alloc);
1466 return put_user(amount, (int __user *)arg);
/* Size of the next packet waiting in the receive queue. */
1470 struct sk_buff *skb;
1473 spin_lock_bh(&sk->sk_receive_queue.lock);
1474 skb = skb_peek(&sk->sk_receive_queue);
1477 spin_unlock_bh(&sk->sk_receive_queue.lock);
1478 return put_user(amount, (int __user *)arg);
1481 return sock_get_timestamp(sk, (struct timeval __user *)arg);
1491 case SIOCGIFBRDADDR:
1492 case SIOCSIFBRDADDR:
1493 case SIOCGIFNETMASK:
1494 case SIOCSIFNETMASK:
1495 case SIOCGIFDSTADDR:
1496 case SIOCSIFDSTADDR:
1498 return inet_dgram_ops.ioctl(sock, cmd, arg);
1502 return dev_ioctl(cmd, (void __user *)arg);
1507 #ifndef CONFIG_PACKET_MMAP
/* Without mmap support, fall back to the generic implementations. */
1508 #define packet_mmap sock_no_mmap
1509 #define packet_poll datagram_poll
/*
 * poll() for mmap-capable sockets: in addition to the generic datagram
 * readiness, report POLLIN when the most recently written ring frame
 * belongs to userspace (i.e. there is unread ring data).
 */
1512 unsigned int packet_poll(struct file * file, struct socket *sock, poll_table *wait)
1514 struct sock *sk = sock->sk;
1515 struct packet_opt *po = pkt_sk(sk);
1516 unsigned int mask = datagram_poll(file, sock, wait);
1518 spin_lock_bh(&sk->sk_receive_queue.lock);
/* 'last' is the frame written just before the current head (wraps). */
1520 unsigned last = po->head ? po->head-1 : po->frame_max;
1521 struct tpacket_hdr *h;
1523 h = (struct tpacket_hdr *)packet_lookup_frame(po, last);
1526 mask |= POLLIN | POLLRDNORM;
1528 spin_unlock_bh(&sk->sk_receive_queue.lock);
1533 /* Dirty? Well, I still did not learn better way to account
/* VMA open: another mapping reference to the ring — count it. */
1537 static void packet_mm_open(struct vm_area_struct *vma)
1539 struct file *file = vma->vm_file;
1540 struct inode *inode = file->f_dentry->d_inode;
1541 struct socket * sock = SOCKET_I(inode);
1542 struct sock *sk = sock->sk;
1545 atomic_inc(&pkt_sk(sk)->mapped);
/* VMA close: drop the mapping reference taken in packet_mm_open. */
1548 static void packet_mm_close(struct vm_area_struct *vma)
1550 struct file *file = vma->vm_file;
1551 struct inode *inode = file->f_dentry->d_inode;
1552 struct socket * sock = SOCKET_I(inode);
1553 struct sock *sk = sock->sk;
1556 atomic_dec(&pkt_sk(sk)->mapped);
/* vm_operations for the PACKET_RX_RING mapping. */
1559 static struct vm_operations_struct packet_mmap_ops = {
1560 .open = packet_mm_open,
1561 .close =packet_mm_close,
/*
 * Free a ring page vector: un-reserve every page of each block and
 * return the pages to the allocator.  'order' is the per-block
 * allocation order; 'len' the number of blocks.
 */
1564 static void free_pg_vec(unsigned long *pg_vec, unsigned order, unsigned len)
1568 for (i=0; i<len; i++) {
1570 struct page *page, *pend;
1572 pend = virt_to_page(pg_vec[i] + (PAGE_SIZE << order) - 1);
1573 for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
1574 ClearPageReserved(page);
1575 free_pages(pg_vec[i], order);
/*
 * Set up (req->tp_block_nr != 0) or tear down (closing / zeroed req) the
 * PACKET_RX_RING: validate geometry, allocate reserved page blocks,
 * initialize every frame header to TP_STATUS_KERNEL, then swap the new
 * vector in under the receive-queue lock with the protocol hook
 * temporarily detached.
 * NOTE(review): several checks, error labels and the mapped!=0 EBUSY
 * handling are elided in this view of the source.
 */
1582 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
1584 unsigned long *pg_vec = NULL;
1585 struct packet_opt *po = pkt_sk(sk);
1586 int was_running, num, order = 0;
1589 if (req->tp_block_nr) {
1592 /* Sanity tests and some calculations */
1597 if ((int)req->tp_block_size <= 0)
1599 if (req->tp_block_size&(PAGE_SIZE-1))
1601 if (req->tp_frame_size < TPACKET_HDRLEN)
1603 if (req->tp_frame_size&(TPACKET_ALIGNMENT-1))
1606 po->frames_per_block = req->tp_block_size/req->tp_frame_size;
1607 if (po->frames_per_block <= 0)
1609 if (po->frames_per_block*req->tp_block_nr != req->tp_frame_nr)
1613 /* Allocate page vector */
1614 while ((PAGE_SIZE<<order) < req->tp_block_size)
1619 pg_vec = kmalloc(req->tp_block_nr*sizeof(unsigned long*), GFP_KERNEL);
1622 memset(pg_vec, 0, req->tp_block_nr*sizeof(unsigned long*));
1624 for (i=0; i<req->tp_block_nr; i++) {
1625 struct page *page, *pend;
1626 pg_vec[i] = __get_free_pages(GFP_KERNEL, order);
1628 goto out_free_pgvec;
/* Reserve the pages so they can be mapped to userspace. */
1630 pend = virt_to_page(pg_vec[i] + (PAGE_SIZE << order) - 1);
1631 for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
1632 SetPageReserved(page);
1634 /* Page vector is allocated */
/* Hand every frame to the kernel side initially. */
1637 for (i=0; i<req->tp_block_nr; i++) {
1638 unsigned long ptr = pg_vec[i];
1639 struct tpacket_hdr *header;
1642 for (k=0; k<po->frames_per_block; k++) {
1644 header = (struct tpacket_hdr*)ptr;
1645 header->tp_status = TP_STATUS_KERNEL;
1646 ptr += req->tp_frame_size;
1651 if (req->tp_frame_nr)
1657 /* Detach socket from network */
1658 spin_lock(&po->bind_lock);
1659 was_running = po->running;
1662 __dev_remove_pack(&po->prot_hook);
1667 spin_unlock(&po->bind_lock);
/* Only swap while nothing has the old ring mapped (or on close). */
1672 if (closing || atomic_read(&po->mapped) == 0) {
/* XC: atomic-in-source swap of a and b, yielding the old a. */
1674 #define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
1676 spin_lock_bh(&sk->sk_receive_queue.lock);
1677 pg_vec = XC(po->pg_vec, pg_vec);
1678 po->frame_max = req->tp_frame_nr-1;
1680 po->frame_size = req->tp_frame_size;
1681 spin_unlock_bh(&sk->sk_receive_queue.lock);
1683 order = XC(po->pg_vec_order, order);
1684 req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);
1686 po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
/* Ring present -> use the mmap receive hook, else the plain one. */
1687 po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
1688 skb_queue_purge(&sk->sk_receive_queue);
1690 if (atomic_read(&po->mapped))
1691 printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
/* Re-attach the hook if it was running before we detached it. */
1694 spin_lock(&po->bind_lock);
1695 if (was_running && !po->running) {
1699 dev_add_pack(&po->prot_hook);
1701 spin_unlock(&po->bind_lock);
/* pg_vec now holds the OLD vector (after XC) or the unused new one. */
1707 free_pg_vec(pg_vec, order, req->tp_block_nr);
/*
 * packet_mmap - map the socket's rx ring into the caller's vma.
 * Requires that a ring was configured (po->pg_vec != NULL) and that
 * the vma size exactly equals the ring size; each block of the page
 * vector is mapped contiguously with remap_page_range(), and
 * po->mapped is raised so packet_set_ring() will not swap the ring
 * out from under an active mapping.
 * NOTE(review): the error-return paths and closing brace fall in gaps
 * of this excerpt.
 */
1712 static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1714 struct sock *sk = sock->sk;
1715 struct packet_opt *po = pkt_sk(sk);
1717 unsigned long start;
1724 size = vma->vm_end - vma->vm_start;
/* mmap() is only meaningful once a ring exists. */
1727 if (po->pg_vec == NULL)
/* The mapping must cover the whole ring -- no more, no less. */
1729 if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
/* Mark the ring busy before we start touching page tables. */
1732 atomic_inc(&po->mapped);
1733 start = vma->vm_start;
/* Map each block of the page vector back to back into the vma. */
1735 for (i=0; i<po->pg_vec_len; i++) {
1736 if (remap_page_range(vma, start, __pa(po->pg_vec[i]),
1737 po->pg_vec_pages*PAGE_SIZE,
1740 start += po->pg_vec_pages*PAGE_SIZE;
1742 vma->vm_ops = &packet_mmap_ops;
1752 #ifdef CONFIG_SOCK_PACKET
/*
 * proto_ops for the legacy SOCK_PACKET interface: bind/getname/sendmsg
 * use the *_spkt variants, and the unsupported operations (connect,
 * accept, listen, sockopts, mmap, sendpage) are stubbed with the
 * sock_no_* helpers.
 * NOTE(review): the closing "};" of this initializer is in a gap of
 * this excerpt.
 */
1753 struct proto_ops packet_ops_spkt = {
1754 .family = PF_PACKET,
1755 .owner = THIS_MODULE,
1756 .release = packet_release,
1757 .bind = packet_bind_spkt,
1758 .connect = sock_no_connect,
1759 .socketpair = sock_no_socketpair,
1760 .accept = sock_no_accept,
1761 .getname = packet_getname_spkt,
1762 .poll = datagram_poll,
1763 .ioctl = packet_ioctl,
1764 .listen = sock_no_listen,
1765 .shutdown = sock_no_shutdown,
1766 .setsockopt = sock_no_setsockopt,
1767 .getsockopt = sock_no_getsockopt,
1768 .sendmsg = packet_sendmsg_spkt,
1769 .recvmsg = packet_recvmsg,
1770 .mmap = sock_no_mmap,
1771 .sendpage = sock_no_sendpage,
/*
 * proto_ops for PF_PACKET SOCK_RAW/SOCK_DGRAM sockets: the full
 * feature set, including set/getsockopt and rx-ring mmap support.
 * NOTE(review): the closing "};" of this initializer is in a gap of
 * this excerpt.
 */
1775 struct proto_ops packet_ops = {
1776 .family = PF_PACKET,
1777 .owner = THIS_MODULE,
1778 .release = packet_release,
1779 .bind = packet_bind,
1780 .connect = sock_no_connect,
1781 .socketpair = sock_no_socketpair,
1782 .accept = sock_no_accept,
1783 .getname = packet_getname,
1784 .poll = packet_poll,
1785 .ioctl = packet_ioctl,
1786 .listen = sock_no_listen,
1787 .shutdown = sock_no_shutdown,
1788 .setsockopt = packet_setsockopt,
1789 .getsockopt = packet_getsockopt,
1790 .sendmsg = packet_sendmsg,
1791 .recvmsg = packet_recvmsg,
1792 .mmap = packet_mmap,
1793 .sendpage = sock_no_sendpage,
/* Exported so other kernel code can reference the PF_PACKET ops. */
1795 EXPORT_SYMBOL(packet_ops);
/*
 * Address-family registration: routes socket(PF_PACKET, ...) calls to
 * packet_create(). Registered from packet_init() via sock_register().
 * NOTE(review): the closing "};" of this initializer is in a gap of
 * this excerpt.
 */
1797 struct net_proto_family packet_family_ops = {
1798 .family = PF_PACKET,
1799 .create = packet_create,
1800 .owner = THIS_MODULE,
1802 EXPORT_SYMBOL(packet_family_ops);
/*
 * Netdevice notifier: packet_notifier() is called on device
 * up/down/unregister events (registered in packet_init()).
 * NOTE(review): the closing "};" is in a gap of this excerpt.
 */
1804 static struct notifier_block packet_netdev_notifier = {
1805 .notifier_call =packet_notifier,
1808 #ifdef CONFIG_PROC_FS
/*
 * Return the off'th socket in packet_sklist for the /proc/net/packet
 * seq_file walk; presumably NULL when off runs past the end -- the
 * loop body and return are in gaps of this excerpt. Caller must hold
 * packet_sklist_lock.
 */
1809 static inline struct sock *packet_seq_idx(loff_t off)
1812 struct hlist_node *node;
1814 sk_for_each(s, node, &packet_sklist) {
/*
 * seq_file .start: take the socket-list read lock for the duration of
 * the walk (released in packet_seq_stop). Position 0 yields
 * SEQ_START_TOKEN so packet_seq_show() prints the header line first.
 */
1821 static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
1823 read_lock(&packet_sklist_lock);
1824 return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN;
/*
 * seq_file .next: after the header token, start at the list head;
 * otherwise advance to the following socket in packet_sklist.
 */
1827 static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1830 return (v == SEQ_START_TOKEN)
1831 ? sk_head(&packet_sklist)
1832 : sk_next((struct sock*)v) ;
/* seq_file .stop: drop the lock taken in packet_seq_start(). */
1835 static void packet_seq_stop(struct seq_file *seq, void *v)
1837 read_unlock(&packet_sklist_lock);
/*
 * seq_file .show: print the column header for SEQ_START_TOKEN,
 * otherwise one line describing the socket (pointer, refcount, type,
 * protocol, interface, running flag, rmem, uid, inode -- the fields
 * between the visible arguments fall in gaps of this excerpt).
 */
1840 static int packet_seq_show(struct seq_file *seq, void *v)
1842 if (v == SEQ_START_TOKEN)
1843 seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
1846 const struct packet_opt *po = pkt_sk(s);
1849 "%p %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
1851 atomic_read(&s->sk_refcnt),
1856 atomic_read(&s->sk_rmem_alloc),
/* seq_file iterator callbacks for /proc/net/packet. */
1864 static struct seq_operations packet_seq_ops = {
1865 .start = packet_seq_start,
1866 .next = packet_seq_next,
1867 .stop = packet_seq_stop,
1868 .show = packet_seq_show,
/* open() handler for /proc/net/packet: attach the seq_file iterator. */
1871 static int packet_seq_open(struct inode *inode, struct file *file)
1873 return seq_open(file, &packet_seq_ops);
/*
 * file_operations for /proc/net/packet; read/llseek/release delegate
 * to the generic seq_file helpers (the .read line falls in a gap of
 * this excerpt).
 */
1876 static struct file_operations packet_seq_fops = {
1877 .owner = THIS_MODULE,
1878 .open = packet_seq_open,
1880 .llseek = seq_lseek,
1881 .release = seq_release,
/*
 * Module unload: tear down in reverse order of packet_init() --
 * proc entry, netdevice notifier, then the PF_PACKET family.
 */
1886 static void __exit packet_exit(void)
1888 proc_net_remove("packet")
1889 unregister_netdevice_notifier(&packet_netdev_notifier);
1890 sock_unregister(PF_PACKET);
/*
 * Module load: register the PF_PACKET family, hook netdevice events,
 * and create /proc/net/packet.
 * NOTE(review): the return values of these registration calls are not
 * checked on the visible lines; confirm against the (gapped) tail of
 * the function whether failures are handled.
 */
1894 static int __init packet_init(void)
1896 sock_register(&packet_family_ops);
1897 register_netdevice_notifier(&packet_netdev_notifier);
1898 proc_net_fops_create("packet", 0, &packet_seq_fops);
/* Module entry/exit points, licence, and PF_PACKET protocol alias
 * (lets the module autoload on socket(PF_PACKET, ...)). */
1903 module_init(packet_init);
1904 module_exit(packet_exit);
1905 MODULE_LICENSE("GPL");
1906 MODULE_ALIAS_NETPROTO(PF_PACKET);