*
* Version: $Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $
*
- * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
+ * Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Alan Cox, <gw4pts@gw4pts.ampr.org>
*
* Michal Ostrowski : Module initialization cleanup.
* Ulises Alonso : Frame number limit removal and
* packet_set_ring memory leak.
+ * Eric Biederman : Allow for > 8 byte hardware addresses.
+ * The convention is that longer addresses
+ * will simply extend the hardware address
+ * byte arrays at the end of sockaddr_ll
+ * and packet_mreq.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
*
*/
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/mm.h>
+#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
+#include <linux/kernel.h>
#include <linux/kmod.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
+#include <asm/page.h>
+#include <asm/cacheflush.h>
+#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
*/
/* List of all packet sockets. */
-HLIST_HEAD(packet_sklist);
-static rwlock_t packet_sklist_lock = RW_LOCK_UNLOCKED;
+static HLIST_HEAD(packet_sklist);
+static DEFINE_RWLOCK(packet_sklist_lock);
-atomic_t packet_socks_nr;
+static atomic_t packet_socks_nr;
/* Private packet socket structures. */
int count;
unsigned short type;
unsigned short alen;
- unsigned char addr[8];
+ unsigned char addr[MAX_ADDR_LEN];
+};
+/* identical to struct packet_mreq except it has
+ * a longer address field.
+ */
+struct packet_mreq_max
+{
+ int mr_ifindex;
+ unsigned short mr_type;
+ unsigned short mr_alen;
+ unsigned char mr_address[MAX_ADDR_LEN];
};
#endif
#ifdef CONFIG_PACKET_MMAP
static void packet_flush_mclist(struct sock *sk);
-struct packet_opt
-{
+struct packet_sock {
+ /* struct sock has to be the first member of packet_sock */
+ struct sock sk;
struct tpacket_stats stats;
#ifdef CONFIG_PACKET_MMAP
- unsigned long *pg_vec;
+ char * *pg_vec;
unsigned int head;
unsigned int frames_per_block;
unsigned int frame_size;
#endif
struct packet_type prot_hook;
spinlock_t bind_lock;
- char running; /* prot_hook is attached*/
+ unsigned int running:1, /* prot_hook is attached*/
+ auxdata:1;
int ifindex; /* bound device */
- unsigned short num;
+ __be16 num;
#ifdef CONFIG_PACKET_MULTICAST
struct packet_mclist *mclist;
#endif
#endif
};
+struct packet_skb_cb {
+ unsigned int origlen;
+ union {
+ struct sockaddr_pkt pkt;
+ struct sockaddr_ll ll;
+ } sa;
+};
+
+#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
+
#ifdef CONFIG_PACKET_MMAP
-static inline unsigned long packet_lookup_frame(struct packet_opt *po, unsigned int position)
+static inline char *packet_lookup_frame(struct packet_sock *po, unsigned int position)
{
unsigned int pg_vec_pos, frame_offset;
- unsigned long frame;
+ char *frame;
pg_vec_pos = position / po->frames_per_block;
frame_offset = position % po->frames_per_block;
- frame = (unsigned long) (po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size));
+ frame = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size);
return frame;
}
#endif
-#define pkt_sk(__sk) ((struct packet_opt *)(__sk)->sk_protinfo)
+static inline struct packet_sock *pkt_sk(struct sock *sk)
+{
+ return (struct packet_sock *)sk;
+}
-void packet_sock_destruct(struct sock *sk)
+static void packet_sock_destruct(struct sock *sk)
{
BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
return;
}
- if (pkt_sk(sk))
- kfree(pkt_sk(sk));
atomic_dec(&packet_socks_nr);
#ifdef PACKET_REFCNT_DEBUG
printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
}
-extern struct proto_ops packet_ops;
+static const struct proto_ops packet_ops;
#ifdef CONFIG_SOCK_PACKET
-extern struct proto_ops packet_ops_spkt;
+static const struct proto_ops packet_ops_spkt;
-static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
+static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
struct sock *sk;
struct sockaddr_pkt *spkt;
dst_release(skb->dst);
skb->dst = NULL;
- spkt = (struct sockaddr_pkt*)skb->cb;
+ /* drop conntrack reference */
+ nf_reset(skb);
+
+ spkt = &PACKET_SKB_CB(skb)->sa.pkt;
skb_push(skb, skb->data-skb->mac.raw);
struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
struct sk_buff *skb;
struct net_device *dev;
- unsigned short proto=0;
+ __be16 proto=0;
int err;
/*
if (dev == NULL)
goto out_unlock;
+ err = -ENETDOWN;
+ if (!(dev->flags & IFF_UP))
+ goto out_unlock;
+
/*
* You may not queue a frame bigger than the mtu. This is the lowest level
* raw protocol and you must do your own fragmentation at this level.
*/
err = -EMSGSIZE;
- if(len>dev->mtu+dev->hard_header_len)
+ if (len > dev->mtu + dev->hard_header_len)
goto out_unlock;
err = -ENOBUFS;
if (err)
goto out_free;
- err = -ENETDOWN;
- if (!(dev->flags & IFF_UP))
- goto out_free;
-
/*
* Now send it
*/
}
#endif
-static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned res)
+static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
+ unsigned int res)
{
struct sk_filter *filter;
- bh_lock_sock(sk);
- filter = sk->sk_filter;
- /*
- * Our caller already checked that filter != NULL but we need to
- * verify that under bh_lock_sock() to be safe
- */
- if (likely(filter != NULL))
+ rcu_read_lock_bh();
+ filter = rcu_dereference(sk->sk_filter);
+ if (filter != NULL)
res = sk_run_filter(skb, filter->insns, filter->len);
- bh_unlock_sock(sk);
+ rcu_read_unlock_bh();
return res;
}
we will not harm anyone.
*/
-static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
+static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
struct sock *sk;
struct sockaddr_ll *sll;
- struct packet_opt *po;
+ struct packet_sock *po;
u8 * skb_head = skb->data;
int skb_len = skb->len;
- unsigned snaplen;
+ unsigned int snaplen, res;
if (skb->pkt_type == PACKET_LOOPBACK)
goto drop;
snaplen = skb->len;
- if (sk->sk_filter) {
- unsigned res = run_filter(skb, sk, snaplen);
- if (res == 0)
- goto drop_n_restore;
- if (snaplen > res)
- snaplen = res;
- }
+ res = run_filter(skb, sk, snaplen);
+ if (!res)
+ goto drop_n_restore;
+ if (snaplen > res)
+ snaplen = res;
if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
(unsigned)sk->sk_rcvbuf)
skb = nskb;
}
- sll = (struct sockaddr_ll*)skb->cb;
+ BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
+ sizeof(skb->cb));
+
+ sll = &PACKET_SKB_CB(skb)->sa.ll;
sll->sll_family = AF_PACKET;
sll->sll_hatype = dev->type;
sll->sll_protocol = skb->protocol;
if (dev->hard_header_parse)
sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
+ PACKET_SKB_CB(skb)->origlen = skb->len;
+
if (pskb_trim(skb, snaplen))
goto drop_n_acct;
dst_release(skb->dst);
skb->dst = NULL;
+ /* drop conntrack reference */
+ nf_reset(skb);
+
spin_lock(&sk->sk_receive_queue.lock);
po->stats.tp_packets++;
__skb_queue_tail(&sk->sk_receive_queue, skb);
}
#ifdef CONFIG_PACKET_MMAP
-static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
+static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
struct sock *sk;
- struct packet_opt *po;
+ struct packet_sock *po;
struct sockaddr_ll *sll;
struct tpacket_hdr *h;
u8 * skb_head = skb->data;
int skb_len = skb->len;
- unsigned snaplen;
+ unsigned int snaplen, res;
unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
unsigned short macoff, netoff;
struct sk_buff *copy_skb = NULL;
else if (skb->pkt_type == PACKET_OUTGOING) {
/* Special case: outgoing packets have ll header at head */
skb_pull(skb, skb->nh.raw - skb->data);
- if (skb->ip_summed == CHECKSUM_HW)
- status |= TP_STATUS_CSUMNOTREADY;
}
}
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ status |= TP_STATUS_CSUMNOTREADY;
+
snaplen = skb->len;
- if (sk->sk_filter) {
- unsigned res = run_filter(skb, sk, snaplen);
- if (res == 0)
- goto drop_n_restore;
- if (snaplen > res)
- snaplen = res;
- }
+ res = run_filter(skb, sk, snaplen);
+ if (!res)
+ goto drop_n_restore;
+ if (snaplen > res)
+ snaplen = res;
if (sk->sk_type == SOCK_DGRAM) {
macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
if ((int)snaplen < 0)
snaplen = 0;
}
- if (snaplen > skb->len-skb->data_len)
- snaplen = skb->len-skb->data_len;
spin_lock(&sk->sk_receive_queue.lock);
h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head);
status &= ~TP_STATUS_LOSING;
spin_unlock(&sk->sk_receive_queue.lock);
- memcpy((u8*)h + macoff, skb->data, snaplen);
+ skb_copy_bits(skb, 0, (u8*)h + macoff, snaplen);
h->tp_len = skb->len;
h->tp_snaplen = snaplen;
h->tp_mac = macoff;
h->tp_net = netoff;
- if (skb->stamp.tv_sec == 0) {
- do_gettimeofday(&skb->stamp);
+ if (skb->tstamp.off_sec == 0) {
+ __net_timestamp(skb);
sock_enable_timestamp(sk);
}
- h->tp_sec = skb->stamp.tv_sec;
- h->tp_usec = skb->stamp.tv_usec;
+ h->tp_sec = skb->tstamp.off_sec;
+ h->tp_usec = skb->tstamp.off_usec;
sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
sll->sll_halen = 0;
sll->sll_ifindex = dev->ifindex;
h->tp_status = status;
- mb();
+ smp_mb();
{
struct page *p_start, *p_end;
struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
struct sk_buff *skb;
struct net_device *dev;
- unsigned short proto;
+ __be16 proto;
unsigned char *addr;
int ifindex, err, reserve = 0;
*/
if (saddr == NULL) {
- struct packet_opt *po = pkt_sk(sk);
+ struct packet_sock *po = pkt_sk(sk);
ifindex = po->ifindex;
proto = po->num;
err = -EINVAL;
if (msg->msg_namelen < sizeof(struct sockaddr_ll))
goto out;
+ if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
+ goto out;
ifindex = saddr->sll_ifindex;
proto = saddr->sll_protocol;
addr = saddr->sll_addr;
if (sock->type == SOCK_RAW)
reserve = dev->hard_header_len;
+ err = -ENETDOWN;
+ if (!(dev->flags & IFF_UP))
+ goto out_unlock;
+
err = -EMSGSIZE;
if (len > dev->mtu+reserve)
goto out_unlock;
skb->dev = dev;
skb->priority = sk->sk_priority;
- err = -ENETDOWN;
- if (!(dev->flags & IFF_UP))
- goto out_free;
-
/*
* Now send it
*/
static int packet_release(struct socket *sock)
{
struct sock *sk = sock->sk;
- struct packet_opt *po = pkt_sk(sk);
+ struct packet_sock *po;
if (!sk)
return 0;
+ po = pkt_sk(sk);
+
write_lock_bh(&packet_sklist_lock);
sk_del_node_init(sk);
write_unlock_bh(&packet_sklist_lock);
* Attach a packet hook.
*/
-static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
+static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
- struct packet_opt *po = pkt_sk(sk);
+ struct packet_sock *po = pkt_sk(sk);
/*
* Detach an existing hook if present.
*/
* Check legality
*/
- if(addr_len!=sizeof(struct sockaddr))
+ if (addr_len != sizeof(struct sockaddr))
return -EINVAL;
strlcpy(name,uaddr->sa_data,sizeof(name));
return err;
}
+static struct proto packet_proto = {
+ .name = "PACKET",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof(struct packet_sock),
+};
/*
* Create a packet of type SOCK_PACKET.
static int packet_create(struct socket *sock, int protocol)
{
struct sock *sk;
- struct packet_opt *po;
+ struct packet_sock *po;
+ __be16 proto = (__force __be16)protocol; /* weird, but documented */
int err;
if (!capable(CAP_NET_RAW))
sock->state = SS_UNCONNECTED;
err = -ENOBUFS;
- sk = sk_alloc(PF_PACKET, GFP_KERNEL, 1, NULL);
+ sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1);
if (sk == NULL)
goto out;
if (sock->type == SOCK_PACKET)
sock->ops = &packet_ops_spkt;
#endif
- sock_init_data(sock,sk);
- sk_set_owner(sk, THIS_MODULE);
+ sock_init_data(sock, sk);
- po = sk->sk_protinfo = kmalloc(sizeof(*po), GFP_KERNEL);
- if (!po)
- goto out_free;
- memset(po, 0, sizeof(*po));
+ po = pkt_sk(sk);
sk->sk_family = PF_PACKET;
- po->num = protocol;
+ po->num = proto;
sk->sk_destruct = packet_sock_destruct;
atomic_inc(&packet_socks_nr);
#endif
po->prot_hook.af_packet_priv = sk;
- if (protocol) {
- po->prot_hook.type = protocol;
+ if (proto) {
+ po->prot_hook.type = proto;
dev_add_pack(&po->prot_hook);
sock_hold(sk);
po->running = 1;
sk_add_node(sk, &packet_sklist);
write_unlock_bh(&packet_sklist_lock);
return(0);
-
-out_free:
- sk_free(sk);
out:
return err;
}
struct sock *sk = sock->sk;
struct sk_buff *skb;
int copied, err;
+ struct sockaddr_ll *sll;
err = -EINVAL;
- if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC))
+ if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
goto out;
#if 0
return -ENODEV;
#endif
- /*
- * If the address length field is there to be filled in, we fill
- * it in now.
- */
-
- if (sock->type == SOCK_PACKET)
- msg->msg_namelen = sizeof(struct sockaddr_pkt);
- else
- msg->msg_namelen = sizeof(struct sockaddr_ll);
-
/*
* Call the generic datagram receiver. This handles all sorts
* of horrible races and re-entrancy so we can forget about it
* retries.
*/
- if(skb==NULL)
+ if (skb == NULL)
goto out;
+ /*
+ * If the address length field is there to be filled in, we fill
+ * it in now.
+ */
+
+ sll = &PACKET_SKB_CB(skb)->sa.ll;
+ if (sock->type == SOCK_PACKET)
+ msg->msg_namelen = sizeof(struct sockaddr_pkt);
+ else
+ msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
+
/*
* You lose any data beyond the buffer you gave. If it worries a
* user program they can ask the device for its MTU anyway.
sock_recv_timestamp(msg, sk, skb);
if (msg->msg_name)
- memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
+ memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
+ msg->msg_namelen);
+
+ if (pkt_sk(sk)->auxdata) {
+ struct tpacket_auxdata aux;
+
+ aux.tp_status = TP_STATUS_USER;
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ aux.tp_status |= TP_STATUS_CSUMNOTREADY;
+ aux.tp_len = PACKET_SKB_CB(skb)->origlen;
+ aux.tp_snaplen = skb->len;
+ aux.tp_mac = 0;
+ aux.tp_net = skb->nh.raw - skb->data;
+
+ put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
+ }
/*
* Free or return the buffer as appropriate. Again this
{
struct net_device *dev;
struct sock *sk = sock->sk;
- struct packet_opt *po = pkt_sk(sk);
+ struct packet_sock *po = pkt_sk(sk);
struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
if (peer)
sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */
sll->sll_halen = 0;
}
- *uaddr_len = sizeof(*sll);
+ *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
return 0;
}
}
}
-static int packet_mc_add(struct sock *sk, struct packet_mreq *mreq)
+static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
{
- struct packet_opt *po = pkt_sk(sk);
+ struct packet_sock *po = pkt_sk(sk);
struct packet_mclist *ml, *i;
struct net_device *dev;
int err;
goto done;
err = -ENOBUFS;
- i = (struct packet_mclist *)kmalloc(sizeof(*i), GFP_KERNEL);
+ i = kmalloc(sizeof(*i), GFP_KERNEL);
if (i == NULL)
goto done;
return err;
}
-static int packet_mc_drop(struct sock *sk, struct packet_mreq *mreq)
+static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
{
struct packet_mclist *ml, **mlp;
static void packet_flush_mclist(struct sock *sk)
{
- struct packet_opt *po = pkt_sk(sk);
+ struct packet_sock *po = pkt_sk(sk);
struct packet_mclist *ml;
if (!po->mclist)
#endif
static int
-packet_setsockopt(struct socket *sock, int level, int optname, char *optval, int optlen)
+packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
{
struct sock *sk = sock->sk;
+ struct packet_sock *po = pkt_sk(sk);
int ret;
if (level != SOL_PACKET)
case PACKET_ADD_MEMBERSHIP:
case PACKET_DROP_MEMBERSHIP:
{
- struct packet_mreq mreq;
- if (optlen<sizeof(mreq))
+ struct packet_mreq_max mreq;
+ int len = optlen;
+ memset(&mreq, 0, sizeof(mreq));
+ if (len < sizeof(struct packet_mreq))
return -EINVAL;
- if (copy_from_user(&mreq,optval,sizeof(mreq)))
+ if (len > sizeof(mreq))
+ len = sizeof(mreq);
+ if (copy_from_user(&mreq,optval,len))
return -EFAULT;
+ if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
+ return -EINVAL;
if (optname == PACKET_ADD_MEMBERSHIP)
ret = packet_mc_add(sk, &mreq);
else
return 0;
}
#endif
+ case PACKET_AUXDATA:
+ {
+ int val;
+
+ if (optlen < sizeof(val))
+ return -EINVAL;
+ if (copy_from_user(&val, optval, sizeof(val)))
+ return -EFAULT;
+
+ po->auxdata = !!val;
+ return 0;
+ }
default:
return -ENOPROTOOPT;
}
}
-int packet_getsockopt(struct socket *sock, int level, int optname,
- char *optval, int *optlen)
+static int packet_getsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, int __user *optlen)
{
int len;
+ int val;
struct sock *sk = sock->sk;
- struct packet_opt *po = pkt_sk(sk);
+ struct packet_sock *po = pkt_sk(sk);
+ void *data;
+ struct tpacket_stats st;
if (level != SOL_PACKET)
return -ENOPROTOOPT;
- if (get_user(len,optlen))
- return -EFAULT;
+ if (get_user(len, optlen))
+ return -EFAULT;
if (len < 0)
return -EINVAL;
switch(optname) {
case PACKET_STATISTICS:
- {
- struct tpacket_stats st;
-
if (len > sizeof(struct tpacket_stats))
len = sizeof(struct tpacket_stats);
spin_lock_bh(&sk->sk_receive_queue.lock);
spin_unlock_bh(&sk->sk_receive_queue.lock);
st.tp_packets += st.tp_drops;
- if (copy_to_user(optval, &st, len))
- return -EFAULT;
+ data = &st;
+ break;
+ case PACKET_AUXDATA:
+ if (len > sizeof(int))
+ len = sizeof(int);
+ val = po->auxdata;
+
+ data = &val;
break;
- }
default:
return -ENOPROTOOPT;
}
- if (put_user(len, optlen))
- return -EFAULT;
- return 0;
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, data, len))
+ return -EFAULT;
+ return 0;
}
read_lock(&packet_sklist_lock);
sk_for_each(sk, node, &packet_sklist) {
- struct packet_opt *po = pkt_sk(sk);
+ struct packet_sock *po = pkt_sk(sk);
switch (msg) {
case NETDEV_UNREGISTER:
case SIOCOUTQ:
{
int amount = atomic_read(&sk->sk_wmem_alloc);
- return put_user(amount, (int *)arg);
+ return put_user(amount, (int __user *)arg);
}
case SIOCINQ:
{
if (skb)
amount = skb->len;
spin_unlock_bh(&sk->sk_receive_queue.lock);
- return put_user(amount, (int *)arg);
+ return put_user(amount, (int __user *)arg);
}
case SIOCGSTAMP:
- return sock_get_timestamp(sk, (struct timeval *)arg);
+ return sock_get_timestamp(sk, (struct timeval __user *)arg);
#ifdef CONFIG_INET
case SIOCADDRT:
#endif
default:
- return dev_ioctl(cmd, (void *)arg);
+ return -ENOIOCTLCMD;
}
return 0;
}
#define packet_poll datagram_poll
#else
-unsigned int packet_poll(struct file * file, struct socket *sock, poll_table *wait)
+static unsigned int packet_poll(struct file * file, struct socket *sock,
+ poll_table *wait)
{
struct sock *sk = sock->sk;
- struct packet_opt *po = pkt_sk(sk);
+ struct packet_sock *po = pkt_sk(sk);
unsigned int mask = datagram_poll(file, sock, wait);
spin_lock_bh(&sk->sk_receive_queue.lock);
static void packet_mm_open(struct vm_area_struct *vma)
{
struct file *file = vma->vm_file;
- struct inode *inode = file->f_dentry->d_inode;
- struct socket * sock = SOCKET_I(inode);
+ struct socket * sock = file->private_data;
struct sock *sk = sock->sk;
if (sk)
static void packet_mm_close(struct vm_area_struct *vma)
{
struct file *file = vma->vm_file;
- struct inode *inode = file->f_dentry->d_inode;
- struct socket * sock = SOCKET_I(inode);
+ struct socket * sock = file->private_data;
struct sock *sk = sock->sk;
if (sk)
.close =packet_mm_close,
};
-static void free_pg_vec(unsigned long *pg_vec, unsigned order, unsigned len)
+static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order)
{
- int i;
+ return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1);
+}
- for (i=0; i<len; i++) {
- if (pg_vec[i]) {
- struct page *page, *pend;
+static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
+{
+ int i;
- pend = virt_to_page(pg_vec[i] + (PAGE_SIZE << order) - 1);
- for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
- ClearPageReserved(page);
- free_pages(pg_vec[i], order);
- }
+ for (i = 0; i < len; i++) {
+ if (likely(pg_vec[i]))
+ free_pages((unsigned long) pg_vec[i], order);
}
kfree(pg_vec);
}
+static inline char *alloc_one_pg_vec_page(unsigned long order)
+{
+ return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
+ order);
+}
+
+static char **alloc_pg_vec(struct tpacket_req *req, int order)
+{
+ unsigned int block_nr = req->tp_block_nr;
+ char **pg_vec;
+ int i;
+
+ pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
+ if (unlikely(!pg_vec))
+ goto out;
+
+ for (i = 0; i < block_nr; i++) {
+ pg_vec[i] = alloc_one_pg_vec_page(order);
+ if (unlikely(!pg_vec[i]))
+ goto out_free_pgvec;
+ }
+
+out:
+ return pg_vec;
+
+out_free_pgvec:
+ free_pg_vec(pg_vec, order, block_nr);
+ pg_vec = NULL;
+ goto out;
+}
static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
{
- unsigned long *pg_vec = NULL;
- struct packet_opt *po = pkt_sk(sk);
- int was_running, num, order = 0;
+ char **pg_vec = NULL;
+ struct packet_sock *po = pkt_sk(sk);
+ int was_running, order = 0;
+ __be16 num;
int err = 0;
if (req->tp_block_nr) {
/* Sanity tests and some calculations */
- if (po->pg_vec)
+ if (unlikely(po->pg_vec))
return -EBUSY;
- if ((int)req->tp_block_size <= 0)
+ if (unlikely((int)req->tp_block_size <= 0))
return -EINVAL;
- if (req->tp_block_size&(PAGE_SIZE-1))
+ if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
return -EINVAL;
- if (req->tp_frame_size < TPACKET_HDRLEN)
+ if (unlikely(req->tp_frame_size < TPACKET_HDRLEN))
return -EINVAL;
- if (req->tp_frame_size&(TPACKET_ALIGNMENT-1))
+ if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
return -EINVAL;
po->frames_per_block = req->tp_block_size/req->tp_frame_size;
- if (po->frames_per_block <= 0)
+ if (unlikely(po->frames_per_block <= 0))
return -EINVAL;
- if (po->frames_per_block*req->tp_block_nr != req->tp_frame_nr)
+ if (unlikely((po->frames_per_block * req->tp_block_nr) !=
+ req->tp_frame_nr))
return -EINVAL;
- /* OK! */
-
- /* Allocate page vector */
- while ((PAGE_SIZE<<order) < req->tp_block_size)
- order++;
err = -ENOMEM;
-
- pg_vec = kmalloc(req->tp_block_nr*sizeof(unsigned long*), GFP_KERNEL);
- if (pg_vec == NULL)
+ order = get_order(req->tp_block_size);
+ pg_vec = alloc_pg_vec(req, order);
+ if (unlikely(!pg_vec))
goto out;
- memset(pg_vec, 0, req->tp_block_nr*sizeof(unsigned long*));
-
- for (i=0; i<req->tp_block_nr; i++) {
- struct page *page, *pend;
- pg_vec[i] = __get_free_pages(GFP_KERNEL, order);
- if (!pg_vec[i])
- goto out_free_pgvec;
-
- pend = virt_to_page(pg_vec[i] + (PAGE_SIZE << order) - 1);
- for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
- SetPageReserved(page);
- }
- /* Page vector is allocated */
l = 0;
- for (i=0; i<req->tp_block_nr; i++) {
- unsigned long ptr = pg_vec[i];
+ for (i = 0; i < req->tp_block_nr; i++) {
+ char *ptr = pg_vec[i];
struct tpacket_hdr *header;
int k;
- for (k=0; k<po->frames_per_block; k++) {
-
- header = (struct tpacket_hdr*)ptr;
+ for (k = 0; k < po->frames_per_block; k++) {
+ header = (struct tpacket_hdr *) ptr;
header->tp_status = TP_STATUS_KERNEL;
ptr += req->tp_frame_size;
}
}
/* Done */
} else {
- if (req->tp_frame_nr)
+ if (unlikely(req->tp_frame_nr))
return -EINVAL;
}
spin_lock_bh(&sk->sk_receive_queue.lock);
pg_vec = XC(po->pg_vec, pg_vec);
- po->frame_max = req->tp_frame_nr-1;
+ po->frame_max = (req->tp_frame_nr - 1);
po->head = 0;
po->frame_size = req->tp_frame_size;
spin_unlock_bh(&sk->sk_receive_queue.lock);
release_sock(sk);
-out_free_pgvec:
if (pg_vec)
free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
struct sock *sk = sock->sk;
- struct packet_opt *po = pkt_sk(sk);
+ struct packet_sock *po = pkt_sk(sk);
unsigned long size;
unsigned long start;
int err = -EINVAL;
if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
goto out;
- atomic_inc(&po->mapped);
start = vma->vm_start;
- err = -EAGAIN;
- for (i=0; i<po->pg_vec_len; i++) {
- if (remap_page_range(vma, start, __pa(po->pg_vec[i]),
- po->pg_vec_pages*PAGE_SIZE,
- vma->vm_page_prot))
- goto out;
- start += po->pg_vec_pages*PAGE_SIZE;
+ for (i = 0; i < po->pg_vec_len; i++) {
+ struct page *page = virt_to_page(po->pg_vec[i]);
+ int pg_num;
+
+ for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) {
+ err = vm_insert_page(vma, start, page);
+ if (unlikely(err))
+ goto out;
+ start += PAGE_SIZE;
+ }
}
+ atomic_inc(&po->mapped);
vma->vm_ops = &packet_mmap_ops;
err = 0;
#ifdef CONFIG_SOCK_PACKET
-struct proto_ops packet_ops_spkt = {
+static const struct proto_ops packet_ops_spkt = {
.family = PF_PACKET,
.owner = THIS_MODULE,
.release = packet_release,
};
#endif
-struct proto_ops packet_ops = {
+static const struct proto_ops packet_ops = {
.family = PF_PACKET,
.owner = THIS_MODULE,
.release = packet_release,
seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
else {
struct sock *s = v;
- const struct packet_opt *po = pkt_sk(s);
+ const struct packet_sock *po = pkt_sk(s);
seq_printf(seq,
"%p %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
proc_net_remove("packet");
unregister_netdevice_notifier(&packet_netdev_notifier);
sock_unregister(PF_PACKET);
- return;
+ proto_unregister(&packet_proto);
}
static int __init packet_init(void)
{
+ int rc = proto_register(&packet_proto, 0);
+
+ if (rc != 0)
+ goto out;
+
sock_register(&packet_family_ops);
register_netdevice_notifier(&packet_netdev_notifier);
proc_net_fops_create("packet", 0, &packet_seq_fops);
-
- return 0;
+out:
+ return rc;
}
module_init(packet_init);