vserver 1.9.5.x5
[linux-2.6.git] / drivers / net / tun.c
index d0fe0f6..42c4e80 100644 (file)
  */
 
 /*
+ *  Changes:
+ *
+ *  Mark Smith <markzzzsmith@yahoo.com.au>
+ *   Use random_ether_addr() for tap MAC address.
+ *
+ *  Harald Roelle <harald.roelle@ifi.lmu.de>  2004/04/20
+ *    Fixes in packet dropping, queue length setting and queue wakeup.
+ *    Increased default tx queue length.
+ *    Added ethtool API.
+ *    Minor cleanups
+ *
  *  Daniel Podlejski <underley@underley.eu.org>
  *    Modifications for 2.3.99-pre5 kernel.
  */
 
-#define TUN_VER "1.5"
+#define DRV_NAME       "tun"
+#define DRV_VERSION    "1.6"
+#define DRV_DESCRIPTION        "Universal TUN/TAP device driver"
+#define DRV_COPYRIGHT  "(C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>"
 
 #include <linux/config.h>
 #include <linux/module.h>
 #include <linux/poll.h>
 #include <linux/fcntl.h>
 #include <linux/init.h>
-#include <linux/random.h>
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/miscdevice.h>
+#include <linux/ethtool.h>
 #include <linux/rtnetlink.h>
 #include <linux/if.h>
 #include <linux/if_arp.h>
 #include <linux/if_ether.h>
 #include <linux/if_tun.h>
+#include <linux/crc32.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -52,6 +67,7 @@ static int debug;
 /* Network device part of the driver */
 
 static LIST_HEAD(tun_dev_list);
+static struct ethtool_ops tun_ethtool_ops;
 
 /* Net device open. */
 static int tun_net_open(struct net_device *dev)
@@ -78,19 +94,26 @@ static int tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
        if (!tun->attached)
                goto drop;
 
-       /* Queue packet */
-       if (!(tun->flags & TUN_ONE_QUEUE)) {
-               /* Normal queueing mode.
-                * Packet scheduler handles dropping. */
-               if (skb_queue_len(&tun->readq) >= TUN_READQ_SIZE)
+       /* Packet dropping */
+       if (skb_queue_len(&tun->readq) >= dev->tx_queue_len) {
+               if (!(tun->flags & TUN_ONE_QUEUE)) {
+                       /* Normal queueing mode. */
+                       /* Packet scheduler handles dropping of further packets. */
                        netif_stop_queue(dev);
-       } else {
-               /* Single queue mode.
-                * Driver handles dropping itself. */
-               if (skb_queue_len(&tun->readq) >= dev->tx_queue_len)
+
+                       /* We won't see all dropped packets individually, so overrun
+                        * error is more appropriate. */
+                       tun->stats.tx_fifo_errors++;
+               } else {
+                       /* Single queue mode.
+                        * Driver handles dropping of all packets itself. */
                        goto drop;
+               }
        }
+
+       /* Queue packet */
        skb_queue_tail(&tun->readq, skb);
+       dev->trans_start = jiffies;
 
        /* Notify and wake up reader process */
        if (tun->flags & TUN_FASYNC)
@@ -104,11 +127,42 @@ drop:
        return 0;
 }
 
-static void tun_net_mclist(struct net_device *dev)
+/**
+ * Add the specified Ethernet address to this multicast filter.
+ *
+ * The address is hashed with ether_crc() and the result shifted right by 26
+ * to form a bit number; bit_nr >> 5 picks the filter word and bit_nr & 31
+ * the bit inside it.  Because this is a hash, different addresses can map
+ * to the same bit.
+ *
+ * @filter: filter words to update.
+ * @addr:   Ethernet address (ETH_ALEN bytes) to add.
+ */
+static void
+add_multi(u32* filter, const u8* addr)
+{
+       int bit_nr = ether_crc(ETH_ALEN, addr) >> 26;
+
+       /* Use an unsigned constant: shifting a signed 1 into bit 31 is
+        * undefined behaviour. */
+       filter[bit_nr >> 5] |= 1U << (bit_nr & 31);
+}
+
+/**
+ * Remove the specified Ethernet address from this multicast filter.
+ *
+ * The bit is located exactly as in add_multi() and cleared unconditionally,
+ * so any other address that hashes to the same bit is removed as well.
+ *
+ * @filter: filter words to update.
+ * @addr:   Ethernet address (ETH_ALEN bytes) to remove.
+ */
+static void
+del_multi(u32* filter, const u8* addr)
 {
-       /* Nothing to do for multicast filters. 
-        * We always accept all frames. */
-       return;
+       int bit_nr = ether_crc(ETH_ALEN, addr) >> 26;
+
+       /* Use an unsigned constant: shifting a signed 1 into bit 31 is
+        * undefined behaviour. */
+       filter[bit_nr >> 5] &= ~(1U << (bit_nr & 31));
+}
+
+/**
+ * Update the list of multicast groups to which the network device belongs.
+ * This list is used to filter packets being sent from the character device to
+ * the network device.
+ *
+ * The net-side filter is rebuilt from scratch on every call: it is cleared
+ * and each address on dev->mc_list (at most dev->mc_count entries) is hashed
+ * back into it with add_multi().
+ */
+static void
+tun_net_mclist(struct net_device *dev)
+{
+       struct tun_struct *tun = netdev_priv(dev);
+       const struct dev_mc_list *mclist;
+       int i;
+
+       DBG(KERN_DEBUG "%s: tun_net_mclist: mc_count %d\n",
+                       dev->name, dev->mc_count);
+
+       /* Reset the filter that the loop below repopulates.  Clearing
+        * chr_filter here instead would discard the groups set through
+        * SIOCADDMULTI and leave stale bits in net_filter. */
+       memset(tun->net_filter, 0, sizeof tun->net_filter);
+       for (i = 0, mclist = dev->mc_list; i < dev->mc_count && mclist != NULL;
+                       i++, mclist = mclist->next) {
+               add_multi(tun->net_filter, mclist->dmi_addr);
+               DBG(KERN_DEBUG "%s: tun_net_mclist: %x:%x:%x:%x:%x:%x\n",
+                               dev->name,
+                               mclist->dmi_addr[0], mclist->dmi_addr[1], mclist->dmi_addr[2],
+                               mclist->dmi_addr[3], mclist->dmi_addr[4], mclist->dmi_addr[5]);
+       }
+}
 
 static struct net_device_stats *tun_net_stats(struct net_device *dev)
@@ -132,18 +186,16 @@ static void tun_net_init(struct net_device *dev)
                /* Zero header length */
                dev->type = ARPHRD_NONE; 
                dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
-               dev->tx_queue_len = 10;
+               dev->tx_queue_len = TUN_READQ_SIZE;  /* We prefer our own queue length */
                break;
 
        case TUN_TAP_DEV:
                /* Ethernet TAP Device */
                dev->set_multicast_list = tun_net_mclist;
 
-               /* Generate random Ethernet address.  */
-               *(u16 *)dev->dev_addr = htons(0x00FF);
-               get_random_bytes(dev->dev_addr + sizeof(u16), 4);
-
                ether_setup(dev);
+               random_ether_addr(dev->dev_addr);
+               dev->tx_queue_len = TUN_READQ_SIZE;  /* We prefer our own queue length */
                break;
        }
 }
@@ -177,7 +229,7 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv,
        size_t len = count;
 
        if (!(tun->flags & TUN_NO_PI)) {
-               if ((len -= sizeof(pi)) > len)
+               if ((len -= sizeof(pi)) > count)
                        return -EINVAL;
 
                if(memcpy_fromiovec((void *)&pi, iv, sizeof(pi)))
@@ -208,6 +260,7 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv,
                skb->ip_summed = CHECKSUM_UNNECESSARY;
  
        netif_rx_ni(skb);
+       tun->dev->last_rx = jiffies;
    
        tun->stats.rx_packets++;
        tun->stats.rx_bytes += len;
@@ -301,6 +354,10 @@ static ssize_t tun_chr_readv(struct file *file, const struct iovec *iv,
 
        add_wait_queue(&tun->read_wait, &wait);
        while (len) {
+               const u8 ones[ ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+               u8 addr[ ETH_ALEN];
+               int bit_nr;
+
                current->state = TASK_INTERRUPTIBLE;
 
                /* Read frames from the queue */
@@ -318,12 +375,40 @@ static ssize_t tun_chr_readv(struct file *file, const struct iovec *iv,
                        schedule();
                        continue;
                }
-               netif_start_queue(tun->dev);
-
-               ret = tun_put_user(tun, skb, (struct iovec *) iv, len);
-
-               kfree_skb(skb);
-               break;
+               netif_wake_queue(tun->dev);
+
+               /** Decide whether to accept this packet. This code is designed to
+                * behave identically to an Ethernet interface. Accept the packet if
+                * - we are promiscuous.
+                * - the packet is addressed to us.
+                * - the packet is broadcast.
+                * - the packet is multicast and
+                *   - we are multicast promiscuous.
+                *   - we belong to the multicast group.
+                */
+               memcpy(addr, skb->data,
+                      min_t(size_t, sizeof addr, skb->len));
+               bit_nr = ether_crc(sizeof addr, addr) >> 26;
+               if ((tun->if_flags & IFF_PROMISC) ||
+                               memcmp(addr, tun->dev_addr, sizeof addr) == 0 ||
+                               memcmp(addr, ones, sizeof addr) == 0 ||
+                               (((addr[0] == 1 && addr[1] == 0 && addr[2] == 0x5e) ||
+                                 (addr[0] == 0x33 && addr[1] == 0x33)) &&
+                                ((tun->if_flags & IFF_ALLMULTI) ||
+                                 (tun->chr_filter[bit_nr >> 5] & (1 << (bit_nr & 31)))))) {
+                       DBG(KERN_DEBUG "%s: tun_chr_readv: accepted: %x:%x:%x:%x:%x:%x\n",
+                                       tun->dev->name, addr[0], addr[1], addr[2],
+                                       addr[3], addr[4], addr[5]);
+                       ret = tun_put_user(tun, skb, (struct iovec *) iv, len);
+                       kfree_skb(skb);
+                       break;
+               } else {
+                       DBG(KERN_DEBUG "%s: tun_chr_readv: rejected: %x:%x:%x:%x:%x:%x\n",
+                                       tun->dev->name, addr[0], addr[1], addr[2],
+                                       addr[3], addr[4], addr[5]);
+                       kfree_skb(skb);
+                       continue;
+               }
        }
 
        current->state = TASK_RUNNING;
@@ -354,6 +439,7 @@ static void tun_setup(struct net_device *dev)
        dev->hard_start_xmit = tun_net_xmit;
        dev->stop = tun_net_close;
        dev->get_stats = tun_net_stats;
+       dev->ethtool_ops = &tun_ethtool_ops;
        dev->destructor = free_netdev;
 }
 
@@ -417,6 +503,12 @@ static int tun_set_iff(struct file *file, struct ifreq *ifr)
                tun = netdev_priv(dev);
                tun->dev = dev;
                tun->flags = flags;
+               /* Be promiscuous by default to maintain previous behaviour. */
+               tun->if_flags = IFF_PROMISC;
+               /* Generate random Ethernet address. */
+               *(u16 *)tun->dev_addr = htons(0x00FF);
+               get_random_bytes(tun->dev_addr + sizeof(u16), 4);
+               memset(tun->chr_filter, 0, sizeof tun->chr_filter);
 
                tun_net_init(dev);
 
@@ -457,13 +549,16 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file,
                         unsigned int cmd, unsigned long arg)
 {
        struct tun_struct *tun = file->private_data;
+       void __user* argp = (void __user*)arg;
+       struct ifreq ifr;
+
+       if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89)
+               if (copy_from_user(&ifr, argp, sizeof ifr))
+                       return -EFAULT;
 
        if (cmd == TUNSETIFF && !tun) {
-               struct ifreq ifr;
                int err;
 
-               if (copy_from_user(&ifr, (void __user *)arg, sizeof(ifr)))
-                       return -EFAULT;
                ifr.ifr_name[IFNAMSIZ-1] = '\0';
 
                rtnl_lock();
@@ -473,7 +568,7 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file,
                if (err)
                        return err;
 
-               if (copy_to_user((void __user *)arg, &ifr, sizeof(ifr)))
+               if (copy_to_user(argp, &ifr, sizeof(ifr)))
                        return -EFAULT;
                return 0;
        }
@@ -519,6 +614,61 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file,
                break;
 #endif
 
+       case SIOCGIFFLAGS:
+               ifr.ifr_flags = tun->if_flags;
+               if (copy_to_user( argp, &ifr, sizeof ifr))
+                       return -EFAULT;
+               return 0;
+
+       case SIOCSIFFLAGS:
+               /** Set the character device's interface flags. Currently only
+                * IFF_PROMISC and IFF_ALLMULTI are used. */
+               tun->if_flags = ifr.ifr_flags;
+               DBG(KERN_INFO "%s: interface flags 0x%lx\n",
+                               tun->dev->name, tun->if_flags);
+               return 0;
+
+       case SIOCGIFHWADDR:
+               memcpy(ifr.ifr_hwaddr.sa_data, tun->dev_addr,
+                               min(sizeof ifr.ifr_hwaddr.sa_data, sizeof tun->dev_addr));
+               if (copy_to_user( argp, &ifr, sizeof ifr))
+                       return -EFAULT;
+               return 0;
+
+       case SIOCSIFHWADDR:
+               /** Set the character device's hardware address. This is used when
+                * filtering packets being sent from the network device to the character
+                * device. */
+               memcpy(tun->dev_addr, ifr.ifr_hwaddr.sa_data,
+                               min(sizeof ifr.ifr_hwaddr.sa_data, sizeof tun->dev_addr));
+               DBG(KERN_DEBUG "%s: set hardware address: %x:%x:%x:%x:%x:%x\n",
+                               tun->dev->name,
+                               tun->dev_addr[0], tun->dev_addr[1], tun->dev_addr[2],
+                               tun->dev_addr[3], tun->dev_addr[4], tun->dev_addr[5]);
+               return 0;
+
+       case SIOCADDMULTI:
+               /** Add the specified group to the character device's multicast filter
+                * list. */
+               add_multi(tun->chr_filter, ifr.ifr_hwaddr.sa_data);
+               DBG(KERN_DEBUG "%s: add multi: %x:%x:%x:%x:%x:%x\n",
+                               tun->dev->name,
+                               (u8)ifr.ifr_hwaddr.sa_data[0], (u8)ifr.ifr_hwaddr.sa_data[1],
+                               (u8)ifr.ifr_hwaddr.sa_data[2], (u8)ifr.ifr_hwaddr.sa_data[3],
+                               (u8)ifr.ifr_hwaddr.sa_data[4], (u8)ifr.ifr_hwaddr.sa_data[5]);
+               return 0;
+
+       case SIOCDELMULTI:
+               /** Remove the specified group from the character device's multicast
+                * filter list. */
+               del_multi(tun->chr_filter, ifr.ifr_hwaddr.sa_data);
+               DBG(KERN_DEBUG "%s: del multi: %x:%x:%x:%x:%x:%x\n",
+                               tun->dev->name,
+                               (u8)ifr.ifr_hwaddr.sa_data[0], (u8)ifr.ifr_hwaddr.sa_data[1],
+                               (u8)ifr.ifr_hwaddr.sa_data[2], (u8)ifr.ifr_hwaddr.sa_data[3],
+                               (u8)ifr.ifr_hwaddr.sa_data[4], (u8)ifr.ifr_hwaddr.sa_data[5]);
+               return 0;
+
        default:
                return -EINVAL;
        };
@@ -608,12 +758,97 @@ static struct miscdevice tun_miscdev = {
        .devfs_name = "net/tun",
 };
 
+/* ethtool interface */
+
+/*
+ * ethtool get_settings handler.
+ *
+ * Fills @cmd with constant values; nothing is read from @dev.
+ * Always returns 0.
+ */
+static int tun_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+       /* Capability masks and autonegotiation. */
+       cmd->supported = 0;
+       cmd->advertising = 0;
+       cmd->autoneg = AUTONEG_DISABLE;
+
+       /* Reported link parameters. */
+       cmd->speed = SPEED_10;
+       cmd->duplex = DUPLEX_FULL;
+
+       /* Port, PHY and transceiver description. */
+       cmd->port = PORT_TP;
+       cmd->phy_address = 0;
+       cmd->transceiver = XCVR_INTERNAL;
+
+       /* Packet limits. */
+       cmd->maxtxpkt = 0;
+       cmd->maxrxpkt = 0;
+
+       return 0;
+}
+
+/*
+ * ethtool get_drvinfo handler.
+ *
+ * Copies the driver name, driver version and a placeholder firmware version
+ * into @info.  bus_info is set to "tun" or "tap" according to the device
+ * type bits in tun->flags; for any other type it is left untouched.
+ */
+static void tun_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
+{
+       struct tun_struct *tun = netdev_priv(dev);
+       const char *bus = NULL;
+
+       strcpy(info->driver, DRV_NAME);
+       strcpy(info->version, DRV_VERSION);
+       strcpy(info->fw_version, "N/A");
+
+       /* Pick the bus string first, then copy it in one place. */
+       switch (tun->flags & TUN_TYPE_MASK) {
+       case TUN_TUN_DEV:
+               bus = "tun";
+               break;
+       case TUN_TAP_DEV:
+               bus = "tap";
+               break;
+       }
+
+       if (bus)
+               strcpy(info->bus_info, bus);
+}
+
+/*
+ * ethtool get_msglevel handler.
+ *
+ * With TUN_DEBUG defined this returns the per-device debug value;
+ * otherwise it returns -EOPNOTSUPP.
+ *
+ * NOTE(review): the return type is u32, so the negative errno is converted
+ * to a large unsigned value rather than reported as an error - confirm
+ * whether callers expect that.
+ */
+static u32 tun_get_msglevel(struct net_device *dev)
+{
+#ifndef TUN_DEBUG
+       return -EOPNOTSUPP;
+#else
+       struct tun_struct *priv = netdev_priv(dev);
+
+       return priv->debug;
+#endif
+}
+
+/*
+ * ethtool set_msglevel handler.
+ *
+ * Stores @value as the per-device debug value when TUN_DEBUG is defined;
+ * without TUN_DEBUG the call does nothing.
+ */
+static void tun_set_msglevel(struct net_device *dev, u32 value)
+{
+#ifdef TUN_DEBUG
+       struct tun_struct *priv = netdev_priv(dev);
+
+       priv->debug = value;
+#endif
+}
+
+/*
+ * ethtool get_link handler: reports the device's "attached" field as the
+ * link state.
+ */
+static u32 tun_get_link(struct net_device *dev)
+{
+       const struct tun_struct *priv = netdev_priv(dev);
+
+       return priv->attached;
+}
+
+/*
+ * ethtool get_rx_csum handler.
+ *
+ * Returns 1 when TUN_NOCHECKSUM is clear in tun->flags and 0 when it is set.
+ */
+static u32 tun_get_rx_csum(struct net_device *dev)
+{
+       const struct tun_struct *priv = netdev_priv(dev);
+
+       return !(priv->flags & TUN_NOCHECKSUM);
+}
+
+/*
+ * ethtool set_rx_csum handler.
+ *
+ * A non-zero @data clears TUN_NOCHECKSUM in tun->flags; zero sets it.
+ * Always returns 0.
+ */
+static int tun_set_rx_csum(struct net_device *dev, u32 data)
+{
+       struct tun_struct *priv = netdev_priv(dev);
+
+       if (!data) {
+               priv->flags |= TUN_NOCHECKSUM;
+               return 0;
+       }
+
+       priv->flags &= ~TUN_NOCHECKSUM;
+       return 0;
+}
+
+/* ethtool operations table; installed on the device in tun_setup(). */
+static struct ethtool_ops tun_ethtool_ops = {
+       /* Static device information. */
+       .get_drvinfo    = tun_get_drvinfo,
+       .get_settings   = tun_get_settings,
+       .get_link       = tun_get_link,
+
+       /* Debug message level. */
+       .get_msglevel   = tun_get_msglevel,
+       .set_msglevel   = tun_set_msglevel,
+
+       /* Receive checksum control. */
+       .get_rx_csum    = tun_get_rx_csum,
+       .set_rx_csum    = tun_set_rx_csum,
+};
+
 int __init tun_init(void)
 {
        int ret = 0;
 
-       printk(KERN_INFO "Universal TUN/TAP device driver %s " 
-              "(C)1999-2002 Maxim Krasnyansky\n", TUN_VER);
+       printk(KERN_INFO "tun: %s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
+       printk(KERN_INFO "tun: %s\n", DRV_COPYRIGHT);
 
        ret = misc_register(&tun_miscdev);
        if (ret)
@@ -638,5 +873,7 @@ void tun_cleanup(void)
 
 module_init(tun_init);
 module_exit(tun_cleanup);
+MODULE_DESCRIPTION(DRV_DESCRIPTION);
+MODULE_AUTHOR(DRV_COPYRIGHT);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_MISCDEV(TUN_MINOR);