Revert to Fedora kernel-2.6.17-1.2187_FC5 patched with vs2.0.2.1; there are too many...
diff --git a/drivers/xen/netfront/netfront.c b/drivers/xen/netfront/netfront.c
index 5dc55e8..ef6e134 100644
--- a/drivers/xen/netfront/netfront.c
+++ b/drivers/xen/netfront/netfront.c
@@ -29,6 +29,7 @@
  * IN THE SOFTWARE.
  */
 
+#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/in.h>
 #include <linux/if_ether.h>
 #include <linux/io.h>
-#include <linux/moduleparam.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <net/arp.h>
 #include <net/route.h>
-#include <asm/hypercall.h>
 #include <asm/uaccess.h>
 #include <xen/evtchn.h>
 #include <xen/xenbus.h>
 #include <xen/interface/memory.h>
 #include <xen/balloon.h>
 #include <asm/page.h>
-#include <asm/maddr.h>
 #include <asm/uaccess.h>
 #include <xen/interface/grant_table.h>
 #include <xen/gnttab.h>
 
-/*
- * Mutually-exclusive module options to select receive data path:
- *  rx_copy : Packets are copied by network backend into local memory
- *  rx_flip : Page containing packet data is transferred to our ownership
- * For fully-virtualised guests there is no option - copying must be used.
- * For paravirtualised guests, flipping is the default.
- */
-#ifdef CONFIG_XEN
-static int MODPARM_rx_copy = 0;
-module_param_named(rx_copy, MODPARM_rx_copy, bool, 0);
-MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)");
-static int MODPARM_rx_flip = 0;
-module_param_named(rx_flip, MODPARM_rx_flip, bool, 0);
-MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)");
-#else
-static const int MODPARM_rx_copy = 1;
-static const int MODPARM_rx_flip = 0;
-#endif
-
 #define RX_COPY_THRESHOLD 256
 
-/* If we don't have GSO, fake things up so that we never try to use it. */
-#if defined(NETIF_F_GSO)
-#define HAVE_GSO                       1
-#define HAVE_TSO                       1 /* TSO is a subset of GSO */
-static inline void dev_disable_gso_features(struct net_device *dev)
-{
-       /* Turn off all GSO bits except ROBUST. */
-       dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1;
-       dev->features |= NETIF_F_GSO_ROBUST;
-}
-#elif defined(NETIF_F_TSO)
-#define HAVE_TSO                       1
-#define gso_size tso_size
-#define gso_segs tso_segs
-static inline void dev_disable_gso_features(struct net_device *dev)
-{
-       /* Turn off all TSO bits. */
-       dev->features &= ~NETIF_F_TSO;
-}
-static inline int skb_is_gso(const struct sk_buff *skb)
-{
-        return skb_shinfo(skb)->tso_size;
-}
-static inline int skb_gso_ok(struct sk_buff *skb, int features)
-{
-        return (features & NETIF_F_TSO);
-}
-
-static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
-{
-        return skb_is_gso(skb) &&
-               (!skb_gso_ok(skb, dev->features) ||
-                unlikely(skb->ip_summed != CHECKSUM_HW));
-}
-#else
-#define netif_needs_gso(dev, skb)      0
-#define dev_disable_gso_features(dev)  ((void)0)
-#endif
-
 #define GRANT_INVALID_REF      0
 
 #define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE)
@@ -141,8 +81,8 @@ struct netfront_info {
        spinlock_t   tx_lock;
        spinlock_t   rx_lock;
 
+       unsigned int handle;
        unsigned int evtchn, irq;
-       unsigned int copying_receiver;
 
        /* Receive-ring batched refills. */
 #define RX_MIN_TARGET 8
@@ -164,7 +104,7 @@ struct netfront_info {
        grant_ref_t gref_tx_head;
        grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1];
        grant_ref_t gref_rx_head;
-       grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
+       grant_ref_t grant_rx_ref[NET_TX_RING_SIZE];
 
        struct xenbus_device *xbdev;
        int tx_ring_ref;
@@ -229,18 +169,18 @@ static inline grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
 #define WPRINTK(fmt, args...)                          \
        printk(KERN_WARNING "netfront: " fmt, ##args)
 
+static int talk_to_backend(struct xenbus_device *, struct netfront_info *);
 static int setup_device(struct xenbus_device *, struct netfront_info *);
-static struct net_device *create_netdev(struct xenbus_device *);
+static struct net_device *create_netdev(int, struct xenbus_device *);
 
 static void netfront_closing(struct xenbus_device *);
 
 static void end_access(int, void *);
 static void netif_disconnect_backend(struct netfront_info *);
-static int open_netdev(struct netfront_info *);
 static void close_netdev(struct netfront_info *);
 static void netif_free(struct netfront_info *);
 
-static int network_connect(struct net_device *);
+static void network_connect(struct net_device *);
 static void network_tx_buf_gc(struct net_device *);
 static void network_alloc_rx_buffers(struct net_device *);
 static int send_fake_arp(struct net_device *);
@@ -263,7 +203,8 @@ static inline int xennet_can_sg(struct net_device *dev)
 /**
  * Entry point to this code when a new device is created.  Allocate the basic
  * structures and the ring buffers for communication with the backend, and
- * inform the backend of the appropriate details for those.
+ * inform the backend of the appropriate details for those.  Switch to
+ * Connected state.
  */
 static int __devinit netfront_probe(struct xenbus_device *dev,
                                    const struct xenbus_device_id *id)
@@ -271,8 +212,15 @@ static int __devinit netfront_probe(struct xenbus_device *dev,
        int err;
        struct net_device *netdev;
        struct netfront_info *info;
+       unsigned int handle;
 
-       netdev = create_netdev(dev);
+       err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%u", &handle);
+       if (err != 1) {
+               xenbus_dev_fatal(dev, err, "reading handle");
+               return err;
+       }
+
+       netdev = create_netdev(handle, dev);
        if (IS_ERR(netdev)) {
                err = PTR_ERR(netdev);
                xenbus_dev_fatal(dev, err, "creating netdev");
@@ -282,16 +230,16 @@ static int __devinit netfront_probe(struct xenbus_device *dev,
        info = netdev_priv(netdev);
        dev->dev.driver_data = info;
 
-       err = open_netdev(info);
-       if (err)
-               goto fail;
+       err = talk_to_backend(dev, info);
+       if (err) {
+               xennet_sysfs_delif(info->netdev);
+               unregister_netdev(netdev);
+               free_netdev(netdev);
+               dev->dev.driver_data = NULL;
+               return err;
+       }
 
        return 0;
-
- fail:
-       free_netdev(netdev);
-       dev->dev.driver_data = NULL;
-       return err;
 }
 
 
@@ -308,7 +256,7 @@ static int netfront_resume(struct xenbus_device *dev)
        DPRINTK("%s\n", dev->nodename);
 
        netif_disconnect_backend(info);
-       return 0;
+       return talk_to_backend(dev, info);
 }
 
 static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
@@ -378,13 +326,6 @@ again:
                goto abort_transaction;
        }
 
-       err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
-                           info->copying_receiver);
-       if (err) {
-               message = "writing request-rx-copy";
-               goto abort_transaction;
-       }
-
        err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
        if (err) {
                message = "writing feature-rx-notify";
@@ -397,13 +338,11 @@ again:
                goto abort_transaction;
        }
 
-#ifdef HAVE_TSO
        err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
        if (err) {
                message = "writing feature-gso-tcpv4";
                goto abort_transaction;
        }
-#endif
 
        err = xenbus_transaction_end(xbt, 0);
        if (err) {
@@ -419,7 +358,7 @@ again:
        xenbus_transaction_end(xbt, 1);
        xenbus_dev_fatal(dev, err, "%s", message);
  destroy_ring:
-       netif_disconnect_backend(info);
+       netif_free(info);
  out:
        return err;
 }
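
For orientation: the `again:` and `abort_transaction:` labels above are the standard xenbus transaction idiom, i.e. start, write keys, commit, and retry from scratch when the commit returns -EAGAIN. A minimal sketch of that pattern (the function name and single key are illustrative, not from this driver):

static int write_frontend_keys(struct xenbus_device *dev)
{
	struct xenbus_transaction xbt;
	int err;

again:
	err = xenbus_transaction_start(&xbt);
	if (err)
		return err;

	/* All keys are written under one transaction... */
	err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
	if (err)
		goto abort_transaction;

	/* ...so the backend sees them atomically on commit. */
	err = xenbus_transaction_end(xbt, 0);
	if (err == -EAGAIN)
		goto again;	/* store changed under us: redo the writes */
	return err;

abort_transaction:
	xenbus_transaction_end(xbt, 1);	/* 1 = abort, discard the writes */
	return err;
}
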
@@ -476,8 +415,7 @@ static int setup_device(struct xenbus_device *dev, struct netfront_info *info)
 
        memcpy(netdev->dev_addr, info->mac, ETH_ALEN);
        err = bind_evtchn_to_irqhandler(info->evtchn, netif_int,
-                                       SA_SAMPLE_RANDOM, netdev->name,
-                                       netdev);
+                                       SA_SAMPLE_RANDOM, netdev->name, netdev);
        if (err < 0)
                goto fail;
        info->irq = err;
@@ -498,7 +436,7 @@ static void backend_changed(struct xenbus_device *dev,
        struct netfront_info *np = dev->dev.driver_data;
        struct net_device *netdev = np->netdev;
 
-       DPRINTK("%s\n", xenbus_strstate(backend_state));
+       DPRINTK("\n");
 
        switch (backend_state) {
        case XenbusStateInitialising:
@@ -509,10 +447,7 @@ static void backend_changed(struct xenbus_device *dev,
                break;
 
        case XenbusStateInitWait:
-               if (network_connect(netdev) != 0) {
-                       netif_free(np);
-                       break;
-               }
+               network_connect(netdev);
                xenbus_switch_state(dev, XenbusStateConnected);
                (void)send_fake_arp(netdev);
                break;
@@ -559,14 +494,11 @@ static int network_open(struct net_device *dev)
 
        memset(&np->stats, 0, sizeof(np->stats));
 
-       spin_lock(&np->rx_lock);
-       if (netif_carrier_ok(dev)) {
-               network_alloc_rx_buffers(dev);
-               np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
-               if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
-                       netif_rx_schedule(dev);
-       }
-       spin_unlock(&np->rx_lock);
+       network_alloc_rx_buffers(dev);
+       np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
+
+       if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
+               netif_rx_schedule(dev);
 
        netif_start_queue(dev);
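
Setting rsp_event to rsp_cons + 1 asks the backend to raise an event as soon as one more response is queued; the check that follows closes the race with a response that arrived before rsp_event became visible. This is the logic that xen/interface/io/ring.h packages as RING_FINAL_CHECK_FOR_RESPONSES(); a sketch of its expansion in this context:

	int work_to_do = RING_HAS_UNCONSUMED_RESPONSES(&np->rx);

	if (!work_to_do) {
		/* Ask for an event at the next response... */
		np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
		mb();	/* ...publish that before looking again... */
		work_to_do = RING_HAS_UNCONSUMED_RESPONSES(&np->rx);
	}
	if (work_to_do)	/* ...so a racing response is not lost. */
		netif_rx_schedule(dev);
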
 
@@ -595,7 +527,8 @@ static void network_tx_buf_gc(struct net_device *dev)
        struct netfront_info *np = netdev_priv(dev);
        struct sk_buff *skb;
 
-       BUG_ON(!netif_carrier_ok(dev));
+       if (unlikely(!netif_carrier_ok(dev)))
+               return;
 
        do {
                prod = np->tx.sring->rsp_prod;
@@ -664,8 +597,6 @@ static void network_alloc_rx_buffers(struct net_device *dev)
        grant_ref_t ref;
        unsigned long pfn;
        void *vaddr;
-       int nr_flips;
-       netif_rx_request_t *req;
 
        if (unlikely(!netif_carrier_ok(dev)))
                return;
@@ -721,7 +652,7 @@ no_skb:
                np->rx_target = np->rx_max_target;
 
  refill:
-       for (nr_flips = i = 0; ; i++) {
+       for (i = 0; ; i++) {
                if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
                        break;
 
@@ -732,6 +663,7 @@ no_skb:
                BUG_ON(np->rx_skbs[id]);
                np->rx_skbs[id] = skb;
 
+               RING_GET_REQUEST(&np->rx, req_prod + i)->id = id;
                ref = gnttab_claim_grant_reference(&np->gref_rx_head);
                BUG_ON((signed short)ref < 0);
                np->grant_rx_ref[id] = ref;
@@ -739,68 +671,49 @@ no_skb:
                pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page);
                vaddr = page_address(skb_shinfo(skb)->frags[0].page);
 
-               req = RING_GET_REQUEST(&np->rx, req_prod + i);
-               if (!np->copying_receiver) {
-                       gnttab_grant_foreign_transfer_ref(ref,
-                                                         np->xbdev->otherend_id,
-                                                         pfn);
-                       np->rx_pfn_array[nr_flips] = pfn_to_mfn(pfn);
-                       if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-                               /* Remove this page before passing
-                                * back to Xen. */
-                               set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
-                               MULTI_update_va_mapping(np->rx_mcl+i,
-                                                       (unsigned long)vaddr,
-                                                       __pte(0), 0);
-                       }
-                       nr_flips++;
-               } else {
-                       gnttab_grant_foreign_access_ref(ref,
-                                                       np->xbdev->otherend_id,
-                                                       pfn_to_mfn(pfn),
-                                                       0);
-               }
+               gnttab_grant_foreign_transfer_ref(ref,
+                                                 np->xbdev->otherend_id, pfn);
+               RING_GET_REQUEST(&np->rx, req_prod + i)->gref = ref;
+               np->rx_pfn_array[i] = pfn_to_mfn(pfn);
 
-               req->id = id;
-               req->gref = ref;
+               if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+                       /* Remove this page before passing back to Xen. */
+                       set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+                       MULTI_update_va_mapping(np->rx_mcl+i,
+                                               (unsigned long)vaddr,
+                                               __pte(0), 0);
+               }
        }
 
-       if ( nr_flips != 0 ) {
-               /* Tell the ballon driver what is going on. */
-               balloon_update_driver_allowance(i);
+       /* Tell the balloon driver what is going on. */
+       balloon_update_driver_allowance(i);
 
-               set_xen_guest_handle(reservation.extent_start,
-                                    np->rx_pfn_array);
-               reservation.nr_extents   = nr_flips;
-               reservation.extent_order = 0;
-               reservation.address_bits = 0;
-               reservation.domid        = DOMID_SELF;
+       set_xen_guest_handle(reservation.extent_start, np->rx_pfn_array);
+       reservation.nr_extents   = i;
+       reservation.extent_order = 0;
+       reservation.address_bits = 0;
+       reservation.domid        = DOMID_SELF;
 
-               if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-                       /* After all PTEs have been zapped, flush the TLB. */
-                       np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
-                               UVMF_TLB_FLUSH|UVMF_ALL;
-
-                       /* Give away a batch of pages. */
-                       np->rx_mcl[i].op = __HYPERVISOR_memory_op;
-                       np->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
-                       np->rx_mcl[i].args[1] = (unsigned long)&reservation;
-
-                       /* Zap PTEs and give away pages in one big
-                        * multicall. */
-                       (void)HYPERVISOR_multicall(np->rx_mcl, i+1);
-
-                       /* Check return status of HYPERVISOR_memory_op(). */
-                       if (unlikely(np->rx_mcl[i].result != i))
-                               panic("Unable to reduce memory reservation\n");
-               } else {
-                       if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
-                                                &reservation) != i)
-                               panic("Unable to reduce memory reservation\n");
-               }
-       } else {
-               wmb();
-       }
+       if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+               /* After all PTEs have been zapped, flush the TLB. */
+               np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
+                       UVMF_TLB_FLUSH|UVMF_ALL;
+
+               /* Give away a batch of pages. */
+               np->rx_mcl[i].op = __HYPERVISOR_memory_op;
+               np->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
+               np->rx_mcl[i].args[1] = (unsigned long)&reservation;
+
+               /* Zap PTEs and give away pages in one big multicall. */
+               (void)HYPERVISOR_multicall(np->rx_mcl, i+1);
+
+               /* Check return status of HYPERVISOR_memory_op(). */
+               if (unlikely(np->rx_mcl[i].result != i))
+                       panic("Unable to reduce memory reservation\n");
+       } else if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+                                       &reservation) != i)
+               panic("Unable to reduce memory reservation\n");
 
        /* Above is a suitable barrier to ensure backend will see requests. */
        np->rx.req_prod_pvt = req_prod + i;
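
The barrier comment above refers to the hypercalls just issued; the new req_prod_pvt value still has to be pushed to the shared ring, which the unchanged code following this hunk does with the ring.h push macros. Their core, sketched with the notification check included (this is roughly what RING_PUSH_REQUESTS_AND_CHECK_NOTIFY() in xen/interface/io/ring.h expands to):

	RING_IDX prod_old = np->rx.sring->req_prod;
	RING_IDX prod_new = np->rx.req_prod_pvt;

	wmb();	/* request bodies must be visible before the index moves */
	np->rx.sring->req_prod = prod_new;
	mb();	/* index must be visible before req_event is sampled */

	/* Notify only if the backend has not yet seen prod_old..prod_new. */
	if ((RING_IDX)(prod_new - np->rx.sring->req_event) <
	    (RING_IDX)(prod_new - prod_old))
		notify_remote_via_irq(np->irq);
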
@@ -927,13 +840,10 @@ static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
        if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
                tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
-#ifdef CONFIG_XEN
        if (skb->proto_data_valid) /* remote but checksummed? */
                tx->flags |= NETTXF_data_validated;
-#endif
 
-#ifdef HAVE_TSO
-       if (skb_is_gso(skb)) {
+       if (skb_shinfo(skb)->gso_size) {
                struct netif_extra_info *gso = (struct netif_extra_info *)
                        RING_GET_REQUEST(&np->tx, ++i);
 
@@ -951,7 +861,6 @@ static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
                gso->flags = 0;
                extra = gso;
        }
-#endif
 
        np->tx.req_prod_pvt = i + 1;
 
@@ -987,16 +896,13 @@ static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
        unsigned long flags;
 
        spin_lock_irqsave(&np->tx_lock, flags);
-
-       if (likely(netif_carrier_ok(dev))) {
-               network_tx_buf_gc(dev);
-               /* Under tx_lock: protects access to rx shared-ring indexes. */
-               if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
-                       netif_rx_schedule(dev);
-       }
-
+       network_tx_buf_gc(dev);
        spin_unlock_irqrestore(&np->tx_lock, flags);
 
+       if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx) &&
+           likely(netif_running(dev)))
+               netif_rx_schedule(dev);
+
        return IRQ_HANDLED;
 }
 
@@ -1041,10 +947,8 @@ int xennet_get_extras(struct netfront_info *np,
                                WPRINTK("Invalid extra type: %d\n",
                                        extra->type);
                        err = -EINVAL;
-               } else {
-                       memcpy(&extras[extra->type - 1], extra,
-                              sizeof(*extra));
-               }
+               } else
+                       memcpy(&extras[extra->type - 1], extra, sizeof(*extra));
 
                skb = xennet_get_rx_skb(np, cons);
                ref = xennet_get_rx_ref(np, cons);
@@ -1057,12 +961,10 @@ int xennet_get_extras(struct netfront_info *np,
 
 static int xennet_get_responses(struct netfront_info *np,
                                struct netfront_rx_info *rinfo, RING_IDX rp,
-                               struct sk_buff_head *list,
-                               int *pages_flipped_p)
+                               struct sk_buff_head *list, int count)
 {
-       int pages_flipped = *pages_flipped_p;
-       struct mmu_update *mmu;
-       struct multicall_entry *mcl;
+       struct mmu_update *mmu = np->rx_mmu + count;
+       struct multicall_entry *mcl = np->rx_mcl + count;
        struct netif_rx_response *rx = &rinfo->rx;
        struct netif_extra_info *extras = rinfo->extras;
        RING_IDX cons = np->rx.rsp_cons;
@@ -1071,7 +973,6 @@ static int xennet_get_responses(struct netfront_info *np,
        int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
        int frags = 1;
        int err = 0;
-       unsigned long ret;
 
        if (rx->flags & NETRXF_extra_info) {
                err = xennet_get_extras(np, extras, rp);
@@ -1086,9 +987,7 @@ static int xennet_get_responses(struct netfront_info *np,
                        if (net_ratelimit())
                                WPRINTK("rx->offset: %x, size: %u\n",
                                        rx->offset, rx->status);
-                       xennet_move_rx_slot(np, skb, ref);
                        err = -EINVAL;
-                       goto next;
                }
 
                /*
@@ -1097,54 +996,41 @@ static int xennet_get_responses(struct netfront_info *np,
                 * situation to the system controller to reboot the backend.
                 */
                if (ref == GRANT_INVALID_REF) {
-                       if (net_ratelimit())
-                               WPRINTK("Bad rx response id %d.\n", rx->id);
+                       WPRINTK("Bad rx response id %d.\n", rx->id);
                        err = -EINVAL;
                        goto next;
                }
 
-               if (!np->copying_receiver) {
-                       /* Memory pressure, insufficient buffer
-                        * headroom, ... */
-                       if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) {
-                               if (net_ratelimit())
-                                       WPRINTK("Unfulfilled rx req "
-                                               "(id=%d, st=%d).\n",
-                                               rx->id, rx->status);
-                               xennet_move_rx_slot(np, skb, ref);
-                               err = -ENOMEM;
-                               goto next;
-                       }
-
-                       if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-                               /* Remap the page. */
-                               struct page *page =
-                                       skb_shinfo(skb)->frags[0].page;
-                               unsigned long pfn = page_to_pfn(page);
-                               void *vaddr = page_address(page);
-
-                               mcl = np->rx_mcl + pages_flipped;
-                               mmu = np->rx_mmu + pages_flipped;
-
-                               MULTI_update_va_mapping(mcl,
-                                                       (unsigned long)vaddr,
-                                                       pfn_pte_ma(mfn,
-                                                                  PAGE_KERNEL),
-                                                       0);
-                               mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
-                                       | MMU_MACHPHYS_UPDATE;
-                               mmu->val = pfn;
-
-                               set_phys_to_machine(pfn, mfn);
-                       }
-                       pages_flipped++;
-               } else {
-                       ret = gnttab_end_foreign_access_ref(ref, 0);
-                       BUG_ON(!ret);
+               /* Memory pressure, insufficient buffer headroom, ... */
+               if ((mfn = gnttab_end_foreign_transfer_ref(ref)) == 0) {
+                       if (net_ratelimit())
+                               WPRINTK("Unfulfilled rx req (id=%d, st=%d).\n",
+                                       rx->id, rx->status);
+                       xennet_move_rx_slot(np, skb, ref);
+                       err = -ENOMEM;
+                       goto next;
                }
 
                gnttab_release_grant_reference(&np->gref_rx_head, ref);
 
+               if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+                       /* Remap the page. */
+                       struct page *page = skb_shinfo(skb)->frags[0].page;
+                       unsigned long pfn = page_to_pfn(page);
+                       void *vaddr = page_address(page);
+
+                       MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
+                                               pfn_pte_ma(mfn, PAGE_KERNEL),
+                                               0);
+                       mcl++;
+                       mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
+                               | MMU_MACHPHYS_UPDATE;
+                       mmu->val = pfn;
+                       mmu++;
+
+                       set_phys_to_machine(pfn, mfn);
+               }
+
                __skb_queue_tail(list, skb);
 
 next:
@@ -1170,11 +1056,6 @@ next:
                err = -E2BIG;
        }
 
-       if (unlikely(err))
-               np->rx.rsp_cons = cons + frags;
-
-       *pages_flipped_p = pages_flipped;
-
        return err;
 }
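
Each page accepted on the transfer path needs three pieces of bookkeeping, which the remap block above batches through the rx_mcl[] and rx_mmu[] arrays; restated per page as a sketch (PV, non-autotranslated guests only):

	/* Backing frame 'mfn' just arrived for the skb's frag page. */
	unsigned long pfn = page_to_pfn(page);

	/* 1. Re-point the kernel linear mapping at the new machine frame. */
	MULTI_update_va_mapping(mcl, (unsigned long)page_address(page),
				pfn_pte_ma(mfn, PAGE_KERNEL), 0);

	/* 2. Queue a machine-to-physical (M2P) update for the hypervisor. */
	mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
	mmu->val = pfn;

	/* 3. Record the physical-to-machine (P2M) mapping guest side. */
	set_phys_to_machine(pfn, mfn);
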
 
@@ -1209,8 +1090,7 @@ static RING_IDX xennet_fill_frags(struct netfront_info *np,
        return cons;
 }
 
-static int xennet_set_skb_gso(struct sk_buff *skb,
-                             struct netif_extra_info *gso)
+static int xennet_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
 {
        if (!gso->u.gso.size) {
                if (net_ratelimit())
@@ -1225,22 +1105,14 @@ static int xennet_set_skb_gso(struct sk_buff *skb,
                return -EINVAL;
        }
 
-#ifdef HAVE_TSO
        skb_shinfo(skb)->gso_size = gso->u.gso.size;
-#ifdef HAVE_GSO
        skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
 
        /* Header must be checked, and gso_segs computed. */
        skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
-#endif
        skb_shinfo(skb)->gso_segs = 0;
 
        return 0;
-#else
-       if (net_ratelimit())
-               WPRINTK("GSO unsupported by this kernel.\n");
-       return -EINVAL;
-#endif
 }
 
 static int netif_poll(struct net_device *dev, int *pbudget)
@@ -1258,7 +1130,7 @@ static int netif_poll(struct net_device *dev, int *pbudget)
        struct sk_buff_head tmpq;
        unsigned long flags;
        unsigned int len;
-       int pages_flipped = 0;
+       int pages_done;
        int err;
 
        spin_lock(&np->rx_lock);
@@ -1277,21 +1149,22 @@ static int netif_poll(struct net_device *dev, int *pbudget)
        rp = np->rx.sring->rsp_prod;
        rmb(); /* Ensure we see queued responses up to 'rp'. */
 
-       i = np->rx.rsp_cons;
-       work_done = 0;
-       while ((i != rp) && (work_done < budget)) {
+       for (i = np->rx.rsp_cons, work_done = 0, pages_done = 0;
+            (i != rp) && (work_done < budget);
+            np->rx.rsp_cons = ++i, work_done++) {
                memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
                memset(extras, 0, sizeof(extras));
 
-               err = xennet_get_responses(np, &rinfo, rp, &tmpq,
-                                          &pages_flipped);
+               err = xennet_get_responses(np, &rinfo, rp, &tmpq, pages_done);
+               pages_done += skb_queue_len(&tmpq);
 
                if (unlikely(err)) {
-err:   
+err:
+                       i = np->rx.rsp_cons + skb_queue_len(&tmpq) - 1;
+                       work_done--;
                        while ((skb = __skb_dequeue(&tmpq)))
                                __skb_queue_tail(&errq, skb);
                        np->stats.rx_errors++;
-                       i = np->rx.rsp_cons;
                        continue;
                }
 
@@ -1303,7 +1176,6 @@ err:
 
                        if (unlikely(xennet_set_skb_gso(skb, gso))) {
                                __skb_queue_head(&tmpq, skb);
-                               np->rx.rsp_cons += skb_queue_len(&tmpq);
                                goto err;
                        }
                }
@@ -1327,66 +1199,40 @@ err:
                }
 
                i = xennet_fill_frags(np, skb, &tmpq);
-
-               /*
-                * Truesize must approximates the size of true data plus
-                * any supervisor overheads. Adding hypervisor overheads
-                * has been shown to significantly reduce achievable
-                * bandwidth with the default receive buffer size. It is
-                * therefore not wise to account for it here.
-                *
-                * After alloc_skb(RX_COPY_THRESHOLD), truesize is set to
-                * RX_COPY_THRESHOLD + the supervisor overheads. Here, we
-                * add the size of the data pulled in xennet_fill_frags().
-                *
-                * We also adjust for any unused space in the main data
-                * area by subtracting (RX_COPY_THRESHOLD - len). This is
-                * especially important with drivers which split incoming
-                * packets into header and data, using only 66 bytes of
-                * the main data area (see the e1000 driver for example.)
-                * On such systems, without this last adjustement, our
-                * achievable receive throughout using the standard receive
-                * buffer size was cut by 25%(!!!).
-                */
-               skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len);
+               skb->truesize += skb->data_len;
                skb->len += skb->data_len;
 
                /*
                 * Old backends do not assert data_validated but we
                 * can infer it from csum_blank so test both flags.
                 */
-               if (rx->flags & (NETRXF_data_validated|NETRXF_csum_blank))
+               if (rx->flags & (NETRXF_data_validated|NETRXF_csum_blank)) {
                        skb->ip_summed = CHECKSUM_UNNECESSARY;
-               else
+                       skb->proto_data_valid = 1;
+               } else {
                        skb->ip_summed = CHECKSUM_NONE;
-#ifdef CONFIG_XEN
-               skb->proto_data_valid = (skb->ip_summed != CHECKSUM_NONE);
+                       skb->proto_data_valid = 0;
+               }
                skb->proto_csum_blank = !!(rx->flags & NETRXF_csum_blank);
-#endif
+
                np->stats.rx_packets++;
                np->stats.rx_bytes += skb->len;
 
                __skb_queue_tail(&rxq, skb);
-
-               np->rx.rsp_cons = ++i;
-               work_done++;
        }
 
-       if (pages_flipped) {
-               /* Some pages are no longer absent... */
-               balloon_update_driver_allowance(-pages_flipped);
-
-               /* Do all the remapping work and M2P updates. */
-               if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-                       mcl = np->rx_mcl + pages_flipped;
-                       mcl->op = __HYPERVISOR_mmu_update;
-                       mcl->args[0] = (unsigned long)np->rx_mmu;
-                       mcl->args[1] = pages_flipped;
-                       mcl->args[2] = 0;
-                       mcl->args[3] = DOMID_SELF;
-                       (void)HYPERVISOR_multicall(np->rx_mcl,
-                                                  pages_flipped + 1);
-               }
+       /* Some pages are no longer absent... */
+       balloon_update_driver_allowance(-pages_done);
+
+       /* Do all the remapping work, and M2P updates, in one big hypercall. */
+       if (likely(pages_done)) {
+               mcl = np->rx_mcl + pages_done;
+               mcl->op = __HYPERVISOR_mmu_update;
+               mcl->args[0] = (unsigned long)np->rx_mmu;
+               mcl->args[1] = pages_done;
+               mcl->args[2] = 0;
+               mcl->args[3] = DOMID_SELF;
+               (void)HYPERVISOR_multicall(np->rx_mcl, pages_done + 1);
        }
 
        while ((skb = __skb_dequeue(&errq)))
@@ -1437,111 +1283,6 @@ err:
        return more_to_do;
 }
 
-static void netif_release_tx_bufs(struct netfront_info *np)
-{
-       struct sk_buff *skb;
-       int i;
-
-       for (i = 1; i <= NET_TX_RING_SIZE; i++) {
-               if ((unsigned long)np->tx_skbs[i] < PAGE_OFFSET)
-                       continue;
-
-               skb = np->tx_skbs[i];
-               gnttab_end_foreign_access_ref(
-                       np->grant_tx_ref[i], GNTMAP_readonly);
-               gnttab_release_grant_reference(
-                       &np->gref_tx_head, np->grant_tx_ref[i]);
-               np->grant_tx_ref[i] = GRANT_INVALID_REF;
-               add_id_to_freelist(np->tx_skbs, i);
-               dev_kfree_skb_irq(skb);
-       }
-}
-
-static void netif_release_rx_bufs(struct netfront_info *np)
-{
-       struct mmu_update      *mmu = np->rx_mmu;
-       struct multicall_entry *mcl = np->rx_mcl;
-       struct sk_buff_head free_list;
-       struct sk_buff *skb;
-       unsigned long mfn;
-       int xfer = 0, noxfer = 0, unused = 0;
-       int id, ref;
-
-       if (np->copying_receiver) {
-               printk("%s: fix me for copying receiver.\n", __FUNCTION__);
-               return;
-       }
-
-       skb_queue_head_init(&free_list);
-
-       spin_lock(&np->rx_lock);
-
-       for (id = 0; id < NET_RX_RING_SIZE; id++) {
-               if ((ref = np->grant_rx_ref[id]) == GRANT_INVALID_REF) {
-                       unused++;
-                       continue;
-               }
-
-               skb = np->rx_skbs[id];
-               mfn = gnttab_end_foreign_transfer_ref(ref);
-               gnttab_release_grant_reference(&np->gref_rx_head, ref);
-               np->grant_rx_ref[id] = GRANT_INVALID_REF;
-               add_id_to_freelist(np->rx_skbs, id);
-
-               if (0 == mfn) {
-                       struct page *page = skb_shinfo(skb)->frags[0].page;
-                       balloon_release_driver_page(page);
-                       skb_shinfo(skb)->nr_frags = 0;
-                       dev_kfree_skb(skb);
-                       noxfer++;
-                       continue;
-               }
-
-               if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-                       /* Remap the page. */
-                       struct page *page = skb_shinfo(skb)->frags[0].page;
-                       unsigned long pfn = page_to_pfn(page);
-                       void *vaddr = page_address(page);
-
-                       MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
-                                               pfn_pte_ma(mfn, PAGE_KERNEL),
-                                               0);
-                       mcl++;
-                       mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
-                               | MMU_MACHPHYS_UPDATE;
-                       mmu->val = pfn;
-                       mmu++;
-
-                       set_phys_to_machine(pfn, mfn);
-               }
-               __skb_queue_tail(&free_list, skb);
-               xfer++;
-       }
-
-       printk("%s: %d xfer, %d noxfer, %d unused\n",
-              __FUNCTION__, xfer, noxfer, unused);
-
-       if (xfer) {
-               /* Some pages are no longer absent... */
-               balloon_update_driver_allowance(-xfer);
-
-               if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-                       /* Do all the remapping work and M2P updates. */
-                       mcl->op = __HYPERVISOR_mmu_update;
-                       mcl->args[0] = (unsigned long)np->rx_mmu;
-                       mcl->args[1] = mmu - np->rx_mmu;
-                       mcl->args[2] = 0;
-                       mcl->args[3] = DOMID_SELF;
-                       mcl++;
-                       HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl);
-               }
-       }
-
-       while ((skb = __skb_dequeue(&free_list)) != NULL)
-               dev_kfree_skb(skb);
-
-       spin_unlock(&np->rx_lock);
-}
 
 static int network_close(struct net_device *dev)
 {
@@ -1586,7 +1327,6 @@ static int xennet_set_sg(struct net_device *dev, u32 data)
 
 static int xennet_set_tso(struct net_device *dev, u32 data)
 {
-#ifdef HAVE_TSO
        if (data) {
                struct netfront_info *np = netdev_priv(dev);
                int val;
@@ -1599,72 +1339,53 @@ static int xennet_set_tso(struct net_device *dev, u32 data)
        }
 
        return ethtool_op_set_tso(dev, data);
-#else
-       return -ENOSYS;
-#endif
 }
 
 static void xennet_set_features(struct net_device *dev)
 {
-       dev_disable_gso_features(dev);
+       /* Turn off all GSO bits except ROBUST. */
+       dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1;
+       dev->features |= NETIF_F_GSO_ROBUST;
        xennet_set_sg(dev, 0);
 
-       /* We need checksum offload to enable scatter/gather and TSO. */
-       if (!(dev->features & NETIF_F_IP_CSUM))
-               return;
-
        if (!xennet_set_sg(dev, 1))
                xennet_set_tso(dev, 1);
 }
 
-static int network_connect(struct net_device *dev)
+static void network_connect(struct net_device *dev)
 {
        struct netfront_info *np = netdev_priv(dev);
-       int i, requeue_idx, err;
+       int i, requeue_idx;
        struct sk_buff *skb;
        grant_ref_t ref;
-       netif_rx_request_t *req;
-       unsigned int feature_rx_copy, feature_rx_flip;
-
-       err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
-                          "feature-rx-copy", "%u", &feature_rx_copy);
-       if (err != 1)
-               feature_rx_copy = 0;
-       err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
-                          "feature-rx-flip", "%u", &feature_rx_flip);
-       if (err != 1)
-               feature_rx_flip = 1;
-
-       /*
-        * Copy packets on receive path if:
-        *  (a) This was requested by user, and the backend supports it; or
-        *  (b) Flipping was requested, but this is unsupported by the backend.
-        */
-       np->copying_receiver = ((MODPARM_rx_copy && feature_rx_copy) ||
-                               (MODPARM_rx_flip && !feature_rx_flip));
-
-       err = talk_to_backend(np->xbdev, np);
-       if (err)
-               return err;
 
        xennet_set_features(dev);
 
-       IPRINTK("device %s has %sing receive path.\n",
-               dev->name, np->copying_receiver ? "copy" : "flipp");
-
        spin_lock_irq(&np->tx_lock);
        spin_lock(&np->rx_lock);
 
        /*
          * Recovery procedure:
         *  NB. Freelist index entries are always going to be less than
         *  PAGE_OFFSET, whereas pointers to skbs will always be equal or
         *  greater than PAGE_OFFSET: we use this property to distinguish
         *  them.
          */
 
        /* Step 1: Discard all pending TX packet fragments. */
-       netif_release_tx_bufs(np);
+       for (requeue_idx = 0, i = 1; i <= NET_TX_RING_SIZE; i++) {
+               if ((unsigned long)np->tx_skbs[i] < PAGE_OFFSET)
+                       continue;
+
+               skb = np->tx_skbs[i];
+               gnttab_end_foreign_access_ref(
+                       np->grant_tx_ref[i], GNTMAP_readonly);
+               gnttab_release_grant_reference(
+                       &np->gref_tx_head, np->grant_tx_ref[i]);
+               np->grant_tx_ref[i] = GRANT_INVALID_REF;
+               add_id_to_freelist(np->tx_skbs, i);
+               dev_kfree_skb_irq(skb);
+       }
 
        /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
        for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
@@ -1673,21 +1394,13 @@ static int network_connect(struct net_device *dev)
 
                skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i);
                ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
-               req = RING_GET_REQUEST(&np->rx, requeue_idx);
 
-               if (!np->copying_receiver) {
-                       gnttab_grant_foreign_transfer_ref(
-                               ref, np->xbdev->otherend_id,
-                               page_to_pfn(skb_shinfo(skb)->frags->page));
-               } else {
-                       gnttab_grant_foreign_access_ref(
-                               ref, np->xbdev->otherend_id,
-                               pfn_to_mfn(page_to_pfn(skb_shinfo(skb)->
-                                                      frags->page)),
-                               0);
-               }
-               req->gref = ref;
-               req->id   = requeue_idx;
+               gnttab_grant_foreign_transfer_ref(
+                       ref, np->xbdev->otherend_id,
+                       page_to_pfn(skb_shinfo(skb)->frags->page));
+
+               RING_GET_REQUEST(&np->rx, requeue_idx)->gref = ref;
+               RING_GET_REQUEST(&np->rx, requeue_idx)->id   = requeue_idx;
 
                requeue_idx++;
        }
@@ -1707,15 +1420,11 @@ static int network_connect(struct net_device *dev)
 
        spin_unlock(&np->rx_lock);
        spin_unlock_irq(&np->tx_lock);
-
-       return 0;
 }
 
 static void netif_uninit(struct net_device *dev)
 {
        struct netfront_info *np = netdev_priv(dev);
-       netif_release_tx_bufs(np);
-       netif_release_rx_bufs(np);
        gnttab_free_grant_references(np->gref_tx_head);
        gnttab_free_grant_references(np->gref_rx_head);
 }
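
netif_uninit() returns the two grant-reference pools wholesale. For reference, the per-reference lifecycle used throughout this file is roughly the following sketch (the pool size, otherend_id and pfn are illustrative placeholders):

	grant_ref_t head, ref;
	unsigned long mfn;

	/* Carve a private pool of references out of the grant table. */
	if (gnttab_alloc_grant_references(NET_RX_RING_SIZE, &head) < 0)
		return -ENOMEM;

	ref = gnttab_claim_grant_reference(&head);	/* take one ref */
	gnttab_grant_foreign_transfer_ref(ref, otherend_id, pfn);
	/* ... backend transfers a page of received data via this ref ... */
	mfn = gnttab_end_foreign_transfer_ref(ref);	/* reclaim; 0 on failure */
	gnttab_release_grant_reference(&head, ref);	/* back to the pool */

	gnttab_free_grant_references(head);	/* whole pool, at teardown */
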
@@ -1874,7 +1583,13 @@ static void network_set_multicast_list(struct net_device *dev)
 {
 }
 
-static struct net_device * __devinit create_netdev(struct xenbus_device *dev)
+/** Create a network device.
+ * @param handle device handle
+ * @param dev xenbus device being probed
+ * @return pointer to the new net_device, or an ERR_PTR() code on failure
+ */
+static struct net_device * __devinit create_netdev(int handle,
+                                                  struct xenbus_device *dev)
 {
        int i, err = 0;
        struct net_device *netdev = NULL;
@@ -1887,8 +1602,9 @@ static struct net_device * __devinit create_netdev(struct xenbus_device *dev)
                return ERR_PTR(-ENOMEM);
        }
 
-       np                   = netdev_priv(netdev);
-       np->xbdev            = dev;
+       np                = netdev_priv(netdev);
+       np->handle        = handle;
+       np->xbdev         = dev;
 
        netif_carrier_off(netdev);
 
@@ -1945,9 +1661,27 @@ static struct net_device * __devinit create_netdev(struct xenbus_device *dev)
        SET_MODULE_OWNER(netdev);
        SET_NETDEV_DEV(netdev, &dev->dev);
 
+       err = register_netdev(netdev);
+       if (err) {
+               printk(KERN_WARNING "%s> register_netdev err=%d\n",
+                      __FUNCTION__, err);
+               goto exit_free_rx;
+       }
+
+       err = xennet_sysfs_addif(netdev);
+       if (err) {
+               /* This can be non-fatal: it only means no tuning parameters */
+               printk(KERN_WARNING "%s> add sysfs failed err=%d\n",
+                      __FUNCTION__, err);
+       }
+
        np->netdev = netdev;
+
        return netdev;
 
+
+ exit_free_rx:
+       gnttab_free_grant_references(np->gref_rx_head);
  exit_free_tx:
        gnttab_free_grant_references(np->gref_tx_head);
  exit:
@@ -1986,10 +1720,11 @@ static void netfront_closing(struct xenbus_device *dev)
 {
        struct netfront_info *info = dev->dev.driver_data;
 
-       DPRINTK("%s\n", dev->nodename);
+       DPRINTK("netfront_closing: %s removed\n", dev->nodename);
 
        close_netdev(info);
-       xenbus_frontend_closed(dev);
+
+       xenbus_switch_state(dev, XenbusStateClosed);
 }
 
 
@@ -2006,28 +1741,6 @@ static int __devexit netfront_remove(struct xenbus_device *dev)
 }
 
 
-static int open_netdev(struct netfront_info *info)
-{
-       int err;
-       
-       err = register_netdev(info->netdev);
-       if (err) {
-               printk(KERN_WARNING "%s: register_netdev err=%d\n",
-                      __FUNCTION__, err);
-               return err;
-       }
-
-       err = xennet_sysfs_addif(info->netdev);
-       if (err) {
-               unregister_netdev(info->netdev);
-               printk(KERN_WARNING "%s: add sysfs failed err=%d\n",
-                      __FUNCTION__, err);
-               return err;
-       }
-
-       return 0;
-}
-
 static void close_netdev(struct netfront_info *info)
 {
        del_timer_sync(&info->rx_refill_timer);
@@ -2105,17 +1818,7 @@ static int __init netif_init(void)
        if (!is_running_on_xen())
                return -ENODEV;
 
-#ifdef CONFIG_XEN
-       if (MODPARM_rx_flip && MODPARM_rx_copy) {
-               WPRINTK("Cannot specify both rx_copy and rx_flip.\n");
-               return -EINVAL;
-       }
-
-       if (!MODPARM_rx_flip && !MODPARM_rx_copy)
-               MODPARM_rx_flip = 1; /* Default is to flip. */
-#endif
-
-       if (is_initial_xendomain())
+       if (xen_start_info->flags & SIF_INITDOMAIN)
                return 0;
 
        IPRINTK("Initialising virtual ethernet driver.\n");