1 /******************************************************************************
2 * drivers/xen/netback/netback.c
4 * Back-end of the driver for virtual network devices. This portion of the
5 * driver exports a 'unified' network-device interface that can be accessed
6 * by any operating system that implements a compatible front end. A
7 * reference front-end implementation can be found in:
8 * drivers/xen/netfront/netfront.c
10 * Copyright (c) 2002-2005, K A Fraser
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License version 2
14 * as published by the Free Software Foundation; or, when distributed
15 * separately from the Linux kernel or incorporated into other
16 * software packages, subject to the following license:
18 * Permission is hereby granted, free of charge, to any person obtaining a copy
19 * of this source file (the "Software"), to deal in the Software without
20 * restriction, including without limitation the rights to use, copy, modify,
21 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
22 * and to permit persons to whom the Software is furnished to do so, subject to
23 * the following conditions:
25 * The above copyright notice and this permission notice shall be included in
26 * all copies or substantial portions of the Software.
28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
31 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
33 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
38 #include <xen/balloon.h>
39 #include <xen/interface/memory.h>
42 /*#define NETBE_DEBUG_INTERRUPT*/
44 struct netbk_rx_meta {
50 static void netif_idx_release(u16 pending_idx);
51 static void netif_page_release(struct page *page);
52 static void make_tx_response(netif_t *netif,
53 netif_tx_request_t *txp,
55 static netif_rx_response_t *make_rx_response(netif_t *netif,
62 static void net_tx_action(unsigned long unused);
63 static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
65 static void net_rx_action(unsigned long unused);
66 static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
68 static struct timer_list net_timer;
70 #define MAX_PENDING_REQS 256
72 static struct sk_buff_head rx_queue;
74 static struct page **mmap_pages;
75 static inline unsigned long idx_to_kaddr(unsigned int idx)
77 return (unsigned long)pfn_to_kaddr(page_to_pfn(mmap_pages[idx]));
80 #define PKT_PROT_LEN 64
82 static struct pending_tx_info {
83 netif_tx_request_t req;
85 } pending_tx_info[MAX_PENDING_REQS];
86 static u16 pending_ring[MAX_PENDING_REQS];
87 typedef unsigned int PEND_RING_IDX;
88 #define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
89 static PEND_RING_IDX pending_prod, pending_cons;
90 #define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
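/*
 * Illustrative sketch (not compiled code): pending_ring[] is a
 * producer/consumer ring of free slot indices.  MASK_PEND_IDX() relies on
 * MAX_PENDING_REQS being a power of two, so the free-running PEND_RING_IDX
 * counters reduce modulo the ring size with a single AND:
 *
 *	idx = pending_ring[MASK_PEND_IDX(pending_cons++)];	take a free slot
 *	...
 *	pending_ring[MASK_PEND_IDX(pending_prod++)] = idx;	return the slot
 *
 * pending_prod starts at MAX_PENDING_REQS and pending_cons at 0, so
 * NR_PENDING_REQS counts the slots currently in flight.
 */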
92 /* Freed TX SKBs get batched on this ring before return to pending_ring. */
93 static u16 dealloc_ring[MAX_PENDING_REQS];
94 static PEND_RING_IDX dealloc_prod, dealloc_cons;
96 static struct sk_buff_head tx_queue;
98 static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
99 static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
100 static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
102 static struct list_head net_schedule_list;
103 static spinlock_t net_schedule_list_lock;
105 #define MAX_MFN_ALLOC 64
106 static unsigned long mfn_list[MAX_MFN_ALLOC];
107 static unsigned int alloc_index = 0;
109 static inline unsigned long alloc_mfn(void)
111 return mfn_list[--alloc_index];
114 static int check_mfn(int nr)
116 struct xen_memory_reservation reservation = {
121 if (likely(alloc_index >= nr))
124 set_xen_guest_handle(reservation.extent_start, mfn_list + alloc_index);
125 reservation.nr_extents = MAX_MFN_ALLOC - alloc_index;
126 alloc_index += HYPERVISOR_memory_op(XENMEM_increase_reservation,
129 return alloc_index >= nr ? 0 : -ENOMEM;
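/*
 * Usage sketch for the MFN pool above: the flipping receive path tops the
 * pool up before handing pages to the guest, then pops one MFN per page:
 *
 *	if (!xen_feature(XENFEAT_auto_translated_physmap) &&
 *	    check_mfn(nr_frags + 1))
 *		back off and retry later;
 *	...
 *	new_mfn = alloc_mfn();
 *
 * check_mfn() refills mfn_list[] via XENMEM_increase_reservation and
 * returns -ENOMEM if Xen could not supply enough extents, which is how
 * net_rx_action() detects a memory squeeze.
 */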
132 static inline void maybe_schedule_tx_action(void)
135 if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
136 !list_empty(&net_schedule_list))
137 tasklet_schedule(&net_tx_tasklet);
141 * A gross way of confirming the origin of an skb data page. The slab
142 * allocator abuses a field in the page struct to cache the owning struct kmem_cache pointer.
144 static inline int is_xen_skb(struct sk_buff *skb)
146 extern struct kmem_cache *skbuff_cachep;
147 struct kmem_cache *cp = (struct kmem_cache *)virt_to_page(skb->head)->lru.next;
148 return (cp == skbuff_cachep);
152 * We can flip without copying the packet unless:
153 * 1. The data is not allocated from our special cache; or
154 * 2. The main data area is shared; or
155 * 3. One or more fragments are shared; or
156 * 4. There are chained fragments.
158 static inline int is_flippable_skb(struct sk_buff *skb)
162 if (!is_xen_skb(skb) || skb_cloned(skb))
165 for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) {
166 if (page_count(skb_shinfo(skb)->frags[frag].page) > 1)
170 if (skb_shinfo(skb)->frag_list != NULL)
176 static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
178 struct skb_shared_info *ninfo;
179 struct sk_buff *nskb;
180 unsigned long offset;
185 BUG_ON(skb_shinfo(skb)->frag_list != NULL);
187 nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC);
191 skb_reserve(nskb, 16);
192 headlen = nskb->end - nskb->data;
193 if (headlen > skb_headlen(skb))
194 headlen = skb_headlen(skb);
195 ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
198 ninfo = skb_shinfo(nskb);
199 ninfo->gso_size = skb_shinfo(skb)->gso_size;
200 ninfo->gso_type = skb_shinfo(skb)->gso_type;
203 len = skb->len - headlen;
205 nskb->len = skb->len;
206 nskb->data_len = len;
207 nskb->truesize += len;
214 if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
219 copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
220 zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
222 page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
226 ret = skb_copy_bits(skb, offset, page_address(page), copy);
229 ninfo->frags[ninfo->nr_frags].page = page;
230 ninfo->frags[ninfo->nr_frags].page_offset = 0;
231 ninfo->frags[ninfo->nr_frags].size = copy;
238 offset = nskb->data - skb->data;
240 nskb->h.raw = skb->h.raw + offset;
241 nskb->nh.raw = skb->nh.raw + offset;
242 nskb->mac.raw = skb->mac.raw + offset;
252 static inline int netbk_max_required_rx_slots(netif_t *netif)
254 if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
255 return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
256 return 1; /* all in one */
259 static inline int netbk_queue_full(netif_t *netif)
261 RING_IDX peek = netif->rx_req_cons_peek;
262 RING_IDX needed = netbk_max_required_rx_slots(netif);
264 return ((netif->rx.sring->req_prod - peek) < needed) ||
265 ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
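/*
 * Worked note: for an SG/TSO-capable frontend the worst-case skb needs
 * MAX_SKB_FRAGS + 2 ring slots (header + extra_info + frags).  The queue is
 * treated as full if either the frontend has not yet posted that many
 * unconsumed requests (sring->req_prod - rx_req_cons_peek) or fewer than
 * that many response slots remain ahead of rsp_prod_pvt, so any skb we
 * accept can always be completed without waiting on the frontend.
 */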
268 static void tx_queue_callback(unsigned long data)
270 netif_t *netif = (netif_t *)data;
271 if (netif_schedulable(netif->dev))
272 netif_wake_queue(netif->dev);
275 int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
277 netif_t *netif = netdev_priv(dev);
279 BUG_ON(skb->dev != dev);
281 /* Drop the packet if the target domain has no receive buffers. */
282 if (unlikely(!netif_schedulable(dev) || netbk_queue_full(netif)))
286 * Copy the packet here if it's destined for a flipping interface
287 * but isn't flippable (e.g. extra references to data).
289 if (!netif->copying_receiver && !is_flippable_skb(skb)) {
290 struct sk_buff *nskb = netbk_copy_skb(skb);
291 if (unlikely(nskb == NULL))
293 /* Copy only the header fields we use in this driver. */
294 nskb->dev = skb->dev;
295 nskb->ip_summed = skb->ip_summed;
296 nskb->proto_data_valid = skb->proto_data_valid;
301 netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
302 !!skb_shinfo(skb)->gso_size;
305 if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
306 netif->rx.sring->req_event = netif->rx_req_cons_peek +
307 netbk_max_required_rx_slots(netif);
308 mb(); /* request notification /then/ check & stop the queue */
309 if (netbk_queue_full(netif)) {
310 netif_stop_queue(dev);
312 * Schedule 500ms timeout to restart the queue, thus
313 * ensuring that an inactive queue will be drained.
314 * Packets will be dropped immediately until more
315 * receive buffers become available (see
316 * netbk_queue_full() check above).
318 netif->tx_queue_timeout.data = (unsigned long)netif;
319 netif->tx_queue_timeout.function = tx_queue_callback;
320 __mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
324 skb_queue_tail(&rx_queue, skb);
325 tasklet_schedule(&net_rx_tasklet);
330 netif->stats.tx_dropped++;
336 static void xen_network_done_notify(void)
338 static struct net_device *eth0_dev = NULL;
339 if (unlikely(eth0_dev == NULL))
340 eth0_dev = __dev_get_by_name("eth0");
341 netif_rx_schedule(eth0_dev);
344 * Add the following to the poll() function of a NAPI driver (Tigon3 is an example):
345 * if ( xen_network_done() )
346 * tg3_enable_ints(tp);
348 int xen_network_done(void)
350 return skb_queue_empty(&rx_queue);
354 struct netrx_pending_operations {
355 unsigned trans_prod, trans_cons;
356 unsigned mmu_prod, mmu_cons;
357 unsigned mcl_prod, mcl_cons;
358 unsigned copy_prod, copy_cons;
359 unsigned meta_prod, meta_cons;
361 gnttab_transfer_t *trans;
363 multicall_entry_t *mcl;
364 struct netbk_rx_meta *meta;
367 /* Set up the grant operations for this fragment. If it's a flipping
368 interface, we also set up the unmap request from here. */
369 static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
370 int i, struct netrx_pending_operations *npo,
371 struct page *page, unsigned long size,
372 unsigned long offset)
375 gnttab_transfer_t *gop;
376 gnttab_copy_t *copy_gop;
377 multicall_entry_t *mcl;
378 netif_rx_request_t *req;
379 unsigned long old_mfn, new_mfn;
381 old_mfn = virt_to_mfn(page_address(page));
383 req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
384 if (netif->copying_receiver) {
385 /* The fragment needs to be copied rather than
388 copy_gop = npo->copy + npo->copy_prod++;
389 copy_gop->flags = GNTCOPY_dest_gref;
390 if (PageForeign(page)) {
391 struct pending_tx_info *src_pend =
392 &pending_tx_info[page->index];
393 copy_gop->source.domid = src_pend->netif->domid;
394 copy_gop->source.u.ref = src_pend->req.gref;
395 copy_gop->flags |= GNTCOPY_source_gref;
397 copy_gop->source.domid = DOMID_SELF;
398 copy_gop->source.u.gmfn = old_mfn;
400 copy_gop->source.offset = offset;
401 copy_gop->dest.domid = netif->domid;
402 copy_gop->dest.offset = 0;
403 copy_gop->dest.u.ref = req->gref;
404 copy_gop->len = size;
407 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
408 new_mfn = alloc_mfn();
411 * Set the new P2M table entry before
412 * reassigning the old data page. Heed the
413 * comment in pgtable-2level.h:pte_page(). :-)
415 set_phys_to_machine(page_to_pfn(page), new_mfn);
417 mcl = npo->mcl + npo->mcl_prod++;
418 MULTI_update_va_mapping(mcl,
419 (unsigned long)page_address(page),
420 pfn_pte_ma(new_mfn, PAGE_KERNEL),
423 mmu = npo->mmu + npo->mmu_prod++;
424 mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
426 mmu->val = page_to_pfn(page);
429 gop = npo->trans + npo->trans_prod++;
431 gop->domid = netif->domid;
432 gop->ref = req->gref;
437 static void netbk_gop_skb(struct sk_buff *skb,
438 struct netrx_pending_operations *npo)
440 netif_t *netif = netdev_priv(skb->dev);
441 int nr_frags = skb_shinfo(skb)->nr_frags;
444 struct netbk_rx_meta *head_meta, *meta;
446 head_meta = npo->meta + npo->meta_prod++;
447 head_meta->frag.page_offset = skb_shinfo(skb)->gso_type;
448 head_meta->frag.size = skb_shinfo(skb)->gso_size;
449 extra = !!head_meta->frag.size + 1;
451 for (i = 0; i < nr_frags; i++) {
452 meta = npo->meta + npo->meta_prod++;
453 meta->frag = skb_shinfo(skb)->frags[i];
454 meta->id = netbk_gop_frag(netif, meta, i + extra, npo,
457 meta->frag.page_offset);
461 * This must occur at the end to ensure that we don't trash
462 * skb_shinfo until we're done.
464 head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo,
465 virt_to_page(skb->data),
467 offset_in_page(skb->data));
469 netif->rx.req_cons += nr_frags + extra;
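/*
 * Note on the meta layout built above: the head meta entry is not a real
 * fragment -- its frag.page_offset/frag.size fields are reused to carry the
 * skb's gso_type/gso_size so that net_rx_action() can emit a GSO extra_info
 * response later.  Hence "extra" is one slot for the header plus one more
 * only when a GSO size is present, i.e.
 *
 *	extra = !!skb_shinfo(skb)->gso_size + 1;
 */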
472 static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
476 for (i = 0; i < nr_frags; i++)
477 put_page(meta[i].frag.page);
480 /* This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
481 used to set up the operations on the top of
482 netrx_pending_operations, which have since been done. Check that
483 they didn't give any errors and advance over them. */
484 static int netbk_check_gop(int nr_frags, domid_t domid,
485 struct netrx_pending_operations *npo)
487 multicall_entry_t *mcl;
488 gnttab_transfer_t *gop;
489 gnttab_copy_t *copy_op;
490 int status = NETIF_RSP_OKAY;
493 for (i = 0; i <= nr_frags; i++) {
494 if (npo->meta[npo->meta_cons + i].copy) {
495 copy_op = npo->copy + npo->copy_cons++;
496 if (copy_op->status != GNTST_okay) {
497 DPRINTK("Bad status %d from copy to DOM%d.\n",
498 copy_op->status, domid);
499 status = NETIF_RSP_ERROR;
502 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
503 mcl = npo->mcl + npo->mcl_cons++;
504 /* The update_va_mapping() must not fail. */
505 BUG_ON(mcl->result != 0);
508 gop = npo->trans + npo->trans_cons++;
509 /* Check the reassignment error code. */
510 if (gop->status != 0) {
511 DPRINTK("Bad status %d from grant transfer to DOM%u\n",
514 * Page no longer belongs to us unless
515 * GNTST_bad_page, but that should be
516 * a fatal error anyway.
518 BUG_ON(gop->status == GNTST_bad_page);
519 status = NETIF_RSP_ERROR;
527 static void netbk_add_frag_responses(netif_t *netif, int status,
528 struct netbk_rx_meta *meta, int nr_frags)
531 unsigned long offset;
533 for (i = 0; i < nr_frags; i++) {
535 int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
540 offset = meta[i].frag.page_offset;
541 make_rx_response(netif, id, status, offset,
542 meta[i].frag.size, flags);
546 static void net_rx_action(unsigned long unused)
548 netif_t *netif = NULL;
551 netif_rx_response_t *resp;
552 multicall_entry_t *mcl;
553 struct sk_buff_head rxq;
559 unsigned long offset;
562 * Putting hundreds of bytes on the stack is considered rude.
563 * Static variables work because a tasklet can only run on one CPU at a time.
565 static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+3];
566 static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
567 static gnttab_transfer_t grant_trans_op[NET_RX_RING_SIZE];
568 static gnttab_copy_t grant_copy_op[NET_RX_RING_SIZE];
569 static unsigned char rx_notify[NR_IRQS];
570 static u16 notify_list[NET_RX_RING_SIZE];
571 static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
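/*
 * Sizing note: one entry per receive ring slot bounds every array above;
 * the multicall array needs up to three extra slots for the trailing
 * mmu_update, grant-transfer and grant-copy batches.  The BUG_ON()s after
 * the batch has been assembled assert exactly these limits.
 */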
573 struct netrx_pending_operations npo = {
575 trans: grant_trans_op,
580 skb_queue_head_init(&rxq);
584 while ((skb = skb_dequeue(&rx_queue)) != NULL) {
585 nr_frags = skb_shinfo(skb)->nr_frags;
586 *(int *)skb->cb = nr_frags;
588 if (!xen_feature(XENFEAT_auto_translated_physmap) &&
589 check_mfn(nr_frags + 1)) {
590 /* Memory squeeze? Back off for an arbitrary while. */
591 if (net_ratelimit())
592 WPRINTK("Memory squeeze in netback "
594 mod_timer(&net_timer, jiffies + HZ);
595 skb_queue_head(&rx_queue, skb);
599 netbk_gop_skb(skb, &npo);
601 count += nr_frags + 1;
603 __skb_queue_tail(&rxq, skb);
605 /* Filled the batch queue? */
606 if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
611 !xen_feature(XENFEAT_auto_translated_physmap)) {
612 mcl = npo.mcl + npo.mcl_prod++;
614 BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping);
615 mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
617 mcl->op = __HYPERVISOR_mmu_update;
618 mcl->args[0] = (unsigned long)rx_mmu;
619 mcl->args[1] = npo.mmu_prod;
621 mcl->args[3] = DOMID_SELF;
624 if (npo.trans_prod) {
625 mcl = npo.mcl + npo.mcl_prod++;
626 mcl->op = __HYPERVISOR_grant_table_op;
627 mcl->args[0] = GNTTABOP_transfer;
628 mcl->args[1] = (unsigned long)grant_trans_op;
629 mcl->args[2] = npo.trans_prod;
633 mcl = npo.mcl + npo.mcl_prod++;
634 mcl->op = __HYPERVISOR_grant_table_op;
635 mcl->args[0] = GNTTABOP_copy;
636 mcl->args[1] = (unsigned long)grant_copy_op;
637 mcl->args[2] = npo.copy_prod;
644 BUG_ON(npo.copy_prod > NET_RX_RING_SIZE);
645 BUG_ON(npo.mmu_prod > NET_RX_RING_SIZE);
646 BUG_ON(npo.trans_prod > NET_RX_RING_SIZE);
647 BUG_ON(npo.mcl_prod > NET_RX_RING_SIZE+3);
648 BUG_ON(npo.meta_prod > NET_RX_RING_SIZE);
650 ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod);
653 while ((skb = __skb_dequeue(&rxq)) != NULL) {
654 nr_frags = *(int *)skb->cb;
656 netif = netdev_priv(skb->dev);
657 /* We can't rely on skb_release_data to release the
658 pages used by fragments for us, since it tries to
659 touch the pages in the fraglist. If we're in
660 flipping mode, that doesn't work. In copying mode,
661 we still have access to all of the pages, and so
662 it's safe to let release_data deal with it. */
663 /* (Freeing the fragments is safe since we copy
664 non-linear skbs destined for flipping interfaces) */
665 if (!netif->copying_receiver) {
666 atomic_set(&(skb_shinfo(skb)->dataref), 1);
667 skb_shinfo(skb)->frag_list = NULL;
668 skb_shinfo(skb)->nr_frags = 0;
669 netbk_free_pages(nr_frags, meta + npo.meta_cons + 1);
672 netif->stats.tx_bytes += skb->len;
673 netif->stats.tx_packets++;
675 status = netbk_check_gop(nr_frags, netif->domid, &npo);
677 id = meta[npo.meta_cons].id;
678 flags = nr_frags ? NETRXF_more_data : 0;
680 if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
681 flags |= NETRXF_csum_blank | NETRXF_data_validated;
682 else if (skb->proto_data_valid) /* remote but checksummed? */
683 flags |= NETRXF_data_validated;
685 if (meta[npo.meta_cons].copy)
688 offset = offset_in_page(skb->data);
689 resp = make_rx_response(netif, id, status, offset,
690 skb_headlen(skb), flags);
692 if (meta[npo.meta_cons].frag.size) {
693 struct netif_extra_info *gso =
694 (struct netif_extra_info *)
695 RING_GET_RESPONSE(&netif->rx,
696 netif->rx.rsp_prod_pvt++);
698 resp->flags |= NETRXF_extra_info;
700 gso->u.gso.size = meta[npo.meta_cons].frag.size;
701 gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
703 gso->u.gso.features = 0;
705 gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
709 netbk_add_frag_responses(netif, status,
710 meta + npo.meta_cons + 1,
713 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
715 if (ret && !rx_notify[irq]) {
717 notify_list[notify_nr++] = irq;
720 if (netif_queue_stopped(netif->dev) &&
721 netif_schedulable(netif->dev) &&
722 !netbk_queue_full(netif))
723 netif_wake_queue(netif->dev);
727 npo.meta_cons += nr_frags + 1;
730 while (notify_nr != 0) {
731 irq = notify_list[--notify_nr];
733 notify_remote_via_irq(irq);
736 /* More work to do? */
737 if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
738 tasklet_schedule(&net_rx_tasklet);
741 xen_network_done_notify();
745 static void net_alarm(unsigned long unused)
747 tasklet_schedule(&net_rx_tasklet);
750 struct net_device_stats *netif_be_get_stats(struct net_device *dev)
752 netif_t *netif = netdev_priv(dev);
753 return &netif->stats;
756 static int __on_net_schedule_list(netif_t *netif)
758 return netif->list.next != NULL;
761 static void remove_from_net_schedule_list(netif_t *netif)
763 spin_lock_irq(&net_schedule_list_lock);
764 if (likely(__on_net_schedule_list(netif))) {
765 list_del(&netif->list);
766 netif->list.next = NULL;
769 spin_unlock_irq(&net_schedule_list_lock);
772 static void add_to_net_schedule_list_tail(netif_t *netif)
774 if (__on_net_schedule_list(netif))
777 spin_lock_irq(&net_schedule_list_lock);
778 if (!__on_net_schedule_list(netif) &&
779 likely(netif_schedulable(netif->dev))) {
780 list_add_tail(&netif->list, &net_schedule_list);
783 spin_unlock_irq(&net_schedule_list_lock);
787 * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
788 * If this driver is pipelining transmit requests then we can be very
789 * aggressive in avoiding new-packet notifications -- frontend only needs to
790 * send a notification if there are no outstanding unreceived responses.
791 * If we may be buffering transmit requests for any reason then we must be rather
792 * more conservative and treat this as the final check for pending work.
794 void netif_schedule_work(netif_t *netif)
798 #ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
799 more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
801 RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
805 add_to_net_schedule_list_tail(netif);
806 maybe_schedule_tx_action();
810 void netif_deschedule_work(netif_t *netif)
812 remove_from_net_schedule_list(netif);
816 static void tx_add_credit(netif_t *netif)
818 unsigned long max_burst, max_credit;
821 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
822 * Otherwise the interface can seize up due to insufficient credit.
824 max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
825 max_burst = min(max_burst, 131072UL);
826 max_burst = max(max_burst, netif->credit_bytes);
828 /* Take care that adding a new chunk of credit doesn't wrap to zero. */
829 max_credit = netif->remaining_credit + netif->credit_bytes;
830 if (max_credit < netif->remaining_credit)
831 max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
833 netif->remaining_credit = min(max_credit, max_burst);
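/*
 * Worked example (sketch): with credit_bytes = 100kB per period and a 120kB
 * request at the head of the ring, max_burst = max(min(120kB, 128kB), 100kB)
 * = 120kB.  Each replenish adds credit_bytes but clamps remaining_credit to
 * max_burst, so starting from zero the credit reaches 100kB after one period
 * and 120kB after two, at which point the jumbo request can be sent instead
 * of the interface seizing up on a 100kB budget.  The ULONG_MAX clamp only
 * guards against the addition wrapping.
 */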
836 static void tx_credit_callback(unsigned long data)
838 netif_t *netif = (netif_t *)data;
839 tx_add_credit(netif);
840 netif_schedule_work(netif);
843 static inline void net_tx_action_dealloc(void)
845 gnttab_unmap_grant_ref_t *gop;
847 PEND_RING_IDX dc, dp;
854 /* Ensure we see all indexes enqueued by netif_idx_release(). */
858 * Free up any grants we have finished using
862 pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
863 gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
865 grant_tx_handle[pending_idx]);
868 ret = HYPERVISOR_grant_table_op(
869 GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
872 while (dealloc_cons != dp) {
873 pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
875 netif = pending_tx_info[pending_idx].netif;
877 make_tx_response(netif, &pending_tx_info[pending_idx].req,
880 pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
886 static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
888 RING_IDX cons = netif->tx.req_cons;
891 make_tx_response(netif, txp, NETIF_RSP_ERROR);
894 txp = RING_GET_REQUEST(&netif->tx, cons++);
896 netif->tx.req_cons = cons;
897 netif_schedule_work(netif);
901 static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first,
902 netif_tx_request_t *txp, int work_to_do)
904 RING_IDX cons = netif->tx.req_cons;
907 if (!(first->flags & NETTXF_more_data))
911 if (frags >= work_to_do) {
912 DPRINTK("Need more frags\n");
916 if (unlikely(frags >= MAX_SKB_FRAGS)) {
917 DPRINTK("Too many frags\n");
921 memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
923 if (txp->size > first->size) {
924 DPRINTK("Frag is bigger than frame.\n");
928 first->size -= txp->size;
931 if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
932 DPRINTK("txp->offset: %x, size: %u\n",
933 txp->offset, txp->size);
936 } while ((txp++)->flags & NETTXF_more_data);
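/*
 * Layout note: the first tx request carries the whole packet size and sets
 * NETTXF_more_data; each follow-on request describes one fragment whose
 * size is deducted from first->size as the chain is walked.  The chain is
 * rejected (netbk_tx_err() on the whole packet) if it needs more slots than
 * the ring currently holds, exceeds MAX_SKB_FRAGS, claims more bytes than
 * remain, or contains a fragment crossing a page boundary.
 */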
941 static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
943 netif_tx_request_t *txp,
944 gnttab_map_grant_ref_t *mop)
946 struct skb_shared_info *shinfo = skb_shinfo(skb);
947 skb_frag_t *frags = shinfo->frags;
948 unsigned long pending_idx = *((u16 *)skb->data);
951 /* Skip first skb fragment if it is on same page as header fragment. */
952 start = ((unsigned long)shinfo->frags[0].page == pending_idx);
954 for (i = start; i < shinfo->nr_frags; i++, txp++) {
955 pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
957 gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
958 GNTMAP_host_map | GNTMAP_readonly,
959 txp->gref, netif->domid);
961 memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
963 pending_tx_info[pending_idx].netif = netif;
964 frags[i].page = (void *)pending_idx;
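/*
 * Note: at this point frags[i].page holds a pending_idx rather than a real
 * struct page pointer.  The index is carried through the grant-map phase:
 * netbk_tx_check_mop() reads it back to locate the slot to verify, and
 * netbk_fill_frags() finally substitutes
 * virt_to_page(idx_to_kaddr(pending_idx)) together with the offset and size
 * from the saved request.
 */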
970 static int netbk_tx_check_mop(struct sk_buff *skb,
971 gnttab_map_grant_ref_t **mopp)
973 gnttab_map_grant_ref_t *mop = *mopp;
974 int pending_idx = *((u16 *)skb->data);
975 netif_t *netif = pending_tx_info[pending_idx].netif;
976 netif_tx_request_t *txp;
977 struct skb_shared_info *shinfo = skb_shinfo(skb);
978 int nr_frags = shinfo->nr_frags;
981 /* Check status of header. */
984 txp = &pending_tx_info[pending_idx].req;
985 make_tx_response(netif, txp, NETIF_RSP_ERROR);
986 pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
990 __pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT,
991 FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
992 grant_tx_handle[pending_idx] = mop->handle;
995 /* Skip first skb fragment if it is on same page as header fragment. */
996 start = ((unsigned long)shinfo->frags[0].page == pending_idx);
998 for (i = start; i < nr_frags; i++) {
1001 pending_idx = (unsigned long)shinfo->frags[i].page;
1003 /* Check error status: if okay then remember grant handle. */
1004 newerr = (++mop)->status;
1005 if (likely(!newerr)) {
1006 set_phys_to_machine(
1007 __pa(idx_to_kaddr(pending_idx))>>PAGE_SHIFT,
1008 FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
1009 grant_tx_handle[pending_idx] = mop->handle;
1010 /* Had a previous error? Invalidate this fragment. */
1012 netif_idx_release(pending_idx);
1016 /* Error on this fragment: respond to client with an error. */
1017 txp = &pending_tx_info[pending_idx].req;
1018 make_tx_response(netif, txp, NETIF_RSP_ERROR);
1019 pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
1022 /* Not the first error? Preceding frags already invalidated. */
1026 /* First error: invalidate header and preceding fragments. */
1027 pending_idx = *((u16 *)skb->data);
1028 netif_idx_release(pending_idx);
1029 for (j = start; j < i; j++) {
1030 pending_idx = (unsigned long)shinfo->frags[i].page;
1031 netif_idx_release(pending_idx);
1034 /* Remember the error: invalidate all subsequent fragments. */
1042 static void netbk_fill_frags(struct sk_buff *skb)
1044 struct skb_shared_info *shinfo = skb_shinfo(skb);
1045 int nr_frags = shinfo->nr_frags;
1048 for (i = 0; i < nr_frags; i++) {
1049 skb_frag_t *frag = shinfo->frags + i;
1050 netif_tx_request_t *txp;
1051 unsigned long pending_idx;
1053 pending_idx = (unsigned long)frag->page;
1054 txp = &pending_tx_info[pending_idx].req;
1055 frag->page = virt_to_page(idx_to_kaddr(pending_idx));
1056 frag->size = txp->size;
1057 frag->page_offset = txp->offset;
1059 skb->len += txp->size;
1060 skb->data_len += txp->size;
1061 skb->truesize += txp->size;
1065 int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
1068 struct netif_extra_info extra;
1069 RING_IDX cons = netif->tx.req_cons;
1072 if (unlikely(work_to_do-- <= 0)) {
1073 DPRINTK("Missing extra info\n");
1077 memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons),
1079 if (unlikely(!extra.type ||
1080 extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
1081 netif->tx.req_cons = ++cons;
1082 DPRINTK("Invalid extra type: %d\n", extra.type);
1086 memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
1087 netif->tx.req_cons = ++cons;
1088 } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
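/*
 * Protocol note: a tx request flagged NETTXF_extra_info is followed on the
 * ring by one or more struct netif_extra_info slots rather than further
 * netif_tx_request_t entries.  Each extra names its own type (e.g.
 * XEN_NETIF_EXTRA_TYPE_GSO), is stored into extras[type - 1] by the loop
 * above, and sets XEN_NETIF_EXTRA_FLAG_MORE when another extra follows.
 */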
1093 static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
1095 if (!gso->u.gso.size) {
1096 DPRINTK("GSO size must not be zero.\n");
1100 /* Currently only TCPv4 S.O. is supported. */
1101 if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
1102 DPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
1106 skb_shinfo(skb)->gso_size = gso->u.gso.size;
1107 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
1109 /* Header must be checked, and gso_segs computed. */
1110 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
1111 skb_shinfo(skb)->gso_segs = 0;
1116 /* Called after netfront has transmitted */
1117 static void net_tx_action(unsigned long unused)
1119 struct list_head *ent;
1120 struct sk_buff *skb;
1122 netif_tx_request_t txreq;
1123 netif_tx_request_t txfrags[MAX_SKB_FRAGS];
1124 struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
1127 gnttab_map_grant_ref_t *mop;
1128 unsigned int data_len;
1129 int ret, work_to_do;
1131 if (dealloc_cons != dealloc_prod)
1132 net_tx_action_dealloc();
1135 while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
1136 !list_empty(&net_schedule_list)) {
1137 /* Get a netif from the list with work to do. */
1138 ent = net_schedule_list.next;
1139 netif = list_entry(ent, netif_t, list);
1141 remove_from_net_schedule_list(netif);
1143 RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
1149 i = netif->tx.req_cons;
1150 rmb(); /* Ensure that we see the request before we copy it. */
1151 memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
1153 /* Credit-based scheduling. */
1154 if (txreq.size > netif->remaining_credit) {
1155 unsigned long now = jiffies;
1156 unsigned long next_credit =
1157 netif->credit_timeout.expires +
1158 msecs_to_jiffies(netif->credit_usec / 1000);
1160 /* Timer could already be pending in rare cases. */
1161 if (timer_pending(&netif->credit_timeout)) {
1166 /* Passed the point where we can replenish credit? */
1167 if (time_after_eq(now, next_credit)) {
1168 netif->credit_timeout.expires = now;
1169 tx_add_credit(netif);
1172 /* Still too big to send right now? Set a callback. */
1173 if (txreq.size > netif->remaining_credit) {
1174 netif->credit_timeout.data =
1175 (unsigned long)netif;
1176 netif->credit_timeout.function =
1178 __mod_timer(&netif->credit_timeout,
1184 netif->remaining_credit -= txreq.size;
1187 netif->tx.req_cons = ++i;
1189 memset(extras, 0, sizeof(extras));
1190 if (txreq.flags & NETTXF_extra_info) {
1191 work_to_do = netbk_get_extras(netif, extras,
1193 i = netif->tx.req_cons;
1194 if (unlikely(work_to_do < 0)) {
1195 netbk_tx_err(netif, &txreq, i);
1200 ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
1201 if (unlikely(ret < 0)) {
1202 netbk_tx_err(netif, &txreq, i - ret);
1207 if (unlikely(txreq.size < ETH_HLEN)) {
1208 DPRINTK("Bad packet size: %d\n", txreq.size);
1209 netbk_tx_err(netif, &txreq, i);
1213 /* The payload must not cross a page boundary, as it cannot be fragmented. */
1214 if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
1215 DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
1216 txreq.offset, txreq.size,
1217 (txreq.offset &~PAGE_MASK) + txreq.size);
1218 netbk_tx_err(netif, &txreq, i);
1222 pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
1224 data_len = (txreq.size > PKT_PROT_LEN &&
1225 ret < MAX_SKB_FRAGS) ?
1226 PKT_PROT_LEN : txreq.size;
1228 skb = alloc_skb(data_len+16, GFP_ATOMIC);
1229 if (unlikely(skb == NULL)) {
1230 DPRINTK("Can't allocate a skb in net_tx_action.\n");
1231 netbk_tx_err(netif, &txreq, i);
1235 /* Packets passed to netif_rx() must have some headroom. */
1236 skb_reserve(skb, 16);
1238 if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
1239 struct netif_extra_info *gso;
1240 gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
1242 if (netbk_set_skb_gso(skb, gso)) {
1244 netbk_tx_err(netif, &txreq, i);
1249 gnttab_set_map_op(mop, idx_to_kaddr(pending_idx),
1250 GNTMAP_host_map | GNTMAP_readonly,
1251 txreq.gref, netif->domid);
1254 memcpy(&pending_tx_info[pending_idx].req,
1255 &txreq, sizeof(txreq));
1256 pending_tx_info[pending_idx].netif = netif;
1257 *((u16 *)skb->data) = pending_idx;
1259 __skb_put(skb, data_len);
1261 skb_shinfo(skb)->nr_frags = ret;
1262 if (data_len < txreq.size) {
1263 skb_shinfo(skb)->nr_frags++;
1264 skb_shinfo(skb)->frags[0].page =
1265 (void *)(unsigned long)pending_idx;
1267 /* Discriminate from any valid pending_idx value. */
1268 skb_shinfo(skb)->frags[0].page = (void *)~0UL;
1271 __skb_queue_tail(&tx_queue, skb);
1275 mop = netbk_get_requests(netif, skb, txfrags, mop);
1277 netif->tx.req_cons = i;
1278 netif_schedule_work(netif);
1280 if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
1284 if (mop == tx_map_ops)
1287 ret = HYPERVISOR_grant_table_op(
1288 GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
1292 while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
1293 netif_tx_request_t *txp;
1295 pending_idx = *((u16 *)skb->data);
1296 netif = pending_tx_info[pending_idx].netif;
1297 txp = &pending_tx_info[pending_idx].req;
1299 /* Check the remap error code. */
1300 if (unlikely(netbk_tx_check_mop(skb, &mop))) {
1301 printk(KERN_ALERT "netback: grant map operation failed\n");
1302 skb_shinfo(skb)->nr_frags = 0;
1307 data_len = skb->len;
1309 (void *)(idx_to_kaddr(pending_idx)|txp->offset),
1311 if (data_len < txp->size) {
1312 /* Append the packet payload as a fragment. */
1313 txp->offset += data_len;
1314 txp->size -= data_len;
1316 /* Schedule a response immediately. */
1317 netif_idx_release(pending_idx);
1321 * Old frontends do not assert data_validated but we
1322 * can infer it from csum_blank so test both flags.
1324 if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
1325 skb->ip_summed = CHECKSUM_UNNECESSARY;
1326 skb->proto_data_valid = 1;
1328 skb->ip_summed = CHECKSUM_NONE;
1329 skb->proto_data_valid = 0;
1331 skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank);
1333 netbk_fill_frags(skb);
1335 skb->dev = netif->dev;
1336 skb->protocol = eth_type_trans(skb, skb->dev);
1338 netif->stats.rx_bytes += skb->len;
1339 netif->stats.rx_packets++;
1342 netif->dev->last_rx = jiffies;
1346 static void netif_idx_release(u16 pending_idx)
1348 static DEFINE_SPINLOCK(_lock);
1349 unsigned long flags;
1351 spin_lock_irqsave(&_lock, flags);
1352 dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx;
1353 /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
1356 spin_unlock_irqrestore(&_lock, flags);
1358 tasklet_schedule(&net_tx_tasklet);
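/*
 * Synchronisation sketch: netif_idx_release() may be reached from the
 * page-free path via netif_page_release(), so it publishes the slot under
 * its own lock and only bumps dealloc_prod once the index has been written
 * (see the comment above).  net_tx_action_dealloc() pairs with this by
 * sampling dealloc_prod before walking dealloc_cons..dealloc_prod, so it
 * never consumes an entry that has not yet been stored.
 */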
1361 static void netif_page_release(struct page *page)
1363 /* Ready for next use. */
1364 init_page_count(page);
1366 netif_idx_release(page->index);
1369 irqreturn_t netif_be_int(int irq, void *dev_id)
1371 netif_t *netif = dev_id;
1373 add_to_net_schedule_list_tail(netif);
1374 maybe_schedule_tx_action();
1376 if (netif_schedulable(netif->dev) && !netbk_queue_full(netif))
1377 netif_wake_queue(netif->dev);
1382 static void make_tx_response(netif_t *netif,
1383 netif_tx_request_t *txp,
1386 RING_IDX i = netif->tx.rsp_prod_pvt;
1387 netif_tx_response_t *resp;
1390 resp = RING_GET_RESPONSE(&netif->tx, i);
1394 if (txp->flags & NETTXF_extra_info)
1395 RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
1397 netif->tx.rsp_prod_pvt = ++i;
1398 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
1400 notify_remote_via_irq(netif->irq);
1402 #ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
1403 if (i == netif->tx.req_cons) {
1405 RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
1407 add_to_net_schedule_list_tail(netif);
1412 static netif_rx_response_t *make_rx_response(netif_t *netif,
1419 RING_IDX i = netif->rx.rsp_prod_pvt;
1420 netif_rx_response_t *resp;
1422 resp = RING_GET_RESPONSE(&netif->rx, i);
1423 resp->offset = offset;
1424 resp->flags = flags;
1426 resp->status = (s16)size;
1428 resp->status = (s16)st;
1430 netif->rx.rsp_prod_pvt = ++i;
1435 #ifdef NETBE_DEBUG_INTERRUPT
1436 static irqreturn_t netif_be_dbg(int irq, void *dev_id)
1438 struct list_head *ent;
1442 printk(KERN_ALERT "netif_schedule_list:\n");
1443 spin_lock_irq(&net_schedule_list_lock);
1445 list_for_each (ent, &net_schedule_list) {
1446 netif = list_entry(ent, netif_t, list);
1447 printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
1448 "rx_resp_prod=%08x\n",
1449 i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
1450 printk(KERN_ALERT " tx_req_cons=%08x tx_resp_prod=%08x)\n",
1451 netif->tx.req_cons, netif->tx.rsp_prod_pvt);
1452 printk(KERN_ALERT " shared(rx_req_prod=%08x "
1453 "rx_resp_prod=%08x\n",
1454 netif->rx.sring->req_prod, netif->rx.sring->rsp_prod);
1455 printk(KERN_ALERT " rx_event=%08x tx_req_prod=%08x\n",
1456 netif->rx.sring->rsp_event, netif->tx.sring->req_prod);
1457 printk(KERN_ALERT " tx_resp_prod=%08x, tx_event=%08x)\n",
1458 netif->tx.sring->rsp_prod, netif->tx.sring->rsp_event);
1462 spin_unlock_irq(&net_schedule_list_lock);
1463 printk(KERN_ALERT " ** End of netif_schedule_list **\n");
1469 static int __init netback_init(void)
1474 if (!is_running_on_xen())
1477 /* We can increase reservation by this much in net_rx_action(). */
1478 balloon_update_driver_allowance(NET_RX_RING_SIZE);
1480 skb_queue_head_init(&rx_queue);
1481 skb_queue_head_init(&tx_queue);
1483 init_timer(&net_timer);
1485 net_timer.function = net_alarm;
1487 mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
1488 if (mmap_pages == NULL) {
1489 printk(KERN_ERR "%s: out of memory\n", __func__);
1493 for (i = 0; i < MAX_PENDING_REQS; i++) {
1494 page = mmap_pages[i];
1495 SetPageForeign(page, netif_page_release);
1500 pending_prod = MAX_PENDING_REQS;
1501 for (i = 0; i < MAX_PENDING_REQS; i++)
1502 pending_ring[i] = i;
1504 spin_lock_init(&net_schedule_list_lock);
1505 INIT_LIST_HEAD(&net_schedule_list);
1507 netif_xenbus_init();
1509 #ifdef NETBE_DEBUG_INTERRUPT
1510 (void)bind_virq_to_irqhandler(
1522 module_init(netback_init);
1524 MODULE_LICENSE("Dual BSD/GPL");