1 /******************************************************************************
2 * drivers/xen/netback/netback.c
4 * Back-end of the driver for virtual network devices. This portion of the
5 * driver exports a 'unified' network-device interface that can be accessed
6 * by any operating system that implements a compatible front end. A
7 * reference front-end implementation can be found in:
8 * drivers/xen/netfront/netfront.c
10 * Copyright (c) 2002-2005, K A Fraser
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License version 2
14 * as published by the Free Software Foundation; or, when distributed
15 * separately from the Linux kernel or incorporated into other
16 * software packages, subject to the following license:
18 * Permission is hereby granted, free of charge, to any person obtaining a copy
19 * of this source file (the "Software"), to deal in the Software without
20 * restriction, including without limitation the rights to use, copy, modify,
21 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
22 * and to permit persons to whom the Software is furnished to do so, subject to
23 * the following conditions:
25 * The above copyright notice and this permission notice shall be included in
26 * all copies or substantial portions of the Software.
28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
31 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
33 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
38 #include <xen/balloon.h>
39 #include <xen/interface/memory.h>
42 /*#define NETBE_DEBUG_INTERRUPT*/
44 struct netbk_rx_meta {
50 static void netif_idx_release(u16 pending_idx);
51 static void netif_page_release(struct page *page);
52 static void make_tx_response(netif_t *netif,
53 netif_tx_request_t *txp,
55 static netif_rx_response_t *make_rx_response(netif_t *netif,
62 static void net_tx_action(unsigned long unused);
63 static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
65 static void net_rx_action(unsigned long unused);
66 static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
68 static struct timer_list net_timer;
70 #define MAX_PENDING_REQS 256
72 static struct sk_buff_head rx_queue;
74 static struct page **mmap_pages;
75 static inline unsigned long idx_to_kaddr(unsigned int idx)
77 return (unsigned long)pfn_to_kaddr(page_to_pfn(mmap_pages[idx]));
80 #define PKT_PROT_LEN 64
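/*
 * Editorial note (not in the original source): on the transmit path at most
 * PKT_PROT_LEN bytes of a request are copied into the skb's linear area,
 * enough for typical Ethernet/IP/TCP headers to be parsed without touching
 * the mapped grant page; any remainder is attached as a fragment.
 */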
82 static struct pending_tx_info {
83 netif_tx_request_t req;
85 } pending_tx_info[MAX_PENDING_REQS];
86 static u16 pending_ring[MAX_PENDING_REQS];
87 typedef unsigned int PEND_RING_IDX;
88 #define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
89 static PEND_RING_IDX pending_prod, pending_cons;
90 #define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
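/*
 * Illustrative sketch (editorial, not part of the original driver): the
 * pending ring is indexed by free-running counters, so MASK_PEND_IDX()
 * relies on MAX_PENDING_REQS being a power of two.  With 256 entries an
 * index of 260 selects slot 4, and NR_PENDING_REQS is the number of entries
 * currently handed out to in-flight transmit requests.  The hypothetical
 * helper below merely restates the slot arithmetic.
 */
static inline unsigned int example_pending_slot(PEND_RING_IDX idx)
{
	/* Equivalent to idx % MAX_PENDING_REQS for a power-of-two size. */
	return MASK_PEND_IDX(idx);
}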
92 /* Freed TX SKBs get batched on this ring before return to pending_ring. */
93 static u16 dealloc_ring[MAX_PENDING_REQS];
94 static PEND_RING_IDX dealloc_prod, dealloc_cons;
96 static struct sk_buff_head tx_queue;
98 static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
99 static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
100 static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
102 static struct list_head net_schedule_list;
103 static spinlock_t net_schedule_list_lock;
105 #define MAX_MFN_ALLOC 64
106 static unsigned long mfn_list[MAX_MFN_ALLOC];
107 static unsigned int alloc_index = 0;
109 static inline unsigned long alloc_mfn(void)
111 return mfn_list[--alloc_index];
114 static int check_mfn(int nr)
116 struct xen_memory_reservation reservation = {
121 if (likely(alloc_index >= nr))
124 set_xen_guest_handle(reservation.extent_start, mfn_list + alloc_index);
125 reservation.nr_extents = MAX_MFN_ALLOC - alloc_index;
126 alloc_index += HYPERVISOR_memory_op(XENMEM_increase_reservation,
129 return alloc_index >= nr ? 0 : -ENOMEM;
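/*
 * Editorial note (not in the original source): alloc_mfn() pops a machine
 * frame from the small mfn_list pool, and check_mfn() tops that pool up via
 * XENMEM_increase_reservation.  The rx path needs one fresh MFN per flipped
 * page so a replacement can be installed in the P2M table before the old
 * frame is handed to the guest.
 */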
132 static inline void maybe_schedule_tx_action(void)
135 if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
136 !list_empty(&net_schedule_list))
137 tasklet_schedule(&net_tx_tasklet);
141 * A gross way of confirming the origin of an skb data page. The slab
142 * allocator abuses a field in the page struct to cache the kmem_cache_t ptr.
144 static inline int is_xen_skb(struct sk_buff *skb)
146 extern kmem_cache_t *skbuff_cachep;
147 kmem_cache_t *cp = (kmem_cache_t *)virt_to_page(skb->head)->lru.next;
148 return (cp == skbuff_cachep);
152 * We can flip without copying the packet unless:
153 * 1. The data is not allocated from our special cache; or
154 * 2. The main data area is shared; or
155 * 3. One or more fragments are shared; or
156 * 4. There are chained fragments.
158 static inline int is_flippable_skb(struct sk_buff *skb)
162 if (!is_xen_skb(skb) || skb_cloned(skb))
165 for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) {
166 if (page_count(skb_shinfo(skb)->frags[frag].page) > 1)
170 if (skb_shinfo(skb)->frag_list != NULL)
176 static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
178 struct skb_shared_info *ninfo;
179 struct sk_buff *nskb;
180 unsigned long offset;
185 BUG_ON(skb_shinfo(skb)->frag_list != NULL);
187 nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC);
191 skb_reserve(nskb, 16);
192 headlen = nskb->end - nskb->data;
193 if (headlen > skb_headlen(skb))
194 headlen = skb_headlen(skb);
195 ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
198 ninfo = skb_shinfo(nskb);
199 ninfo->gso_size = skb_shinfo(skb)->gso_size;
200 ninfo->gso_type = skb_shinfo(skb)->gso_type;
203 len = skb->len - headlen;
205 nskb->len = skb->len;
206 nskb->data_len = len;
207 nskb->truesize += len;
214 if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
219 copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
220 zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
222 page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
226 ret = skb_copy_bits(skb, offset, page_address(page), copy);
229 ninfo->frags[ninfo->nr_frags].page = page;
230 ninfo->frags[ninfo->nr_frags].page_offset = 0;
231 ninfo->frags[ninfo->nr_frags].size = copy;
238 offset = nskb->data - skb->data;
240 nskb->h.raw = skb->h.raw + offset;
241 nskb->nh.raw = skb->nh.raw + offset;
242 nskb->mac.raw = skb->mac.raw + offset;
252 static inline int netbk_max_required_rx_slots(netif_t *netif)
254 if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
255 return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
256 return 1; /* all in one */
259 static inline int netbk_queue_full(netif_t *netif)
261 RING_IDX peek = netif->rx_req_cons_peek;
262 RING_IDX needed = netbk_max_required_rx_slots(netif);
264 return ((netif->rx.sring->req_prod - peek) < needed) ||
265 ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
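/*
 * Worked example (editorial, not from the original source): with
 * NET_RX_RING_SIZE == 256, rx_req_cons_peek == 1000, req_prod == 1003 and
 * rsp_prod_pvt == 760, an SG/TSO frontend sees only 3 unconsumed requests
 * and 16 free response slots -- both below the MAX_SKB_FRAGS + 2 it may
 * need -- so netbk_queue_full() reports the ring as full and the transmit
 * queue is stopped until the frontend posts more buffers.
 */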
268 int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
270 netif_t *netif = netdev_priv(dev);
272 BUG_ON(skb->dev != dev);
274 /* Drop the packet if the target domain has no receive buffers. */
275 if (unlikely(!netif_running(dev) || !netif_carrier_ok(dev)))
278 if (unlikely(netbk_queue_full(netif))) {
279 /* Not a BUG_ON() -- misbehaving netfront can trigger this. */
280 if (netbk_can_queue(dev))
281 DPRINTK("Queue full but not stopped!\n");
285 /* Copy the packet here if it's destined for a flipping
286 interface but isn't flippable (e.g. extra references to data). */
289 if (!netif->copying_receiver && !is_flippable_skb(skb)) {
290 struct sk_buff *nskb = netbk_copy_skb(skb);
291 if (unlikely(nskb == NULL))
293 /* Copy only the header fields we use in this driver. */
294 nskb->dev = skb->dev;
295 nskb->ip_summed = skb->ip_summed;
296 nskb->proto_data_valid = skb->proto_data_valid;
301 netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
302 !!skb_shinfo(skb)->gso_size;
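/*
 * Worked example (editorial note): a GSO skb carrying three frags reserves
 * 3 + 1 + 1 = 5 ring slots here -- one per frag, one for the linear header,
 * and one extra_info slot advertising gso_size.
 */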
305 if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
306 netif->rx.sring->req_event = netif->rx_req_cons_peek +
307 netbk_max_required_rx_slots(netif);
308 mb(); /* request notification /then/ check & stop the queue */
309 if (netbk_queue_full(netif))
310 netif_stop_queue(dev);
313 skb_queue_tail(&rx_queue, skb);
314 tasklet_schedule(&net_rx_tasklet);
319 netif->stats.tx_dropped++;
325 static void xen_network_done_notify(void)
327 static struct net_device *eth0_dev = NULL;
328 if (unlikely(eth0_dev == NULL))
329 eth0_dev = __dev_get_by_name("eth0");
330 netif_rx_schedule(eth0_dev);
333 * Add the following to the poll() function in a NAPI driver (Tigon3 is an example):
334 * if ( xen_network_done() )
335 * tg3_enable_ints(tp);
337 int xen_network_done(void)
339 return skb_queue_empty(&rx_queue);
343 struct netrx_pending_operations {
344 unsigned trans_prod, trans_cons;
345 unsigned mmu_prod, mmu_cons;
346 unsigned mcl_prod, mcl_cons;
347 unsigned copy_prod, copy_cons;
348 unsigned meta_prod, meta_cons;
350 gnttab_transfer_t *trans;
352 multicall_entry_t *mcl;
353 struct netbk_rx_meta *meta;
356 /* Set up the grant operations for this fragment. If it's a flipping
357 interface, we also set up the unmap request from here. */
358 static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
359 int i, struct netrx_pending_operations *npo,
360 struct page *page, unsigned long size,
361 unsigned long offset)
364 gnttab_transfer_t *gop;
365 gnttab_copy_t *copy_gop;
366 multicall_entry_t *mcl;
367 netif_rx_request_t *req;
368 unsigned long old_mfn, new_mfn;
370 old_mfn = virt_to_mfn(page_address(page));
372 req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
373 if (netif->copying_receiver) {
374 /* The fragment needs to be copied rather than flipped. */
377 copy_gop = npo->copy + npo->copy_prod++;
378 copy_gop->flags = GNTCOPY_dest_gref;
379 if (PageForeign(page)) {
380 struct pending_tx_info *src_pend =
381 &pending_tx_info[page->index];
382 copy_gop->source.domid = src_pend->netif->domid;
383 copy_gop->source.u.ref = src_pend->req.gref;
384 copy_gop->flags |= GNTCOPY_source_gref;
386 copy_gop->source.domid = DOMID_SELF;
387 copy_gop->source.u.gmfn = old_mfn;
389 copy_gop->source.offset = offset;
390 copy_gop->dest.domid = netif->domid;
391 copy_gop->dest.offset = 0;
392 copy_gop->dest.u.ref = req->gref;
393 copy_gop->len = size;
396 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
397 new_mfn = alloc_mfn();
400 * Set the new P2M table entry before
401 * reassigning the old data page. Heed the
402 * comment in pgtable-2level.h:pte_page(). :-)
404 set_phys_to_machine(page_to_pfn(page), new_mfn);
406 mcl = npo->mcl + npo->mcl_prod++;
407 MULTI_update_va_mapping(mcl,
408 (unsigned long)page_address(page),
409 pfn_pte_ma(new_mfn, PAGE_KERNEL),
412 mmu = npo->mmu + npo->mmu_prod++;
413 mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
415 mmu->val = page_to_pfn(page);
418 gop = npo->trans + npo->trans_prod++;
420 gop->domid = netif->domid;
421 gop->ref = req->gref;
426 static void netbk_gop_skb(struct sk_buff *skb,
427 struct netrx_pending_operations *npo)
429 netif_t *netif = netdev_priv(skb->dev);
430 int nr_frags = skb_shinfo(skb)->nr_frags;
433 struct netbk_rx_meta *head_meta, *meta;
435 head_meta = npo->meta + npo->meta_prod++;
436 head_meta->frag.page_offset = skb_shinfo(skb)->gso_type;
437 head_meta->frag.size = skb_shinfo(skb)->gso_size;
438 extra = !!head_meta->frag.size + 1;
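/*
 * Editorial note (not in the original source): head_meta borrows its frag
 * fields to stash gso_type/gso_size for the response path, and "extra"
 * counts the rx request slots consumed ahead of the frags -- one for the
 * packet header plus one more when a GSO extra_info slot will follow it.
 */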
440 for (i = 0; i < nr_frags; i++) {
441 meta = npo->meta + npo->meta_prod++;
442 meta->frag = skb_shinfo(skb)->frags[i];
443 meta->id = netbk_gop_frag(netif, meta, i + extra, npo,
446 meta->frag.page_offset);
450 * This must occur at the end to ensure that we don't trash
451 * skb_shinfo until we're done.
453 head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo,
454 virt_to_page(skb->data),
456 offset_in_page(skb->data));
458 netif->rx.req_cons += nr_frags + extra;
461 static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
465 for (i = 0; i < nr_frags; i++)
466 put_page(meta[i].frag.page);
469 /* This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
470 used to set up the operations on the top of
471 netrx_pending_operations, which have since been done. Check that
472 they didn't give any errors and advance over them. */
473 static int netbk_check_gop(int nr_frags, domid_t domid,
474 struct netrx_pending_operations *npo)
476 multicall_entry_t *mcl;
477 gnttab_transfer_t *gop;
478 gnttab_copy_t *copy_op;
479 int status = NETIF_RSP_OKAY;
482 for (i = 0; i <= nr_frags; i++) {
483 if (npo->meta[npo->meta_cons + i].copy) {
484 copy_op = npo->copy + npo->copy_cons++;
485 if (copy_op->status != GNTST_okay) {
486 DPRINTK("Bad status %d from copy to DOM%d.\n",
488 status = NETIF_RSP_ERROR;
491 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
492 mcl = npo->mcl + npo->mcl_cons++;
493 /* The update_va_mapping() must not fail. */
494 BUG_ON(mcl->result != 0);
497 gop = npo->trans + npo->trans_cons++;
498 /* Check the reassignment error code. */
499 if (gop->status != 0) {
500 DPRINTK("Bad status %d from grant transfer to DOM%u\n",
503 * Page no longer belongs to us unless
504 * GNTST_bad_page, but that should be
505 * a fatal error anyway.
507 BUG_ON(gop->status == GNTST_bad_page);
508 status = NETIF_RSP_ERROR;
516 static void netbk_add_frag_responses(netif_t *netif, int status,
517 struct netbk_rx_meta *meta, int nr_frags)
520 unsigned long offset;
522 for (i = 0; i < nr_frags; i++) {
524 int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
529 offset = meta[i].frag.page_offset;
530 make_rx_response(netif, id, status, offset,
531 meta[i].frag.size, flags);
535 static void net_rx_action(unsigned long unused)
537 netif_t *netif = NULL;
540 netif_rx_response_t *resp;
541 multicall_entry_t *mcl;
542 struct sk_buff_head rxq;
548 unsigned long offset;
551 * Putting hundreds of bytes on the stack is considered rude.
552 * Static works because a tasklet can only be on one CPU at any time.
554 static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+3];
555 static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
556 static gnttab_transfer_t grant_trans_op[NET_RX_RING_SIZE];
557 static gnttab_copy_t grant_copy_op[NET_RX_RING_SIZE];
558 static unsigned char rx_notify[NR_IRQS];
559 static u16 notify_list[NET_RX_RING_SIZE];
560 static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
562 struct netrx_pending_operations npo = {
564 .trans = grant_trans_op,
569 skb_queue_head_init(&rxq);
573 while ((skb = skb_dequeue(&rx_queue)) != NULL) {
574 nr_frags = skb_shinfo(skb)->nr_frags;
575 *(int *)skb->cb = nr_frags;
577 if (!xen_feature(XENFEAT_auto_translated_physmap) &&
578 check_mfn(nr_frags + 1)) {
579 /* Memory squeeze? Back off for an arbitrary while. */
580 if (net_ratelimit())
581 WPRINTK("Memory squeeze in netback "
583 mod_timer(&net_timer, jiffies + HZ);
584 skb_queue_head(&rx_queue, skb);
588 netbk_gop_skb(skb, &npo);
590 count += nr_frags + 1;
592 __skb_queue_tail(&rxq, skb);
594 /* Filled the batch queue? */
595 if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
600 !xen_feature(XENFEAT_auto_translated_physmap)) {
601 mcl = npo.mcl + npo.mcl_prod++;
603 BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping);
604 mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
606 mcl->op = __HYPERVISOR_mmu_update;
607 mcl->args[0] = (unsigned long)rx_mmu;
608 mcl->args[1] = npo.mmu_prod;
610 mcl->args[3] = DOMID_SELF;
613 if (npo.trans_prod) {
614 mcl = npo.mcl + npo.mcl_prod++;
615 mcl->op = __HYPERVISOR_grant_table_op;
616 mcl->args[0] = GNTTABOP_transfer;
617 mcl->args[1] = (unsigned long)grant_trans_op;
618 mcl->args[2] = npo.trans_prod;
622 mcl = npo.mcl + npo.mcl_prod++;
623 mcl->op = __HYPERVISOR_grant_table_op;
624 mcl->args[0] = GNTTABOP_copy;
625 mcl->args[1] = (unsigned long)grant_copy_op;
626 mcl->args[2] = npo.copy_prod;
633 BUG_ON(npo.copy_prod > NET_RX_RING_SIZE);
634 BUG_ON(npo.mmu_prod > NET_RX_RING_SIZE);
635 BUG_ON(npo.trans_prod > NET_RX_RING_SIZE);
636 BUG_ON(npo.mcl_prod > NET_RX_RING_SIZE+3);
637 BUG_ON(npo.meta_prod > NET_RX_RING_SIZE);
639 ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod);
642 while ((skb = __skb_dequeue(&rxq)) != NULL) {
643 nr_frags = *(int *)skb->cb;
645 netif = netdev_priv(skb->dev);
646 /* We can't rely on skb_release_data to release the
647 pages used by fragments for us, since it tries to
648 touch the pages in the fraglist. If we're in
649 flipping mode, that doesn't work. In copying mode,
650 we still have access to all of the pages, and so
651 it's safe to let release_data deal with it. */
652 /* (Freeing the fragments is safe since we copy
653 non-linear skbs destined for flipping interfaces) */
654 if (!netif->copying_receiver) {
655 atomic_set(&(skb_shinfo(skb)->dataref), 1);
656 skb_shinfo(skb)->frag_list = NULL;
657 skb_shinfo(skb)->nr_frags = 0;
658 netbk_free_pages(nr_frags, meta + npo.meta_cons + 1);
661 netif->stats.tx_bytes += skb->len;
662 netif->stats.tx_packets++;
664 status = netbk_check_gop(nr_frags, netif->domid, &npo);
666 id = meta[npo.meta_cons].id;
667 flags = nr_frags ? NETRXF_more_data : 0;
669 if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
670 flags |= NETRXF_csum_blank | NETRXF_data_validated;
671 else if (skb->proto_data_valid) /* remote but checksummed? */
672 flags |= NETRXF_data_validated;
674 if (meta[npo.meta_cons].copy)
677 offset = offset_in_page(skb->data);
678 resp = make_rx_response(netif, id, status, offset,
679 skb_headlen(skb), flags);
681 if (meta[npo.meta_cons].frag.size) {
682 struct netif_extra_info *gso =
683 (struct netif_extra_info *)
684 RING_GET_RESPONSE(&netif->rx,
685 netif->rx.rsp_prod_pvt++);
687 resp->flags |= NETRXF_extra_info;
689 gso->u.gso.size = meta[npo.meta_cons].frag.size;
690 gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
692 gso->u.gso.features = 0;
694 gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
698 netbk_add_frag_responses(netif, status,
699 meta + npo.meta_cons + 1,
702 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
704 if (ret && !rx_notify[irq]) {
706 notify_list[notify_nr++] = irq;
709 if (netif_queue_stopped(netif->dev) &&
710 !netbk_queue_full(netif))
711 netif_wake_queue(netif->dev);
715 npo.meta_cons += nr_frags + 1;
718 while (notify_nr != 0) {
719 irq = notify_list[--notify_nr];
721 notify_remote_via_irq(irq);
724 /* More work to do? */
725 if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
726 tasklet_schedule(&net_rx_tasklet);
729 xen_network_done_notify();
733 static void net_alarm(unsigned long unused)
735 tasklet_schedule(&net_rx_tasklet);
738 struct net_device_stats *netif_be_get_stats(struct net_device *dev)
740 netif_t *netif = netdev_priv(dev);
741 return &netif->stats;
744 static int __on_net_schedule_list(netif_t *netif)
746 return netif->list.next != NULL;
749 static void remove_from_net_schedule_list(netif_t *netif)
751 spin_lock_irq(&net_schedule_list_lock);
752 if (likely(__on_net_schedule_list(netif))) {
753 list_del(&netif->list);
754 netif->list.next = NULL;
757 spin_unlock_irq(&net_schedule_list_lock);
760 static void add_to_net_schedule_list_tail(netif_t *netif)
762 if (__on_net_schedule_list(netif))
765 spin_lock_irq(&net_schedule_list_lock);
766 if (!__on_net_schedule_list(netif) &&
767 likely(netif_running(netif->dev) &&
768 netif_carrier_ok(netif->dev))) {
769 list_add_tail(&netif->list, &net_schedule_list);
772 spin_unlock_irq(&net_schedule_list_lock);
776 * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
777 * If this driver is pipelining transmit requests then we can be very
778 * aggressive in avoiding new-packet notifications -- frontend only needs to
779 * send a notification if there are no outstanding unreceived responses.
780 * If we may buffer transmit requests for any reason then we must be rather
781 * more conservative and treat this as the final check for pending work.
783 void netif_schedule_work(netif_t *netif)
787 #ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
788 more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
790 RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
794 add_to_net_schedule_list_tail(netif);
795 maybe_schedule_tx_action();
799 void netif_deschedule_work(netif_t *netif)
801 remove_from_net_schedule_list(netif);
805 static void tx_add_credit(netif_t *netif)
807 unsigned long max_burst;
810 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
811 * Otherwise the interface can seize up due to insufficient credit.
813 max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
814 max_burst = min(max_burst, 131072UL);
815 max_burst = max(max_burst, netif->credit_bytes);
817 netif->remaining_credit = min(netif->remaining_credit +
822 static void tx_credit_callback(unsigned long data)
824 netif_t *netif = (netif_t *)data;
825 tx_add_credit(netif);
826 netif_schedule_work(netif);
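/*
 * Illustrative sketch of the credit scheduler (editorial, not part of the
 * original driver): a vif configured with credit_bytes = 100000 and
 * credit_usec = 100000 may transmit roughly 100kB per 100ms.  The
 * hypothetical helper below mirrors the replenish step in tx_add_credit():
 * top the credit up by one period's allowance, but never beyond max_burst.
 * E.g. remaining = 20000, credit_bytes = 100000, max_burst = 131072 gives
 * a new credit of 120000.
 */
static inline unsigned long example_replenish_credit(unsigned long remaining,
						     unsigned long credit_bytes,
						     unsigned long max_burst)
{
	return min(remaining + credit_bytes, max_burst);
}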
829 static inline void net_tx_action_dealloc(void)
831 gnttab_unmap_grant_ref_t *gop;
833 PEND_RING_IDX dc, dp;
840 /* Ensure we see all indexes enqueued by netif_idx_release(). */
844 * Free up any grants we have finished using
848 pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
849 gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
851 grant_tx_handle[pending_idx]);
854 ret = HYPERVISOR_grant_table_op(
855 GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
858 while (dealloc_cons != dp) {
859 pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
861 netif = pending_tx_info[pending_idx].netif;
863 make_tx_response(netif, &pending_tx_info[pending_idx].req,
866 pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
872 static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
874 RING_IDX cons = netif->tx.req_cons;
877 make_tx_response(netif, txp, NETIF_RSP_ERROR);
880 txp = RING_GET_REQUEST(&netif->tx, cons++);
882 netif->tx.req_cons = cons;
883 netif_schedule_work(netif);
887 static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first,
888 netif_tx_request_t *txp, int work_to_do)
890 RING_IDX cons = netif->tx.req_cons;
893 if (!(first->flags & NETTXF_more_data))
897 if (frags >= work_to_do) {
898 DPRINTK("Need more frags\n");
902 if (unlikely(frags >= MAX_SKB_FRAGS)) {
903 DPRINTK("Too many frags\n");
907 memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
909 if (txp->size > first->size) {
910 DPRINTK("Frags galore\n");
914 first->size -= txp->size;
917 if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
918 DPRINTK("txp->offset: %x, size: %u\n",
919 txp->offset, txp->size);
922 } while ((txp++)->flags & NETTXF_more_data);
927 static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
929 netif_tx_request_t *txp,
930 gnttab_map_grant_ref_t *mop)
932 struct skb_shared_info *shinfo = skb_shinfo(skb);
933 skb_frag_t *frags = shinfo->frags;
934 unsigned long pending_idx = *((u16 *)skb->data);
937 /* Skip first skb fragment if it is on same page as header fragment. */
938 start = ((unsigned long)shinfo->frags[0].page == pending_idx);
940 for (i = start; i < shinfo->nr_frags; i++, txp++) {
941 pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
943 gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
944 GNTMAP_host_map | GNTMAP_readonly,
945 txp->gref, netif->domid);
947 memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
949 pending_tx_info[pending_idx].netif = netif;
950 frags[i].page = (void *)pending_idx;
956 static int netbk_tx_check_mop(struct sk_buff *skb,
957 gnttab_map_grant_ref_t **mopp)
959 gnttab_map_grant_ref_t *mop = *mopp;
960 int pending_idx = *((u16 *)skb->data);
961 netif_t *netif = pending_tx_info[pending_idx].netif;
962 netif_tx_request_t *txp;
963 struct skb_shared_info *shinfo = skb_shinfo(skb);
964 int nr_frags = shinfo->nr_frags;
967 /* Check status of header. */
970 txp = &pending_tx_info[pending_idx].req;
971 make_tx_response(netif, txp, NETIF_RSP_ERROR);
972 pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
976 __pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT,
977 FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
978 grant_tx_handle[pending_idx] = mop->handle;
981 /* Skip first skb fragment if it is on same page as header fragment. */
982 start = ((unsigned long)shinfo->frags[0].page == pending_idx);
984 for (i = start; i < nr_frags; i++) {
987 pending_idx = (unsigned long)shinfo->frags[i].page;
989 /* Check error status: if okay then remember grant handle. */
990 newerr = (++mop)->status;
991 if (likely(!newerr)) {
993 __pa(idx_to_kaddr(pending_idx))>>PAGE_SHIFT,
994 FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
995 grant_tx_handle[pending_idx] = mop->handle;
996 /* Had a previous error? Invalidate this fragment. */
998 netif_idx_release(pending_idx);
1002 /* Error on this fragment: respond to client with an error. */
1003 txp = &pending_tx_info[pending_idx].req;
1004 make_tx_response(netif, txp, NETIF_RSP_ERROR);
1005 pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
1008 /* Not the first error? Preceding frags already invalidated. */
1012 /* First error: invalidate header and preceding fragments. */
1013 pending_idx = *((u16 *)skb->data);
1014 netif_idx_release(pending_idx);
1015 for (j = start; j < i; j++) {
1016 pending_idx = (unsigned long)shinfo->frags[j].page;
1017 netif_idx_release(pending_idx);
1020 /* Remember the error: invalidate all subsequent fragments. */
1028 static void netbk_fill_frags(struct sk_buff *skb)
1030 struct skb_shared_info *shinfo = skb_shinfo(skb);
1031 int nr_frags = shinfo->nr_frags;
1034 for (i = 0; i < nr_frags; i++) {
1035 skb_frag_t *frag = shinfo->frags + i;
1036 netif_tx_request_t *txp;
1037 unsigned long pending_idx;
1039 pending_idx = (unsigned long)frag->page;
1040 txp = &pending_tx_info[pending_idx].req;
1041 frag->page = virt_to_page(idx_to_kaddr(pending_idx));
1042 frag->size = txp->size;
1043 frag->page_offset = txp->offset;
1045 skb->len += txp->size;
1046 skb->data_len += txp->size;
1047 skb->truesize += txp->size;
1051 int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
1054 struct netif_extra_info extra;
1055 RING_IDX cons = netif->tx.req_cons;
1058 if (unlikely(work_to_do-- <= 0)) {
1059 DPRINTK("Missing extra info\n");
1063 memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons),
1065 if (unlikely(!extra.type ||
1066 extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
1067 netif->tx.req_cons = ++cons;
1068 DPRINTK("Invalid extra type: %d\n", extra.type);
1072 memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
1073 netif->tx.req_cons = ++cons;
1074 } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
1079 static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
1081 if (!gso->u.gso.size) {
1082 DPRINTK("GSO size must not be zero.\n");
1086 /* Currently only TCPv4 segmentation offload is supported. */
1087 if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
1088 DPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
1092 skb_shinfo(skb)->gso_size = gso->u.gso.size;
1093 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
1095 /* Header must be checked, and gso_segs computed. */
1096 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
1097 skb_shinfo(skb)->gso_segs = 0;
1102 /* Called after netfront has transmitted */
1103 static void net_tx_action(unsigned long unused)
1105 struct list_head *ent;
1106 struct sk_buff *skb;
1108 netif_tx_request_t txreq;
1109 netif_tx_request_t txfrags[MAX_SKB_FRAGS];
1110 struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
1113 gnttab_map_grant_ref_t *mop;
1114 unsigned int data_len;
1115 int ret, work_to_do;
1117 if (dealloc_cons != dealloc_prod)
1118 net_tx_action_dealloc();
1121 while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
1122 !list_empty(&net_schedule_list)) {
1123 /* Get a netif from the list with work to do. */
1124 ent = net_schedule_list.next;
1125 netif = list_entry(ent, netif_t, list);
1127 remove_from_net_schedule_list(netif);
1129 RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
1135 i = netif->tx.req_cons;
1136 rmb(); /* Ensure that we see the request before we copy it. */
1137 memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
1139 /* Credit-based scheduling. */
1140 if (txreq.size > netif->remaining_credit) {
1141 unsigned long now = jiffies;
1142 unsigned long next_credit =
1143 netif->credit_timeout.expires +
1144 msecs_to_jiffies(netif->credit_usec / 1000);
1146 /* Timer could already be pending in rare cases. */
1147 if (timer_pending(&netif->credit_timeout)) {
1152 /* Passed the point where we can replenish credit? */
1153 if (time_after_eq(now, next_credit)) {
1154 netif->credit_timeout.expires = now;
1155 tx_add_credit(netif);
1158 /* Still too big to send right now? Set a callback. */
1159 if (txreq.size > netif->remaining_credit) {
1160 netif->credit_timeout.data =
1161 (unsigned long)netif;
1162 netif->credit_timeout.function =
1164 __mod_timer(&netif->credit_timeout,
1170 netif->remaining_credit -= txreq.size;
1173 netif->tx.req_cons = ++i;
1175 memset(extras, 0, sizeof(extras));
1176 if (txreq.flags & NETTXF_extra_info) {
1177 work_to_do = netbk_get_extras(netif, extras,
1179 i = netif->tx.req_cons;
1180 if (unlikely(work_to_do < 0)) {
1181 netbk_tx_err(netif, &txreq, i);
1186 ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
1187 if (unlikely(ret < 0)) {
1188 netbk_tx_err(netif, &txreq, i - ret);
1193 if (unlikely(txreq.size < ETH_HLEN)) {
1194 DPRINTK("Bad packet size: %d\n", txreq.size);
1195 netbk_tx_err(netif, &txreq, i);
1199 /* A single request's payload must not cross a page boundary. */
1200 if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
1201 DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
1202 txreq.offset, txreq.size,
1203 (txreq.offset &~PAGE_MASK) + txreq.size);
1204 netbk_tx_err(netif, &txreq, i);
1208 pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
1210 data_len = (txreq.size > PKT_PROT_LEN &&
1211 ret < MAX_SKB_FRAGS) ?
1212 PKT_PROT_LEN : txreq.size;
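/*
 * Worked example (editorial note): for a 1500-byte request only the first
 * PKT_PROT_LEN (64) bytes are copied into the skb's linear area; the
 * remainder stays in the granted page and is attached later as frag 0 by
 * netbk_fill_frags().
 */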
1214 skb = alloc_skb(data_len+16, GFP_ATOMIC);
1215 if (unlikely(skb == NULL)) {
1216 DPRINTK("Can't allocate a skb in start_xmit.\n");
1217 netbk_tx_err(netif, &txreq, i);
1221 /* Packets passed to netif_rx() must have some headroom. */
1222 skb_reserve(skb, 16);
1224 if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
1225 struct netif_extra_info *gso;
1226 gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
1228 if (netbk_set_skb_gso(skb, gso)) {
1230 netbk_tx_err(netif, &txreq, i);
1235 gnttab_set_map_op(mop, idx_to_kaddr(pending_idx),
1236 GNTMAP_host_map | GNTMAP_readonly,
1237 txreq.gref, netif->domid);
1240 memcpy(&pending_tx_info[pending_idx].req,
1241 &txreq, sizeof(txreq));
1242 pending_tx_info[pending_idx].netif = netif;
1243 *((u16 *)skb->data) = pending_idx;
1245 __skb_put(skb, data_len);
1247 skb_shinfo(skb)->nr_frags = ret;
1248 if (data_len < txreq.size) {
1249 skb_shinfo(skb)->nr_frags++;
1250 skb_shinfo(skb)->frags[0].page =
1251 (void *)(unsigned long)pending_idx;
1253 /* Discriminate from any valid pending_idx value. */
1254 skb_shinfo(skb)->frags[0].page = (void *)~0UL;
1257 __skb_queue_tail(&tx_queue, skb);
1261 mop = netbk_get_requests(netif, skb, txfrags, mop);
1263 netif->tx.req_cons = i;
1264 netif_schedule_work(netif);
1266 if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
1270 if (mop == tx_map_ops)
1273 ret = HYPERVISOR_grant_table_op(
1274 GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
1278 while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
1279 netif_tx_request_t *txp;
1281 pending_idx = *((u16 *)skb->data);
1282 netif = pending_tx_info[pending_idx].netif;
1283 txp = &pending_tx_info[pending_idx].req;
1285 /* Check the remap error code. */
1286 if (unlikely(netbk_tx_check_mop(skb, &mop))) {
1287 printk(KERN_ALERT "#### netback grant fails\n");
1288 skb_shinfo(skb)->nr_frags = 0;
1293 data_len = skb->len;
1295 (void *)(idx_to_kaddr(pending_idx)|txp->offset),
1297 if (data_len < txp->size) {
1298 /* Append the packet payload as a fragment. */
1299 txp->offset += data_len;
1300 txp->size -= data_len;
1302 /* Schedule a response immediately. */
1303 netif_idx_release(pending_idx);
1307 * Old frontends do not assert data_validated but we
1308 * can infer it from csum_blank so test both flags.
1310 if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
1311 skb->ip_summed = CHECKSUM_UNNECESSARY;
1312 skb->proto_data_valid = 1;
1314 skb->ip_summed = CHECKSUM_NONE;
1315 skb->proto_data_valid = 0;
1317 skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank);
1319 netbk_fill_frags(skb);
1321 skb->dev = netif->dev;
1322 skb->protocol = eth_type_trans(skb, skb->dev);
1324 netif->stats.rx_bytes += skb->len;
1325 netif->stats.rx_packets++;
1328 netif->dev->last_rx = jiffies;
1332 static void netif_idx_release(u16 pending_idx)
1334 static DEFINE_SPINLOCK(_lock);
1335 unsigned long flags;
1337 spin_lock_irqsave(&_lock, flags);
1338 dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx;
1339 /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
1342 spin_unlock_irqrestore(&_lock, flags);
1344 tasklet_schedule(&net_tx_tasklet);
1347 static void netif_page_release(struct page *page)
1349 /* Ready for next use. */
1350 init_page_count(page);
1352 netif_idx_release(page->index);
1355 irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
1357 netif_t *netif = dev_id;
1359 add_to_net_schedule_list_tail(netif);
1360 maybe_schedule_tx_action();
1362 if (netif_queue_stopped(netif->dev) && !netbk_queue_full(netif))
1363 netif_wake_queue(netif->dev);
1368 static void make_tx_response(netif_t *netif,
1369 netif_tx_request_t *txp,
1372 RING_IDX i = netif->tx.rsp_prod_pvt;
1373 netif_tx_response_t *resp;
1376 resp = RING_GET_RESPONSE(&netif->tx, i);
1380 if (txp->flags & NETTXF_extra_info)
1381 RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
1383 netif->tx.rsp_prod_pvt = ++i;
1384 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
1386 notify_remote_via_irq(netif->irq);
1388 #ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
1389 if (i == netif->tx.req_cons) {
1391 RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
1393 add_to_net_schedule_list_tail(netif);
1398 static netif_rx_response_t *make_rx_response(netif_t *netif,
1405 RING_IDX i = netif->rx.rsp_prod_pvt;
1406 netif_rx_response_t *resp;
1408 resp = RING_GET_RESPONSE(&netif->rx, i);
1409 resp->offset = offset;
1410 resp->flags = flags;
1412 resp->status = (s16)size;
1414 resp->status = (s16)st;
1416 netif->rx.rsp_prod_pvt = ++i;
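/*
 * Editorial note (not in the original source): the rx response "status"
 * field is overloaded -- a non-negative value is the byte count delivered
 * for this slot, while a negative NETIF_RSP_* value reports an error such
 * as NETIF_RSP_ERROR or NETIF_RSP_DROPPED.
 */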
1421 #ifdef NETBE_DEBUG_INTERRUPT
1422 static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
1424 struct list_head *ent;
1428 printk(KERN_ALERT "netif_schedule_list:\n");
1429 spin_lock_irq(&net_schedule_list_lock);
1431 list_for_each (ent, &net_schedule_list) {
1432 netif = list_entry(ent, netif_t, list);
1433 printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
1434 "rx_resp_prod=%08x\n",
1435 i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
1436 printk(KERN_ALERT " tx_req_cons=%08x tx_resp_prod=%08x)\n",
1437 netif->tx.req_cons, netif->tx.rsp_prod_pvt);
1438 printk(KERN_ALERT " shared(rx_req_prod=%08x "
1439 "rx_resp_prod=%08x\n",
1440 netif->rx.sring->req_prod, netif->rx.sring->rsp_prod);
1441 printk(KERN_ALERT " rx_event=%08x tx_req_prod=%08x\n",
1442 netif->rx.sring->rsp_event, netif->tx.sring->req_prod);
1443 printk(KERN_ALERT " tx_resp_prod=%08x, tx_event=%08x)\n",
1444 netif->tx.sring->rsp_prod, netif->tx.sring->rsp_event);
1448 spin_unlock_irq(&net_schedule_list_lock);
1449 printk(KERN_ALERT " ** End of netif_schedule_list **\n");
1455 static int __init netback_init(void)
1460 if (!is_running_on_xen())
1463 /* We can increase reservation by this much in net_rx_action(). */
1464 balloon_update_driver_allowance(NET_RX_RING_SIZE);
1466 skb_queue_head_init(&rx_queue);
1467 skb_queue_head_init(&tx_queue);
1469 init_timer(&net_timer);
1471 net_timer.function = net_alarm;
1473 mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
1474 if (mmap_pages == NULL) {
1475 printk("%s: out of memory\n", __FUNCTION__);
1479 for (i = 0; i < MAX_PENDING_REQS; i++) {
1480 page = mmap_pages[i];
1481 SetPageForeign(page, netif_page_release);
1486 pending_prod = MAX_PENDING_REQS;
1487 for (i = 0; i < MAX_PENDING_REQS; i++)
1488 pending_ring[i] = i;
1490 spin_lock_init(&net_schedule_list_lock);
1491 INIT_LIST_HEAD(&net_schedule_list);
1493 netif_xenbus_init();
1495 #ifdef NETBE_DEBUG_INTERRUPT
1496 (void)bind_virq_to_irqhandler(
1508 module_init(netback_init);
1510 MODULE_LICENSE("Dual BSD/GPL");