drivers/xen/netback/netback.c

   1 /******************************************************************************
   2  * drivers/xen/netback/netback.c
   3  *
   4  * Back-end of the driver for virtual network devices. This portion of the
   5  * driver exports a 'unified' network-device interface that can be accessed
   6  * by any operating system that implements a compatible front end. A
   7  * reference front-end implementation can be found in:
   8  *  drivers/xen/netfront/netfront.c
   9  *
  10  * Copyright (c) 2002-2005, K A Fraser
  11  */
  12
  13 #include "common.h"
  14 #include <asm-xen/balloon.h>
  15 #include <asm-xen/evtchn.h>
  16
  17 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
  18 #include <linux/delay.h>
  19 #endif
  20
  21 static void netif_idx_release(u16 pending_idx);
  22 static void netif_page_release(struct page *page);
  23 static void make_tx_response(netif_t *netif,
  24                              u16      id,
  25                              s8       st);
  26 static int  make_rx_response(netif_t *netif,
  27                              u16      id,
  28                              s8       st,
  29                              memory_t addr,
  30                              u16      size);
  31
  32 static void net_tx_action(unsigned long unused);
  33 static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
  34
  35 static void net_rx_action(unsigned long unused);
  36 static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
  37
  38 static struct timer_list net_timer;
  39
  40 static struct sk_buff_head rx_queue;
  41 static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2+1];
  42 static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
  43 static struct mmuext_op rx_mmuext[NETIF_RX_RING_SIZE];
  44 static unsigned char rx_notify[NR_EVENT_CHANNELS];
  45
  46 /* Don't currently gate addition of an interface to the tx scheduling list. */
  47 #define tx_work_exists(_if) (1)
  48
  49 #define MAX_PENDING_REQS 256
  50 static unsigned long mmap_vstart;
  51 #define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
  52
  53 #define PKT_PROT_LEN 64
  54
  55 static struct {
  56     netif_tx_request_t req;
  57     netif_t *netif;
  58 } pending_tx_info[MAX_PENDING_REQS];
  59 static u16 pending_ring[MAX_PENDING_REQS];
  60 typedef unsigned int PEND_RING_IDX;
  61 #define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
  62 static PEND_RING_IDX pending_prod, pending_cons;
  63 #define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
  64
  65 /* Freed TX SKBs get batched on this ring before return to pending_ring. */
  66 static u16 dealloc_ring[MAX_PENDING_REQS];
  67 static PEND_RING_IDX dealloc_prod, dealloc_cons;
  68
  69 static struct sk_buff_head tx_queue;
  70 static multicall_entry_t tx_mcl[MAX_PENDING_REQS];
  71
  72 static struct list_head net_schedule_list;
  73 static spinlock_t net_schedule_list_lock;
  74
  75 #define MAX_MFN_ALLOC 64
  76 static unsigned long mfn_list[MAX_MFN_ALLOC];
  77 static unsigned int alloc_index = 0;
  78 static spinlock_t mfn_lock = SPIN_LOCK_UNLOCKED;
  79
  80 static unsigned long alloc_mfn(void)
  81 {
  82     unsigned long mfn = 0, flags;
  83     spin_lock_irqsave(&mfn_lock, flags);
  84     if ( unlikely(alloc_index == 0) )
  85         alloc_index = HYPERVISOR_dom_mem_op(
  86             MEMOP_increase_reservation, mfn_list, MAX_MFN_ALLOC, 0);
  87     if ( alloc_index != 0 )
  88         mfn = mfn_list[--alloc_index];
  89     spin_unlock_irqrestore(&mfn_lock, flags);
  90     return mfn;
  91 }
  92
  93 static void free_mfn(unsigned long mfn)
  94 {
  95     unsigned long flags;
  96     spin_lock_irqsave(&mfn_lock, flags);
  97     if ( alloc_index != MAX_MFN_ALLOC )
  98         mfn_list[alloc_index++] = mfn;
  99     else if ( HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation,
 100                                     &mfn, 1, 0) != 1 )
 101         BUG();
 102     spin_unlock_irqrestore(&mfn_lock, flags);
 103 }
 104
 105 static inline void maybe_schedule_tx_action(void)
 106 {
 107     smp_mb();
 108     if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
 109          !list_empty(&net_schedule_list) )
 110         tasklet_schedule(&net_tx_tasklet);
 111 }
 112
 113 /*
 114  * A gross way of confirming the origin of an skb data page. The slab
 115  * allocator abuses a field in the page struct to cache the kmem_cache_t ptr.
 116  */
 117 static inline int is_xen_skb(struct sk_buff *skb)
 118 {
 119     extern kmem_cache_t *skbuff_cachep;
 120 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 121     kmem_cache_t *cp = (kmem_cache_t *)virt_to_page(skb->head)->lru.next;
 122 #else
 123     kmem_cache_t *cp = (kmem_cache_t *)virt_to_page(skb->head)->list.next;
 124 #endif
 125     return (cp == skbuff_cachep);
 126 }
 127
 128 int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
 129 {
 130     netif_t *netif = netdev_priv(dev);
 131
 132     ASSERT(skb->dev == dev);
 133
 134     /* Drop the packet if the target domain has no receive buffers. */
 135     if ( !netif->active ||
 136          (netif->rx_req_cons == netif->rx->req_prod) ||
 137          ((netif->rx_req_cons-netif->rx_resp_prod) == NETIF_RX_RING_SIZE) )
 138         goto drop;
 139
 140     /*
 141      * We do not copy the packet unless:
 142      *  1. The data is shared; or
 143      *  2. The data is not allocated from our special cache.
 144      * NB. We also couldn't cope with fragmented packets, but we won't get
 145      *     any because we not advertise the NETIF_F_SG feature.
 146      */
 147     if ( skb_shared(skb) || skb_cloned(skb) || !is_xen_skb(skb) )
 148     {
 149         int hlen = skb->data - skb->head;
 150         struct sk_buff *nskb = dev_alloc_skb(hlen + skb->len);
 151         if ( unlikely(nskb == NULL) )
 152             goto drop;
 153         skb_reserve(nskb, hlen);
 154         __skb_put(nskb, skb->len);
 155         (void)skb_copy_bits(skb, -hlen, nskb->data - hlen, skb->len + hlen);
 156         nskb->dev = skb->dev;
 157         dev_kfree_skb(skb);
 158         skb = nskb;
 159     }
 160
 161     netif->rx_req_cons++;
 162     netif_get(netif);
 163
 164     skb_queue_tail(&rx_queue, skb);
 165     tasklet_schedule(&net_rx_tasklet);
 166
 167     return 0;
 168
 169  drop:
 170     netif->stats.tx_dropped++;
 171     dev_kfree_skb(skb);
 172     return 0;
 173 }
 174
 175 #if 0
 176 static void xen_network_done_notify(void)
 177 {
 178     static struct net_device *eth0_dev = NULL;
 179     if ( unlikely(eth0_dev == NULL) )
 180         eth0_dev = __dev_get_by_name("eth0");
 181     netif_rx_schedule(eth0_dev);
 182 }
 183 /*
 184  * Add following to poll() function in NAPI driver (Tigon3 is example):
 185  *  if ( xen_network_done() )
 186  *      tg3_enable_ints(tp);
 187  */
 188 int xen_network_done(void)
 189 {
 190     return skb_queue_empty(&rx_queue);
 191 }
 192 #endif
 193
 194 static void net_rx_action(unsigned long unused)
 195 {
 196     netif_t *netif;
 197     s8 status;
 198     u16 size, id, evtchn;
 199     multicall_entry_t *mcl;
 200     mmu_update_t *mmu;
 201     struct mmuext_op *mmuext;
 202     unsigned long vdata, mdata, new_mfn;
 203     struct sk_buff_head rxq;
 204     struct sk_buff *skb;
 205     u16 notify_list[NETIF_RX_RING_SIZE];
 206     int notify_nr = 0;
 207
 208     skb_queue_head_init(&rxq);
 209
 210     mcl = rx_mcl;
 211     mmu = rx_mmu;
 212     mmuext = rx_mmuext;
 213     while ( (skb = skb_dequeue(&rx_queue)) != NULL )
 214     {
 215         netif   = netdev_priv(skb->dev);
 216         vdata   = (unsigned long)skb->data;
 217         mdata   = virt_to_machine(vdata);
 218
 219         /* Memory squeeze? Back off for an arbitrary while. */
 220         if ( (new_mfn = alloc_mfn()) == 0 )
 221         {
 222             if ( net_ratelimit() )
 223                 printk(KERN_WARNING "Memory squeeze in netback driver.\n");
 224             mod_timer(&net_timer, jiffies + HZ);
 225             skb_queue_head(&rx_queue, skb);
 226             break;
 227         }
 228
 229         /*
 230          * Set the new P2M table entry before reassigning the old data page.
 231          * Heed the comment in pgtable-2level.h:pte_page(). :-)
 232          */
 233         phys_to_machine_mapping[__pa(skb->data) >> PAGE_SHIFT] = new_mfn;
 234
 235         mcl->op = __HYPERVISOR_update_va_mapping;
 236         mcl->args[0] = vdata;
 237         mcl->args[1] = (new_mfn << PAGE_SHIFT) | __PAGE_KERNEL;
 238         mcl->args[2] = 0;
 239         mcl++;
 240
 241         mcl->op = __HYPERVISOR_mmuext_op;
 242         mcl->args[0] = (unsigned long)mmuext;
 243         mcl->args[1] = 1;
 244         mcl->args[2] = 0;
 245         mcl->args[3] = netif->domid;
 246         mcl++;
 247
 248         mmuext->cmd = MMUEXT_REASSIGN_PAGE;
 249         mmuext->mfn = mdata >> PAGE_SHIFT;
 250         mmuext++;
 251
 252         mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
 253         mmu->val = __pa(vdata) >> PAGE_SHIFT;
 254         mmu++;
 255
 256         __skb_queue_tail(&rxq, skb);
 257
 258         /* Filled the batch queue? */
 259         if ( (mcl - rx_mcl) == ARRAY_SIZE(rx_mcl) )
 260             break;
 261     }
 262
 263     if ( mcl == rx_mcl )
 264         return;
 265
 266     mcl->op = __HYPERVISOR_mmu_update;
 267     mcl->args[0] = (unsigned long)rx_mmu;
 268     mcl->args[1] = mmu - rx_mmu;
 269     mcl->args[2] = 0;
 270     mcl->args[3] = DOMID_SELF;
 271     mcl++;
 272
 273     mcl[-3].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
 274     if ( unlikely(HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl) != 0) )
 275         BUG();
 276
 277     mcl = rx_mcl;
 278     mmuext = rx_mmuext;
 279     while ( (skb = __skb_dequeue(&rxq)) != NULL )
 280     {
 281         netif   = netdev_priv(skb->dev);
 282         size    = skb->tail - skb->data;
 283
 284         /* Rederive the machine addresses. */
 285         new_mfn = mcl[0].args[1] >> PAGE_SHIFT;
 286         mdata   = ((mmuext[0].mfn << PAGE_SHIFT) |
 287                    ((unsigned long)skb->data & ~PAGE_MASK));
 288
 289         atomic_set(&(skb_shinfo(skb)->dataref), 1);
 290         skb_shinfo(skb)->nr_frags = 0;
 291         skb_shinfo(skb)->frag_list = NULL;
 292
 293         netif->stats.tx_bytes += size;
 294         netif->stats.tx_packets++;
 295
 296         /* The update_va_mapping() must not fail. */
 297         if ( unlikely(mcl[0].args[5] != 0) )
 298             BUG();
 299
 300         /* Check the reassignment error code. */
 301         status = NETIF_RSP_OKAY;
 302         if ( unlikely(mcl[1].args[5] != 0) )
 303         {
 304             DPRINTK("Failed MMU update transferring to DOM%u\n", netif->domid);
 305             free_mfn(mdata >> PAGE_SHIFT);
 306             status = NETIF_RSP_ERROR;
 307         }
 308
 309         evtchn = netif->evtchn;
 310         id = netif->rx->ring[MASK_NETIF_RX_IDX(netif->rx_resp_prod)].req.id;
 311         if ( make_rx_response(netif, id, status, mdata, size) &&
 312              (rx_notify[evtchn] == 0) )
 313         {
 314             rx_notify[evtchn] = 1;
 315             notify_list[notify_nr++] = evtchn;
 316         }
 317
 318         netif_put(netif);
 319         dev_kfree_skb(skb);
 320
 321         mcl += 2;
 322         mmuext += 1;
 323     }
 324
 325     while ( notify_nr != 0 )
 326     {
 327         evtchn = notify_list[--notify_nr];
 328         rx_notify[evtchn] = 0;
 329         notify_via_evtchn(evtchn);
 330     }
 331
 332     /* More work to do? */
 333     if ( !skb_queue_empty(&rx_queue) && !timer_pending(&net_timer) )
 334         tasklet_schedule(&net_rx_tasklet);
 335 #if 0
 336     else
 337         xen_network_done_notify();
 338 #endif
 339 }
 340
 341 static void net_alarm(unsigned long unused)
 342 {
 343     tasklet_schedule(&net_rx_tasklet);
 344 }
 345
 346 struct net_device_stats *netif_be_get_stats(struct net_device *dev)
 347 {
 348     netif_t *netif = netdev_priv(dev);
 349     return &netif->stats;
 350 }
 351
 352 static int __on_net_schedule_list(netif_t *netif)
 353 {
 354     return netif->list.next != NULL;
 355 }
 356
 357 static void remove_from_net_schedule_list(netif_t *netif)
 358 {
 359     spin_lock_irq(&net_schedule_list_lock);
 360     if ( likely(__on_net_schedule_list(netif)) )
 361     {
 362         list_del(&netif->list);
 363         netif->list.next = NULL;
 364         netif_put(netif);
 365     }
 366     spin_unlock_irq(&net_schedule_list_lock);
 367 }
 368
 369 static void add_to_net_schedule_list_tail(netif_t *netif)
 370 {
 371     if ( __on_net_schedule_list(netif) )
 372         return;
 373
 374     spin_lock_irq(&net_schedule_list_lock);
 375     if ( !__on_net_schedule_list(netif) && netif->active )
 376     {
 377         list_add_tail(&netif->list, &net_schedule_list);
 378         netif_get(netif);
 379     }
 380     spin_unlock_irq(&net_schedule_list_lock);
 381 }
 382
 383 void netif_schedule_work(netif_t *netif)
 384 {
 385     if ( (netif->tx_req_cons != netif->tx->req_prod) &&
 386          ((netif->tx_req_cons-netif->tx_resp_prod) != NETIF_TX_RING_SIZE) )
 387     {
 388         add_to_net_schedule_list_tail(netif);
 389         maybe_schedule_tx_action();
 390     }
 391 }
 392
 393 void netif_deschedule_work(netif_t *netif)
 394 {
 395     remove_from_net_schedule_list(netif);
 396 }
 397
 398
 399 static void tx_credit_callback(unsigned long data)
 400 {
 401     netif_t *netif = (netif_t *)data;
 402     netif->remaining_credit = netif->credit_bytes;
 403     netif_schedule_work(netif);
 404 }
 405
 406 static void net_tx_action(unsigned long unused)
 407 {
 408     struct list_head *ent;
 409     struct sk_buff *skb;
 410     netif_t *netif;
 411     netif_tx_request_t txreq;
 412     u16 pending_idx;
 413     NETIF_RING_IDX i;
 414     multicall_entry_t *mcl;
 415     PEND_RING_IDX dc, dp;
 416     unsigned int data_len;
 417
 418     if ( (dc = dealloc_cons) == (dp = dealloc_prod) )
 419         goto skip_dealloc;
 420
 421     mcl = tx_mcl;
 422     while ( dc != dp )
 423     {
 424         pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
 425         mcl[0].op = __HYPERVISOR_update_va_mapping;
 426         mcl[0].args[0] = MMAP_VADDR(pending_idx);
 427         mcl[0].args[1] = 0;
 428         mcl[0].args[2] = 0;
 429         mcl++;
 430     }
 431
 432     mcl[-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
 433     if ( unlikely(HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl) != 0) )
 434         BUG();
 435
 436     mcl = tx_mcl;
 437     while ( dealloc_cons != dp )
 438     {
 439         /* The update_va_mapping() must not fail. */
 440         if ( unlikely(mcl[0].args[5] != 0) )
 441             BUG();
 442
 443         pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
 444
 445         netif = pending_tx_info[pending_idx].netif;
 446
 447         make_tx_response(netif, pending_tx_info[pending_idx].req.id,
 448                          NETIF_RSP_OKAY);
 449
 450         pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
 451
 452         /*
 453          * Scheduling checks must happen after the above response is posted.
 454          * This avoids a possible race with a guest OS on another CPU if that
 455          * guest is testing against 'resp_prod' when deciding whether to notify
 456          * us when it queues additional packets.
 457          */
 458         mb();
 459         if ( (netif->tx_req_cons != netif->tx->req_prod) &&
 460              ((netif->tx_req_cons-netif->tx_resp_prod) != NETIF_TX_RING_SIZE) )
 461             add_to_net_schedule_list_tail(netif);
 462
 463         netif_put(netif);
 464
 465         mcl++;
 466     }
 467
 468  skip_dealloc:
 469     mcl = tx_mcl;
 470     while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
 471             !list_empty(&net_schedule_list) )
 472     {
 473         /* Get a netif from the list with work to do. */
 474         ent = net_schedule_list.next;
 475         netif = list_entry(ent, netif_t, list);
 476         netif_get(netif);
 477         remove_from_net_schedule_list(netif);
 478
 479         /* Work to do? */
 480         i = netif->tx_req_cons;
 481         if ( (i == netif->tx->req_prod) ||
 482              ((i-netif->tx_resp_prod) == NETIF_TX_RING_SIZE) )
 483         {
 484             netif_put(netif);
 485             continue;
 486         }
 487
 488         rmb(); /* Ensure that we see the request before we copy it. */
 489         memcpy(&txreq, &netif->tx->ring[MASK_NETIF_TX_IDX(i)].req,
 490                sizeof(txreq));
 491
 492         /* Credit-based scheduling. */
 493         if ( txreq.size > netif->remaining_credit )
 494         {
 495             unsigned long now = jiffies;
 496             unsigned long next_credit =
 497                 netif->credit_timeout.expires +
 498                 msecs_to_jiffies(netif->credit_usec / 1000);
 499
 500             /* Timer could already be pending in some rare cases. */
 501             if ( timer_pending(&netif->credit_timeout) )
 502                 break;
 503
 504             /* Already passed the point at which we can replenish credit? */
 505             if ( time_after_eq(now, next_credit) )
 506             {
 507                 netif->credit_timeout.expires = now;
 508                 netif->remaining_credit = netif->credit_bytes;
 509             }
 510
 511             /* Still too big to send right now? Then set a timer callback. */
 512             if ( txreq.size > netif->remaining_credit )
 513             {
 514                 netif->remaining_credit = 0;
 515                 netif->credit_timeout.expires  = next_credit;
 516                 netif->credit_timeout.data     = (unsigned long)netif;
 517                 netif->credit_timeout.function = tx_credit_callback;
 518 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 519                 add_timer_on(&netif->credit_timeout, smp_processor_id());
 520 #else
 521                 add_timer(&netif->credit_timeout);
 522 #endif
 523                 break;
 524             }
 525         }
 526         netif->remaining_credit -= txreq.size;
 527
 528         /*
 529          * Why the barrier? It ensures that the frontend sees updated req_cons
 530          * before we check for more work to schedule.
 531          */
 532         netif->tx->req_cons = ++netif->tx_req_cons;
 533         mb();
 534
 535         netif_schedule_work(netif);
 536
 537         if ( unlikely(txreq.size < ETH_HLEN) ||
 538              unlikely(txreq.size > ETH_FRAME_LEN) )
 539         {
 540             DPRINTK("Bad packet size: %d\n", txreq.size);
 541             make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
 542             netif_put(netif);
 543             continue;
 544         }
 545
 546         /* No crossing a page boundary as the payload mustn't fragment. */
 547         if ( unlikely(((txreq.addr & ~PAGE_MASK) + txreq.size) >= PAGE_SIZE) )
 548         {
 549             DPRINTK("txreq.addr: %lx, size: %u, end: %lu\n",
 550                     txreq.addr, txreq.size,
 551                     (txreq.addr &~PAGE_MASK) + txreq.size);
 552             make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
 553             netif_put(netif);
 554             continue;
 555         }
 556
 557         pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
 558
 559         data_len = (txreq.size > PKT_PROT_LEN) ? PKT_PROT_LEN : txreq.size;
 560
 561         if ( unlikely((skb = alloc_skb(data_len+16, GFP_ATOMIC)) == NULL) )
 562         {
 563             DPRINTK("Can't allocate a skb in start_xmit.\n");
 564             make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
 565             netif_put(netif);
 566             break;
 567         }
 568
 569         /* Packets passed to netif_rx() must have some headroom. */
 570         skb_reserve(skb, 16);
 571
 572         mcl[0].op = __HYPERVISOR_update_va_mapping_otherdomain;
 573         mcl[0].args[0] = MMAP_VADDR(pending_idx);
 574         mcl[0].args[1] = (txreq.addr & PAGE_MASK) | __PAGE_KERNEL;
 575         mcl[0].args[2] = 0;
 576         mcl[0].args[3] = netif->domid;
 577         mcl++;
 578
 579         memcpy(&pending_tx_info[pending_idx].req, &txreq, sizeof(txreq));
 580         pending_tx_info[pending_idx].netif = netif;
 581         *((u16 *)skb->data) = pending_idx;
 582
 583         __skb_queue_tail(&tx_queue, skb);
 584
 585         pending_cons++;
 586
 587         /* Filled the batch queue? */
 588         if ( (mcl - tx_mcl) == ARRAY_SIZE(tx_mcl) )
 589             break;
 590     }
 591
 592     if ( mcl == tx_mcl )
 593         return;
 594
 595     if ( unlikely(HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl) != 0) )
 596         BUG();
 597
 598     mcl = tx_mcl;
 599     while ( (skb = __skb_dequeue(&tx_queue)) != NULL )
 600     {
 601         pending_idx = *((u16 *)skb->data);
 602         netif       = pending_tx_info[pending_idx].netif;
 603         memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));
 604
 605         /* Check the remap error code. */
 606         if ( unlikely(mcl[0].args[5] != 0) )
 607         {
 608             DPRINTK("Bad page frame\n");
 609             make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
 610             netif_put(netif);
 611             kfree_skb(skb);
 612             mcl++;
 613             pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
 614             continue;
 615         }
 616
 617         phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
 618             FOREIGN_FRAME(txreq.addr >> PAGE_SHIFT);
 619
 620         data_len = (txreq.size > PKT_PROT_LEN) ? PKT_PROT_LEN : txreq.size;
 621
 622         __skb_put(skb, data_len);
 623         memcpy(skb->data,
 624                (void *)(MMAP_VADDR(pending_idx)|(txreq.addr&~PAGE_MASK)),
 625                data_len);
 626
 627         if ( data_len < txreq.size )
 628         {
 629             /* Append the packet payload as a fragment. */
 630             skb_shinfo(skb)->frags[0].page        =
 631                 virt_to_page(MMAP_VADDR(pending_idx));
 632             skb_shinfo(skb)->frags[0].size        = txreq.size - data_len;
 633             skb_shinfo(skb)->frags[0].page_offset =
 634                 (txreq.addr + data_len) & ~PAGE_MASK;
 635             skb_shinfo(skb)->nr_frags = 1;
 636         }
 637         else
 638         {
 639             /* Schedule a response immediately. */
 640             netif_idx_release(pending_idx);
 641         }
 642
 643         skb->data_len  = txreq.size - data_len;
 644         skb->len      += skb->data_len;
 645
 646         skb->dev      = netif->dev;
 647         skb->protocol = eth_type_trans(skb, skb->dev);
 648
 649         netif->stats.rx_bytes += txreq.size;
 650         netif->stats.rx_packets++;
 651
 652         netif_rx(skb);
 653         netif->dev->last_rx = jiffies;
 654
 655         mcl++;
 656     }
 657 }
 658
 659 static void netif_idx_release(u16 pending_idx)
 660 {
 661     static spinlock_t _lock = SPIN_LOCK_UNLOCKED;
 662     unsigned long flags;
 663
 664     spin_lock_irqsave(&_lock, flags);
 665     dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
 666     spin_unlock_irqrestore(&_lock, flags);
 667
 668     tasklet_schedule(&net_tx_tasklet);
 669 }
 670
 671 static void netif_page_release(struct page *page)
 672 {
 673     u16 pending_idx = page - virt_to_page(mmap_vstart);
 674
 675     /* Ready for next use. */
 676     set_page_count(page, 1);
 677
 678     netif_idx_release(pending_idx);
 679 }
 680
 681 irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
 682 {
 683     netif_t *netif = dev_id;
 684     if ( tx_work_exists(netif) )
 685     {
 686         add_to_net_schedule_list_tail(netif);
 687         maybe_schedule_tx_action();
 688     }
 689     return IRQ_HANDLED;
 690 }
 691
 692 static void make_tx_response(netif_t *netif,
 693                              u16      id,
 694                              s8       st)
 695 {
 696     NETIF_RING_IDX i = netif->tx_resp_prod;
 697     netif_tx_response_t *resp;
 698
 699     resp = &netif->tx->ring[MASK_NETIF_TX_IDX(i)].resp;
 700     resp->id     = id;
 701     resp->status = st;
 702     wmb();
 703     netif->tx->resp_prod = netif->tx_resp_prod = ++i;
 704
 705     mb(); /* Update producer before checking event threshold. */
 706     if ( i == netif->tx->event )
 707         notify_via_evtchn(netif->evtchn);
 708 }
 709
 710 static int make_rx_response(netif_t *netif,
 711                             u16      id,
 712                             s8       st,
 713                             memory_t addr,
 714                             u16      size)
 715 {
 716     NETIF_RING_IDX i = netif->rx_resp_prod;
 717     netif_rx_response_t *resp;
 718
 719     resp = &netif->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
 720     resp->addr   = addr;
 721     resp->id     = id;
 722     resp->status = (s16)size;
 723     if ( st < 0 )
 724         resp->status = (s16)st;
 725     wmb();
 726     netif->rx->resp_prod = netif->rx_resp_prod = ++i;
 727
 728     mb(); /* Update producer before checking event threshold. */
 729     return (i == netif->rx->event);
 730 }
 731
 732 static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
 733 {
 734     struct list_head *ent;
 735     netif_t *netif;
 736     int i = 0;
 737
 738     printk(KERN_ALERT "netif_schedule_list:\n");
 739     spin_lock_irq(&net_schedule_list_lock);
 740
 741     list_for_each ( ent, &net_schedule_list )
 742     {
 743         netif = list_entry(ent, netif_t, list);
 744         printk(KERN_ALERT " %d: private(rx_req_cons=%08x rx_resp_prod=%08x\n",
 745                i, netif->rx_req_cons, netif->rx_resp_prod);
 746         printk(KERN_ALERT "   tx_req_cons=%08x tx_resp_prod=%08x)\n",
 747                netif->tx_req_cons, netif->tx_resp_prod);
 748         printk(KERN_ALERT "   shared(rx_req_prod=%08x rx_resp_prod=%08x\n",
 749                netif->rx->req_prod, netif->rx->resp_prod);
 750         printk(KERN_ALERT "   rx_event=%08x tx_req_prod=%08x\n",
 751                netif->rx->event, netif->tx->req_prod);
 752         printk(KERN_ALERT "   tx_resp_prod=%08x, tx_event=%08x)\n",
 753                netif->tx->resp_prod, netif->tx->event);
 754         i++;
 755     }
 756
 757     spin_unlock_irq(&net_schedule_list_lock);
 758     printk(KERN_ALERT " ** End of netif_schedule_list **\n");
 759
 760     return IRQ_HANDLED;
 761 }
 762
 763 static int __init netback_init(void)
 764 {
 765     int i;
 766     struct page *page;
 767
 768     if ( !(xen_start_info.flags & SIF_NET_BE_DOMAIN) &&
 769          !(xen_start_info.flags & SIF_INITDOMAIN) )
 770         return 0;
 771
 772     printk("Initialising Xen netif backend\n");
 773
 774     /* We can increase reservation by this much in net_rx_action(). */
 775     balloon_update_driver_allowance(NETIF_RX_RING_SIZE);
 776
 777     skb_queue_head_init(&rx_queue);
 778     skb_queue_head_init(&tx_queue);
 779
 780     init_timer(&net_timer);
 781     net_timer.data = 0;
 782     net_timer.function = net_alarm;
 783
 784     netif_interface_init();
 785
 786     if ( (mmap_vstart = allocate_empty_lowmem_region(MAX_PENDING_REQS)) == 0 )
 787         BUG();
 788
 789     for ( i = 0; i < MAX_PENDING_REQS; i++ )
 790     {
 791         page = virt_to_page(MMAP_VADDR(i));
 792         set_page_count(page, 1);
 793         SetPageForeign(page, netif_page_release);
 794     }
 795
 796     pending_cons = 0;
 797     pending_prod = MAX_PENDING_REQS;
 798     for ( i = 0; i < MAX_PENDING_REQS; i++ )
 799         pending_ring[i] = i;
 800
 801     spin_lock_init(&net_schedule_list_lock);
 802     INIT_LIST_HEAD(&net_schedule_list);
 803
 804     netif_ctrlif_init();
 805
 806     (void)request_irq(bind_virq_to_irq(VIRQ_DEBUG),
 807                       netif_be_dbg, SA_SHIRQ,
 808                       "net-be-dbg", &netif_be_dbg);
 809
 810     return 0;
 811 }
 812
 813 static void netback_cleanup(void)
 814 {
 815     BUG();
 816 }
 817
 818 module_init(netback_init);
 819 module_exit(netback_cleanup);