/******************************************************************************
 * drivers/xen/netback/netback.c
 *
 * Back-end of the driver for virtual network devices. This portion of the
 * driver exports a 'unified' network-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  drivers/xen/netfront/netfront.c
 *
 * Copyright (c) 2002-2005, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "common.h"
#include <xen/balloon.h>
#include <xen/interface/memory.h>

/*#define NETBE_DEBUG_INTERRUPT*/

struct netbk_rx_meta {
	skb_frag_t frag;
	int id;
};

static void netif_idx_release(u16 pending_idx);
static void netif_page_release(struct page *page);
static void make_tx_response(netif_t *netif,
			     netif_tx_request_t *txp,
			     s8 st);
static netif_rx_response_t *make_rx_response(netif_t *netif,
					     u16 id,
					     s8 st,
					     u16 offset,
					     u16 size,
					     u16 flags);

static void net_tx_action(unsigned long unused);
static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);

static void net_rx_action(unsigned long unused);
static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);

static struct timer_list net_timer;

#define MAX_PENDING_REQS 256

static struct sk_buff_head rx_queue;
static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE];
static unsigned char rx_notify[NR_IRQS];

static unsigned long mmap_vstart;
#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))

#define PKT_PROT_LEN 64

static struct {
	netif_tx_request_t req;
	netif_t *netif;
} pending_tx_info[MAX_PENDING_REQS];
static u16 pending_ring[MAX_PENDING_REQS];
typedef unsigned int PEND_RING_IDX;
#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
static PEND_RING_IDX pending_prod, pending_cons;
#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)

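/*
 * pending_ring[] holds the indices of currently unused TX slots; pending_prod
 * advances when an index is returned and pending_cons when one is taken, so
 * NR_PENDING_REQS is the number of slots in flight. The counters free-run and
 * are masked with MASK_PEND_IDX, which is why MAX_PENDING_REQS must stay a
 * power of two.
 */
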
/* Freed TX SKBs get batched on this ring before return to pending_ring. */
static u16 dealloc_ring[MAX_PENDING_REQS];
static PEND_RING_IDX dealloc_prod, dealloc_cons;

static struct sk_buff_head tx_queue;

static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];

static struct list_head net_schedule_list;
static spinlock_t net_schedule_list_lock;

#define MAX_MFN_ALLOC 64
static unsigned long mfn_list[MAX_MFN_ALLOC];
static unsigned int alloc_index = 0;

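/*
 * Pool of machine frames used to back local pages whose current frames are
 * transferred to the guest on receive; topped up in check_mfn() below.
 */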
static inline unsigned long alloc_mfn(void)
{
	return mfn_list[--alloc_index];
}

static int check_mfn(int nr)
{
	struct xen_memory_reservation reservation = {
		.extent_order = 0,
		.domid        = DOMID_SELF
	};

	if (likely(alloc_index >= nr))
		return 0;

	set_xen_guest_handle(reservation.extent_start, mfn_list + alloc_index);
	reservation.nr_extents = MAX_MFN_ALLOC - alloc_index;
	alloc_index += HYPERVISOR_memory_op(XENMEM_increase_reservation,
					    &reservation);

	return alloc_index >= nr ? 0 : -ENOMEM;
}

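/*
 * check_mfn() refills the pool with XENMEM_increase_reservation; the extra
 * frames are accounted against the allowance this driver registers via
 * balloon_update_driver_allowance() in netback_init().
 */
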
static inline void maybe_schedule_tx_action(void)
{
	smp_mb();
	if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
	    !list_empty(&net_schedule_list))
		tasklet_schedule(&net_tx_tasklet);
}

/*
 * A gross way of confirming the origin of an skb data page. The slab
 * allocator abuses a field in the page struct to cache the kmem_cache_t ptr.
 */
static inline int is_xen_skb(struct sk_buff *skb)
{
	extern kmem_cache_t *skbuff_cachep;
	kmem_cache_t *cp = (kmem_cache_t *)virt_to_page(skb->head)->lru.next;
	return (cp == skbuff_cachep);
}

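/*
 * Packets that are cloned, nonlinear or not backed by pages from the skbuff
 * cache cannot be handed to the guest by page transfer, so
 * netif_be_start_xmit() first copies them into freshly allocated pages using
 * this helper.
 */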
static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
{
	struct skb_shared_info *ninfo;
	struct sk_buff *nskb;
	unsigned long offset;
	int ret;
	int len;
	int headlen;

	nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC);
	if (unlikely(nskb == NULL))
		goto err;

	skb_reserve(nskb, 16);
	headlen = nskb->end - nskb->data;
	if (headlen > skb_headlen(skb))
		headlen = skb_headlen(skb);
	ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
	BUG_ON(ret);

	ninfo = skb_shinfo(nskb);
	ninfo->gso_size = skb_shinfo(skb)->gso_size;
	ninfo->gso_type = skb_shinfo(skb)->gso_type;

	offset = headlen;
	len = skb->len - headlen;

	nskb->len = skb->len;
	nskb->data_len = len;
	nskb->truesize += len;

	while (len) {
		struct page *page;
		int copy;
		int zero;

		if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
			dump_stack();
			goto err_free;
		}

		copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
		zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;

		page = alloc_page(GFP_ATOMIC | zero);
		if (unlikely(page == NULL))
			goto err_free;

		ret = skb_copy_bits(skb, offset, page_address(page), copy);
		BUG_ON(ret);

		ninfo->frags[ninfo->nr_frags].page = page;
		ninfo->frags[ninfo->nr_frags].page_offset = 0;
		ninfo->frags[ninfo->nr_frags].size = copy;
		ninfo->nr_frags++;

		offset += copy;
		len -= copy;
	}

	offset = nskb->data - skb->data;

	nskb->h.raw = skb->h.raw + offset;
	nskb->nh.raw = skb->nh.raw + offset;
	nskb->mac.raw = skb->mac.raw + offset;

	return nskb;

 err_free:
	kfree_skb(nskb);
 err:
	return NULL;
}

static inline int netbk_max_required_rx_slots(netif_t *netif)
{
	if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
		return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
	return 1; /* all in one */
}

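/*
 * The shared RX ring counts as full if the frontend has not posted enough
 * free requests for a maximally fragmented packet, or if there is no longer
 * room for the responses such a packet would generate.
 */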
static inline int netbk_queue_full(netif_t *netif)
{
	RING_IDX peek = netif->rx_req_cons_peek;
	RING_IDX needed = netbk_max_required_rx_slots(netif);

	return ((netif->rx.sring->req_prod - peek) < needed) ||
	       ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
}

int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	netif_t *netif = netdev_priv(dev);

	BUG_ON(skb->dev != dev);

	/* Drop the packet if the target domain has no receive buffers. */
	if (unlikely(!netif_running(dev) || !netif_carrier_ok(dev)))
		goto drop;

	if (unlikely(netbk_queue_full(netif))) {
		/* Not a BUG_ON() -- misbehaving netfront can trigger this. */
		if (netbk_can_queue(dev))
			DPRINTK("Queue full but not stopped!\n");
		goto drop;
	}

	/*
	 * We copy the packet only if:
	 *  1. the data is shared;
	 *  2. the data is not allocated from our special cache; or
	 *  3. the data is fragmented.
	 */
	if (skb_cloned(skb) || skb_is_nonlinear(skb) || !is_xen_skb(skb)) {
		struct sk_buff *nskb = netbk_copy_skb(skb);
		if (unlikely(nskb == NULL))
			goto drop;
		/* Copy only the header fields we use in this driver. */
		nskb->dev = skb->dev;
		nskb->ip_summed = skb->ip_summed;
		nskb->proto_data_valid = skb->proto_data_valid;
		dev_kfree_skb(skb);
		skb = nskb;
	}

	netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
				   !!skb_shinfo(skb)->gso_size;

	if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
		netif->rx.sring->req_event = netif->rx_req_cons_peek +
			netbk_max_required_rx_slots(netif);
		mb(); /* request notification /then/ check & stop the queue */
		if (netbk_queue_full(netif))
			netif_stop_queue(dev);
	}

	skb_queue_tail(&rx_queue, skb);
	tasklet_schedule(&net_rx_tasklet);

	return 0;

 drop:
	netif->stats.tx_dropped++;
	dev_kfree_skb(skb);
	return 0;
}

#if 0
static void xen_network_done_notify(void)
{
	static struct net_device *eth0_dev = NULL;
	if (unlikely(eth0_dev == NULL))
		eth0_dev = __dev_get_by_name("eth0");
	netif_rx_schedule(eth0_dev);
}
/*
 * Add following to poll() function in NAPI driver (Tigon3 is example):
 *  if ( xen_network_done() )
 *      tg3_enable_ints(tp);
 */
int xen_network_done(void)
{
	return skb_queue_empty(&rx_queue);
}
#endif

static u16 netbk_gop_frag(netif_t *netif, struct page *page, int count, int i)
{
	multicall_entry_t *mcl = rx_mcl + count;
	mmu_update_t *mmu = rx_mmu + count;
	gnttab_transfer_t *gop = grant_rx_op + count;
	netif_rx_request_t *req;
	unsigned long old_mfn, new_mfn;

	old_mfn = virt_to_mfn(page_address(page));

	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
		new_mfn = alloc_mfn();

		/*
		 * Set the new P2M table entry before reassigning
		 * the old data page. Heed the comment in
		 * pgtable-2level.h:pte_page(). :-)
		 */
		set_phys_to_machine(page_to_pfn(page), new_mfn);

		MULTI_update_va_mapping(mcl, (unsigned long)page_address(page),
					pfn_pte_ma(new_mfn, PAGE_KERNEL), 0);

		mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
			   MMU_MACHPHYS_UPDATE;
		mmu->val = page_to_pfn(page);
	}

	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
	gop->mfn = old_mfn;
	gop->domid = netif->domid;
	gop->ref = req->gref;
	return req->id;
}

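/*
 * netbk_gop_skb() queues the grant-transfer (and, for PV guests, MMU update)
 * operations for every page of one skb via netbk_gop_frag() above. The meta
 * slots temporarily reuse the frag fields: for the head slot, page_offset and
 * size carry the GSO type and size so net_rx_action() can emit the extra_info
 * response later.
 */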
static void netbk_gop_skb(struct sk_buff *skb, struct netbk_rx_meta *meta,
			  int count)
{
	netif_t *netif = netdev_priv(skb->dev);
	int nr_frags = skb_shinfo(skb)->nr_frags;
	int i;
	int extra;

	meta[count].frag.page_offset = skb_shinfo(skb)->gso_type;
	meta[count].frag.size = skb_shinfo(skb)->gso_size;
	extra = !!meta[count].frag.size + 1;

	for (i = 0; i < nr_frags; i++) {
		meta[++count].frag = skb_shinfo(skb)->frags[i];
		meta[count].id = netbk_gop_frag(netif, meta[count].frag.page,
						count, i + extra);
	}

	/*
	 * This must occur at the end to ensure that we don't trash
	 * skb_shinfo until we're done.
	 */
	meta[count - nr_frags].id = netbk_gop_frag(netif,
						   virt_to_page(skb->data),
						   count - nr_frags, 0);
	netif->rx.req_cons += nr_frags + extra;
}

static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
{
	int i;

	for (i = 0; i < nr_frags; i++)
		put_page(meta[i].frag.page);
}

static int netbk_check_gop(int nr_frags, domid_t domid, int count)
{
	multicall_entry_t *mcl = rx_mcl + count;
	gnttab_transfer_t *gop = grant_rx_op + count;
	int status = NETIF_RSP_OKAY;
	int i;

	for (i = 0; i <= nr_frags; i++) {
		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
			/* The update_va_mapping() must not fail. */
			BUG_ON(mcl->result != 0);
			mcl++;
		}

		/* Check the reassignment error code. */
		if (gop->status != 0) {
			DPRINTK("Bad status %d from grant transfer to DOM%u\n",
				gop->status, domid);
			/*
			 * Page no longer belongs to us unless GNTST_bad_page,
			 * but that should be a fatal error anyway.
			 */
			BUG_ON(gop->status == GNTST_bad_page);
			status = NETIF_RSP_ERROR;
		}
		gop++;
	}

	return status;
}

static void netbk_add_frag_responses(netif_t *netif, int status,
				     struct netbk_rx_meta *meta, int nr_frags)
{
	int i;

	for (i = 0; i < nr_frags; i++) {
		int id = meta[i].id;
		int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;

		make_rx_response(netif, id, status, meta[i].frag.page_offset,
				 meta[i].frag.size, flags);
	}
}

static void net_rx_action(unsigned long unused)
{
	netif_t *netif = NULL;
	s8 status;
	u16 id, irq, flags;
	netif_rx_response_t *resp;
	struct netif_extra_info *extra;
	multicall_entry_t *mcl;
	struct sk_buff_head rxq;
	struct sk_buff *skb;
	int notify_nr = 0;
	int ret;
	int nr_frags;
	int count;

	/*
	 * Putting hundreds of bytes on the stack is considered rude.
	 * Static works because a tasklet can only be on one CPU at any time.
	 */
	static u16 notify_list[NET_RX_RING_SIZE];
	static struct netbk_rx_meta meta[NET_RX_RING_SIZE];

	skb_queue_head_init(&rxq);

	count = 0;

	while ((skb = skb_dequeue(&rx_queue)) != NULL) {
		nr_frags = skb_shinfo(skb)->nr_frags;
		*(int *)skb->cb = nr_frags;

		if (!xen_feature(XENFEAT_auto_translated_physmap) &&
		    check_mfn(nr_frags + 1)) {
			/* Memory squeeze? Back off for an arbitrary while. */
			if (net_ratelimit())
				WPRINTK("Memory squeeze in netback "
					"driver.\n");
			mod_timer(&net_timer, jiffies + HZ);
			skb_queue_head(&rx_queue, skb);
			break;
		}

		netbk_gop_skb(skb, meta, count);

		count += nr_frags + 1;

		__skb_queue_tail(&rxq, skb);

		/* Filled the batch queue? */
		if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
			break;
	}

	if (!count)
		return;

	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
		mcl = rx_mcl + count;

		mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;

		mcl->op = __HYPERVISOR_mmu_update;
		mcl->args[0] = (unsigned long)rx_mmu;
		mcl->args[1] = count;
		mcl->args[2] = 0;
		mcl->args[3] = DOMID_SELF;

		ret = HYPERVISOR_multicall(rx_mcl, count + 1);
		BUG_ON(ret != 0);
	}

	ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, count);
	BUG_ON(ret != 0);

	count = 0;
	while ((skb = __skb_dequeue(&rxq)) != NULL) {
		nr_frags = *(int *)skb->cb;

		atomic_set(&(skb_shinfo(skb)->dataref), 1);
		skb_shinfo(skb)->nr_frags = 0;
		skb_shinfo(skb)->frag_list = NULL;

		netif = netdev_priv(skb->dev);
		netif->stats.tx_bytes += skb->len;
		netif->stats.tx_packets++;

		netbk_free_pages(nr_frags, meta + count + 1);
		status = netbk_check_gop(nr_frags, netif->domid, count);

		id = meta[count].id;
		flags = nr_frags ? NETRXF_more_data : 0;

		if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
			flags |= NETRXF_csum_blank | NETRXF_data_validated;
		else if (skb->proto_data_valid) /* remote but checksummed? */
			flags |= NETRXF_data_validated;

		resp = make_rx_response(netif, id, status,
					offset_in_page(skb->data),
					skb_headlen(skb), flags);

		extra = NULL;

		if (meta[count].frag.size) {
			struct netif_extra_info *gso =
				(struct netif_extra_info *)
				RING_GET_RESPONSE(&netif->rx,
						  netif->rx.rsp_prod_pvt++);

			if (extra)
				extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
			else
				resp->flags |= NETRXF_extra_info;

			gso->u.gso.size = meta[count].frag.size;
			gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
			gso->u.gso.pad = 0;
			gso->u.gso.features = 0;

			gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
			gso->flags = 0;
			extra = gso;
		}

		netbk_add_frag_responses(netif, status, meta + count + 1,
					 nr_frags);

		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
		irq = netif->irq;
		if (ret && !rx_notify[irq]) {
			rx_notify[irq] = 1;
			notify_list[notify_nr++] = irq;
		}

		if (netif_queue_stopped(netif->dev) &&
		    !netbk_queue_full(netif))
			netif_wake_queue(netif->dev);

		dev_kfree_skb(skb);
		count += nr_frags + 1;
	}

	while (notify_nr != 0) {
		irq = notify_list[--notify_nr];
		rx_notify[irq] = 0;
		notify_remote_via_irq(irq);
	}

	/* More work to do? */
	if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
		tasklet_schedule(&net_rx_tasklet);
#if 0
	else
		xen_network_done_notify();
#endif
}

static void net_alarm(unsigned long unused)
{
	tasklet_schedule(&net_rx_tasklet);
}

struct net_device_stats *netif_be_get_stats(struct net_device *dev)
{
	netif_t *netif = netdev_priv(dev);
	return &netif->stats;
}

static int __on_net_schedule_list(netif_t *netif)
{
	return netif->list.next != NULL;
}

static void remove_from_net_schedule_list(netif_t *netif)
{
	spin_lock_irq(&net_schedule_list_lock);
	if (likely(__on_net_schedule_list(netif))) {
		list_del(&netif->list);
		netif->list.next = NULL;
	}
	spin_unlock_irq(&net_schedule_list_lock);
}

static void add_to_net_schedule_list_tail(netif_t *netif)
{
	if (__on_net_schedule_list(netif))
		return;

	spin_lock_irq(&net_schedule_list_lock);
	if (!__on_net_schedule_list(netif) &&
	    likely(netif_running(netif->dev) &&
		   netif_carrier_ok(netif->dev))) {
		list_add_tail(&netif->list, &net_schedule_list);
	}
	spin_unlock_irq(&net_schedule_list_lock);
}

/*
 * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
 * If this driver is pipelining transmit requests then we can be very
 * aggressive in avoiding new-packet notifications -- frontend only needs to
 * send a notification if there are no outstanding unreceived responses.
 * If we may be buffering transmit requests for any reason then we must be
 * rather more conservative and treat this as the final check for pending work.
 */
void netif_schedule_work(netif_t *netif)
{
	int more_to_do;

#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
	more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
#else
	RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
#endif

	if (more_to_do) {
		add_to_net_schedule_list_tail(netif);
		maybe_schedule_tx_action();
	}
}

void netif_deschedule_work(netif_t *netif)
{
	remove_from_net_schedule_list(netif);
}

static void tx_credit_callback(unsigned long data)
{
	netif_t *netif = (netif_t *)data;
	netif->remaining_credit = netif->credit_bytes;
	netif_schedule_work(netif);
}

inline static void net_tx_action_dealloc(void)
{
	gnttab_unmap_grant_ref_t *gop;
	u16 pending_idx;
	PEND_RING_IDX dc, dp;
	netif_t *netif;
	int ret;

	dc = dealloc_cons;
	dp = dealloc_prod;

	/* Ensure we see all indexes enqueued by netif_idx_release(). */
	smp_rmb();

	/*
	 * Free up any grants we have finished using
	 */
	gop = tx_unmap_ops;
	while (dc != dp) {
		pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
		gnttab_set_unmap_op(gop, MMAP_VADDR(pending_idx),
				    GNTMAP_host_map,
				    grant_tx_handle[pending_idx]);
		gop++;
	}
	ret = HYPERVISOR_grant_table_op(
		GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
	BUG_ON(ret);

	while (dealloc_cons != dp) {
		pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];

		netif = pending_tx_info[pending_idx].netif;

		make_tx_response(netif, &pending_tx_info[pending_idx].req,
				 NETIF_RSP_OKAY);

		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
	}
}

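/*
 * netbk_tx_err() consumes every remaining request of a partially parsed
 * packet and reports NETIF_RSP_ERROR for each, keeping the shared TX ring in
 * step with the frontend after a bad request.
 */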
static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
{
	RING_IDX cons = netif->tx.req_cons;

	do {
		make_tx_response(netif, txp, NETIF_RSP_ERROR);
		if (cons >= end)
			break;
		txp = RING_GET_REQUEST(&netif->tx, cons++);
	} while (1);
	netif->tx.req_cons = cons;
	netif_schedule_work(netif);
}

static int netbk_count_requests(netif_t *netif, netif_tx_request_t *txp,
				int work_to_do)
{
	netif_tx_request_t *first = txp;
	RING_IDX cons = netif->tx.req_cons;
	int frags = 0;

	while (txp->flags & NETTXF_more_data) {
		if (frags >= work_to_do) {
			DPRINTK("Need more frags\n");
			return -frags;
		}

		txp = RING_GET_REQUEST(&netif->tx, cons + frags);
		if (txp->size > first->size) {
			DPRINTK("Frags galore\n");
			return -frags;
		}

		first->size -= txp->size;
		frags++;

		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
			DPRINTK("txp->offset: %x, size: %u\n",
				txp->offset, txp->size);
			return -frags;
		}
	}

	return frags;
}

static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
						  struct sk_buff *skb,
						  gnttab_map_grant_ref_t *mop)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	skb_frag_t *frags = shinfo->frags;
	netif_tx_request_t *txp;
	unsigned long pending_idx = *((u16 *)skb->data);
	RING_IDX cons = netif->tx.req_cons;
	int i, start;

	/* Skip first skb fragment if it is on same page as header fragment. */
	start = ((unsigned long)shinfo->frags[0].page == pending_idx);

	for (i = start; i < shinfo->nr_frags; i++) {
		txp = RING_GET_REQUEST(&netif->tx, cons++);
		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];

		gnttab_set_map_op(mop++, MMAP_VADDR(pending_idx),
				  GNTMAP_host_map | GNTMAP_readonly,
				  txp->gref, netif->domid);

		memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
		pending_tx_info[pending_idx].netif = netif;
		frags[i].page = (void *)pending_idx;
	}

	return mop;
}

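/*
 * Note the trick above: while a TX request is in flight, frag.page holds the
 * small integer pending_idx rather than a real page pointer. The grant-map
 * status is checked in netbk_tx_check_mop() and the index is only exchanged
 * for the mapped page in netbk_fill_frags().
 */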
static int netbk_tx_check_mop(struct sk_buff *skb,
			      gnttab_map_grant_ref_t **mopp)
{
	gnttab_map_grant_ref_t *mop = *mopp;
	int pending_idx = *((u16 *)skb->data);
	netif_t *netif = pending_tx_info[pending_idx].netif;
	netif_tx_request_t *txp;
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int nr_frags = shinfo->nr_frags;
	int i, err, start;

	/* Check status of header. */
	err = mop->status;
	if (unlikely(err)) {
		txp = &pending_tx_info[pending_idx].req;
		make_tx_response(netif, txp, NETIF_RSP_ERROR);
		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
	} else {
		set_phys_to_machine(
			__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT,
			FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
		grant_tx_handle[pending_idx] = mop->handle;
	}

	/* Skip first skb fragment if it is on same page as header fragment. */
	start = ((unsigned long)shinfo->frags[0].page == pending_idx);

	for (i = start; i < nr_frags; i++) {
		int j, newerr;

		pending_idx = (unsigned long)shinfo->frags[i].page;

		/* Check error status: if okay then remember grant handle. */
		newerr = (++mop)->status;
		if (likely(!newerr)) {
			set_phys_to_machine(
				__pa(MMAP_VADDR(pending_idx))>>PAGE_SHIFT,
				FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
			grant_tx_handle[pending_idx] = mop->handle;
			/* Had a previous error? Invalidate this fragment. */
			if (unlikely(err))
				netif_idx_release(pending_idx);
			continue;
		}

		/* Error on this fragment: respond to client with an error. */
		txp = &pending_tx_info[pending_idx].req;
		make_tx_response(netif, txp, NETIF_RSP_ERROR);
		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;

		/* Not the first error? Preceding frags already invalidated. */
		if (err)
			continue;

		/* First error: invalidate header and preceding fragments. */
		pending_idx = *((u16 *)skb->data);
		netif_idx_release(pending_idx);
		for (j = start; j < i; j++) {
			pending_idx = (unsigned long)shinfo->frags[j].page;
			netif_idx_release(pending_idx);
		}

		/* Remember the error: invalidate all subsequent fragments. */
		err = newerr;
	}

	*mopp = mop + 1;
	return err;
}

static void netbk_fill_frags(struct sk_buff *skb)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int nr_frags = shinfo->nr_frags;
	int i;

	for (i = 0; i < nr_frags; i++) {
		skb_frag_t *frag = shinfo->frags + i;
		netif_tx_request_t *txp;
		unsigned long pending_idx;

		pending_idx = (unsigned long)frag->page;
		txp = &pending_tx_info[pending_idx].req;
		frag->page = virt_to_page(MMAP_VADDR(pending_idx));
		frag->size = txp->size;
		frag->page_offset = txp->offset;

		skb->len += txp->size;
		skb->data_len += txp->size;
		skb->truesize += txp->size;
	}
}

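/*
 * Extras are stored indexed by (type - 1), so at most one extra of each type
 * accompanies a packet; a repeated type simply overwrites the earlier slot.
 */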
int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
		     int work_to_do)
{
	struct netif_extra_info *extra;
	RING_IDX cons = netif->tx.req_cons;

	do {
		if (unlikely(work_to_do-- <= 0)) {
			DPRINTK("Missing extra info\n");
			return -EBADR;
		}

		extra = (struct netif_extra_info *)
			RING_GET_REQUEST(&netif->tx, cons);
		if (unlikely(!extra->type ||
			     extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
			netif->tx.req_cons = ++cons;
			DPRINTK("Invalid extra type: %d\n", extra->type);
			return -EINVAL;
		}

		memcpy(&extras[extra->type - 1], extra, sizeof(*extra));
		netif->tx.req_cons = ++cons;
	} while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);

	return work_to_do;
}

static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
{
	if (!gso->u.gso.size) {
		DPRINTK("GSO size must not be zero.\n");
		return -EINVAL;
	}

	/* Currently only TCPv4 S.O. is supported. */
	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
		DPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
		return -EINVAL;
	}

	skb_shinfo(skb)->gso_size = gso->u.gso.size;
	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;

	/* Header must be checked, and gso_segs computed. */
	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
	skb_shinfo(skb)->gso_segs = 0;

	return 0;
}

/* Called after netfront has transmitted */
static void net_tx_action(unsigned long unused)
{
	struct list_head *ent;
	struct sk_buff *skb;
	netif_t *netif;
	netif_tx_request_t txreq;
	struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
	u16 pending_idx;
	RING_IDX i;
	gnttab_map_grant_ref_t *mop;
	unsigned int data_len;
	int ret, work_to_do;

	if (dealloc_cons != dealloc_prod)
		net_tx_action_dealloc();

	mop = tx_map_ops;
	while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
	       !list_empty(&net_schedule_list)) {
		/* Get a netif from the list with work to do. */
		ent = net_schedule_list.next;
		netif = list_entry(ent, netif_t, list);
		remove_from_net_schedule_list(netif);

		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
		if (!work_to_do)
			continue;

		i = netif->tx.req_cons;
		rmb(); /* Ensure that we see the request before we copy it. */
		memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
		/* Credit-based scheduling. */
		if (txreq.size > netif->remaining_credit) {
			unsigned long now = jiffies;
			unsigned long next_credit =
				netif->credit_timeout.expires +
				msecs_to_jiffies(netif->credit_usec / 1000);

			/* Timer could already be pending in rare cases. */
			if (timer_pending(&netif->credit_timeout))
				break;

			/* Passed the point where we can replenish credit? */
			if (time_after_eq(now, next_credit)) {
				netif->credit_timeout.expires = now;
				netif->remaining_credit = netif->credit_bytes;
			}

			/* Still too big to send right now? Set a callback. */
			if (txreq.size > netif->remaining_credit) {
				netif->remaining_credit = 0;
				netif->credit_timeout.data =
					(unsigned long)netif;
				netif->credit_timeout.function =
					tx_credit_callback;
				__mod_timer(&netif->credit_timeout,
					    next_credit);
				continue;
			}
		}
		netif->remaining_credit -= txreq.size;

		work_to_do--;
		netif->tx.req_cons = ++i;

		memset(extras, 0, sizeof(extras));
		if (txreq.flags & NETTXF_extra_info) {
			work_to_do = netbk_get_extras(netif, extras,
						      work_to_do);
			i = netif->tx.req_cons;
			if (unlikely(work_to_do < 0)) {
				netbk_tx_err(netif, &txreq, i);
				continue;
			}
		}

		ret = netbk_count_requests(netif, &txreq, work_to_do);
		if (unlikely(ret < 0)) {
			netbk_tx_err(netif, &txreq, i - ret);
			continue;
		}
		i += ret;

		if (unlikely(ret > MAX_SKB_FRAGS)) {
			DPRINTK("Too many frags\n");
			netbk_tx_err(netif, &txreq, i);
			continue;
		}

		if (unlikely(txreq.size < ETH_HLEN)) {
			DPRINTK("Bad packet size: %d\n", txreq.size);
			netbk_tx_err(netif, &txreq, i);
			continue;
		}

		/* No crossing a page as the payload mustn't fragment. */
		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
			DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
				txreq.offset, txreq.size,
				(txreq.offset &~PAGE_MASK) + txreq.size);
			netbk_tx_err(netif, &txreq, i);
			continue;
		}

		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];

		data_len = (txreq.size > PKT_PROT_LEN &&
			    ret < MAX_SKB_FRAGS) ?
			PKT_PROT_LEN : txreq.size;

		skb = alloc_skb(data_len+16, GFP_ATOMIC);
		if (unlikely(skb == NULL)) {
			DPRINTK("Can't allocate a skb in start_xmit.\n");
			netbk_tx_err(netif, &txreq, i);
			break;
		}

		/* Packets passed to netif_rx() must have some headroom. */
		skb_reserve(skb, 16);

		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
			struct netif_extra_info *gso;
			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];

			if (netbk_set_skb_gso(skb, gso)) {
				kfree_skb(skb);
				netbk_tx_err(netif, &txreq, i);
				continue;
			}
		}

		gnttab_set_map_op(mop, MMAP_VADDR(pending_idx),
				  GNTMAP_host_map | GNTMAP_readonly,
				  txreq.gref, netif->domid);
		mop++;

		memcpy(&pending_tx_info[pending_idx].req,
		       &txreq, sizeof(txreq));
		pending_tx_info[pending_idx].netif = netif;
		*((u16 *)skb->data) = pending_idx;

		__skb_put(skb, data_len);

		skb_shinfo(skb)->nr_frags = ret;
		if (data_len < txreq.size) {
			skb_shinfo(skb)->nr_frags++;
			skb_shinfo(skb)->frags[0].page =
				(void *)(unsigned long)pending_idx;
		}

		__skb_queue_tail(&tx_queue, skb);

		pending_cons++;

		mop = netbk_get_requests(netif, skb, mop);

		netif->tx.req_cons = i;
		netif_schedule_work(netif);

		if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
			break;
	}

	if (mop == tx_map_ops)
		return;

	ret = HYPERVISOR_grant_table_op(
		GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
	BUG_ON(ret);

	mop = tx_map_ops;
	while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
		netif_tx_request_t *txp;

		pending_idx = *((u16 *)skb->data);
		netif = pending_tx_info[pending_idx].netif;
		txp = &pending_tx_info[pending_idx].req;

		/* Check the remap error code. */
		if (unlikely(netbk_tx_check_mop(skb, &mop))) {
			printk(KERN_ALERT "#### netback grant fails\n");
			skb_shinfo(skb)->nr_frags = 0;
			kfree_skb(skb);
			continue;
		}

		data_len = skb->len;
		memcpy(skb->data,
		       (void *)(MMAP_VADDR(pending_idx)|txp->offset),
		       data_len);
		if (data_len < txp->size) {
			/* Append the packet payload as a fragment. */
			txp->offset += data_len;
			txp->size -= data_len;
		} else {
			/* Schedule a response immediately. */
			netif_idx_release(pending_idx);
		}

		/*
		 * Old frontends do not assert data_validated but we
		 * can infer it from csum_blank so test both flags.
		 */
		if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			skb->proto_data_valid = 1;
		} else {
			skb->ip_summed = CHECKSUM_NONE;
			skb->proto_data_valid = 0;
		}
		skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank);

		netbk_fill_frags(skb);

		skb->dev = netif->dev;
		skb->protocol = eth_type_trans(skb, skb->dev);

		netif->stats.rx_bytes += skb->len;
		netif->stats.rx_packets++;

		netif_rx(skb);
		netif->dev->last_rx = jiffies;
	}
}

static void netif_idx_release(u16 pending_idx)
{
	static DEFINE_SPINLOCK(_lock);
	unsigned long flags;

	spin_lock_irqsave(&_lock, flags);
	dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx;
	/* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
	smp_wmb();
	dealloc_prod++;
	spin_unlock_irqrestore(&_lock, flags);

	tasklet_schedule(&net_tx_tasklet);
}

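/*
 * Called by the mm layer when the last reference to a page mapped at
 * MMAP_VADDR() is dropped; the pages are flagged with SetPageForeign() and
 * this callback in netback_init() below.
 */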
static void netif_page_release(struct page *page)
{
	u16 pending_idx = page - virt_to_page(mmap_vstart);

	/* Ready for next use. */
	init_page_count(page);

	netif_idx_release(pending_idx);
}

irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
{
	netif_t *netif = dev_id;

	add_to_net_schedule_list_tail(netif);
	maybe_schedule_tx_action();

	if (netif_queue_stopped(netif->dev) && !netbk_queue_full(netif))
		netif_wake_queue(netif->dev);

	return IRQ_HANDLED;
}

static void make_tx_response(netif_t *netif,
			     netif_tx_request_t *txp,
			     s8 st)
{
	RING_IDX i = netif->tx.rsp_prod_pvt;
	netif_tx_response_t *resp;
	int notify;

	resp = RING_GET_RESPONSE(&netif->tx, i);
	resp->id     = txp->id;
	resp->status = st;

	if (txp->flags & NETTXF_extra_info)
		RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;

	netif->tx.rsp_prod_pvt = ++i;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
	if (notify)
		notify_remote_via_irq(netif->irq);

#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
	if (i == netif->tx.req_cons) {
		int more_to_do;
		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
		if (more_to_do)
			add_to_net_schedule_list_tail(netif);
	}
#endif
}

static netif_rx_response_t *make_rx_response(netif_t *netif,
					     u16 id,
					     s8 st,
					     u16 offset,
					     u16 size,
					     u16 flags)
{
	RING_IDX i = netif->rx.rsp_prod_pvt;
	netif_rx_response_t *resp;

	resp = RING_GET_RESPONSE(&netif->rx, i);
	resp->offset = offset;
	resp->flags  = flags;
	resp->id     = id;
	resp->status = (s16)size;
	if (st < 0)
		resp->status = (s16)st;

	netif->rx.rsp_prod_pvt = ++i;

	return resp;
}

#ifdef NETBE_DEBUG_INTERRUPT
static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
{
	struct list_head *ent;
	netif_t *netif;
	int i = 0;

	printk(KERN_ALERT "netif_schedule_list:\n");
	spin_lock_irq(&net_schedule_list_lock);

	list_for_each (ent, &net_schedule_list) {
		netif = list_entry(ent, netif_t, list);
		printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
		       "rx_resp_prod=%08x\n",
		       i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
		printk(KERN_ALERT "   tx_req_cons=%08x tx_resp_prod=%08x)\n",
		       netif->tx.req_cons, netif->tx.rsp_prod_pvt);
		printk(KERN_ALERT "   shared(rx_req_prod=%08x "
		       "rx_resp_prod=%08x\n",
		       netif->rx.sring->req_prod, netif->rx.sring->rsp_prod);
		printk(KERN_ALERT "   rx_event=%08x tx_req_prod=%08x\n",
		       netif->rx.sring->rsp_event, netif->tx.sring->req_prod);
		printk(KERN_ALERT "   tx_resp_prod=%08x, tx_event=%08x)\n",
		       netif->tx.sring->rsp_prod, netif->tx.sring->rsp_event);
		i++;
	}

	spin_unlock_irq(&net_schedule_list_lock);
	printk(KERN_ALERT " ** End of netif_schedule_list **\n");

	return IRQ_HANDLED;
}
#endif

static int __init netback_init(void)
{
	int i;
	struct page *page;

	if (!is_running_on_xen())
		return -ENODEV;

	/* We can increase reservation by this much in net_rx_action(). */
	balloon_update_driver_allowance(NET_RX_RING_SIZE);

	skb_queue_head_init(&rx_queue);
	skb_queue_head_init(&tx_queue);

	init_timer(&net_timer);
	net_timer.data = 0;
	net_timer.function = net_alarm;

	page = balloon_alloc_empty_page_range(MAX_PENDING_REQS);
	if (page == NULL)
		return -ENOMEM;

	mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));

	for (i = 0; i < MAX_PENDING_REQS; i++) {
		page = virt_to_page(MMAP_VADDR(i));
		init_page_count(page);
		SetPageForeign(page, netif_page_release);
	}

	pending_cons = 0;
	pending_prod = MAX_PENDING_REQS;
	for (i = 0; i < MAX_PENDING_REQS; i++)
		pending_ring[i] = i;

	spin_lock_init(&net_schedule_list_lock);
	INIT_LIST_HEAD(&net_schedule_list);

	netif_xenbus_init();

#ifdef NETBE_DEBUG_INTERRUPT
	(void)bind_virq_to_irqhandler(
		VIRQ_DEBUG,
		0,
		netif_be_dbg,
		SA_SHIRQ,
		"net-be-dbg",
		&netif_be_dbg);
#endif

	return 0;
}

module_init(netback_init);

MODULE_LICENSE("Dual BSD/GPL");