/******************************************************************************
 * drivers/xen/netback/netback.c
 *
 * Back-end of the driver for virtual network devices. This portion of the
 * driver exports a 'unified' network-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  drivers/xen/netfront/netfront.c
 *
 * Copyright (c) 2002-2005, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "common.h"
#include <xen/balloon.h>
#include <xen/interface/memory.h>

/*#define NETBE_DEBUG_INTERRUPT*/

struct netbk_rx_meta {
	skb_frag_t frag;
	int id;
};

static void netif_idx_release(u16 pending_idx);
static void netif_page_release(struct page *page);
static void make_tx_response(netif_t *netif,
			     netif_tx_request_t *txp,
			     s8 st);
static netif_rx_response_t *make_rx_response(netif_t *netif,
					     u16 id,
					     s8 st,
					     u16 offset,
					     u16 size,
					     u16 flags);

static void net_tx_action(unsigned long unused);
static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);

static void net_rx_action(unsigned long unused);
static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);

static struct timer_list net_timer;

#define MAX_PENDING_REQS 256

static struct sk_buff_head rx_queue;
static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE];
static unsigned char rx_notify[NR_IRQS];

static unsigned long mmap_vstart;
#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))

#define PKT_PROT_LEN 64

static struct {
	netif_tx_request_t req;
	netif_t *netif;
} pending_tx_info[MAX_PENDING_REQS];
static u16 pending_ring[MAX_PENDING_REQS];
typedef unsigned int PEND_RING_IDX;
#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
static PEND_RING_IDX pending_prod, pending_cons;
#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
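/*
 * pending_ring[] is a ring of free pending-request slots.  pending_prod and
 * pending_cons are free-running indices into it, reduced modulo
 * MAX_PENDING_REQS (a power of two) by MASK_PEND_IDX(); NR_PENDING_REQS is
 * therefore the number of slots currently tied up by in-flight TX requests.
 */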
/* Freed TX SKBs get batched on this ring before return to pending_ring. */
static u16 dealloc_ring[MAX_PENDING_REQS];
static PEND_RING_IDX dealloc_prod, dealloc_cons;

static struct sk_buff_head tx_queue;

static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];

static struct list_head net_schedule_list;
static spinlock_t net_schedule_list_lock;

#define MAX_MFN_ALLOC 64
static unsigned long mfn_list[MAX_MFN_ALLOC];
static unsigned int alloc_index = 0;

static inline unsigned long alloc_mfn(void)
{
	return mfn_list[--alloc_index];
}

static int check_mfn(int nr)
{
	struct xen_memory_reservation reservation = {
		.extent_order = 0,
		.domid        = DOMID_SELF
	};

	if (likely(alloc_index >= nr))
		return 0;

	set_xen_guest_handle(reservation.extent_start, mfn_list + alloc_index);
	reservation.nr_extents = MAX_MFN_ALLOC - alloc_index;
	alloc_index += HYPERVISOR_memory_op(XENMEM_increase_reservation,
					    &reservation);

	return alloc_index >= nr ? 0 : -ENOMEM;
}

static inline void maybe_schedule_tx_action(void)
{
	smp_mb();
	if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
	    !list_empty(&net_schedule_list))
		tasklet_schedule(&net_tx_tasklet);
}

/*
 * A gross way of confirming the origin of an skb data page. The slab
 * allocator abuses a field in the page struct to cache the kmem_cache_t ptr.
 */
static inline int is_xen_skb(struct sk_buff *skb)
{
	extern kmem_cache_t *skbuff_cachep;
	kmem_cache_t *cp = (kmem_cache_t *)virt_to_page(skb->head)->lru.next;
	return (cp == skbuff_cachep);
}

static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
{
	struct skb_shared_info *ninfo;
	struct sk_buff *nskb;
	unsigned long offset;
	int ret;
	int len;
	int headlen;

	nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC);
	if (unlikely(!nskb))
		goto err;

	skb_reserve(nskb, 16);
	headlen = nskb->end - nskb->data;
	if (headlen > skb_headlen(skb))
		headlen = skb_headlen(skb);
	ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
	BUG_ON(ret);

	ninfo = skb_shinfo(nskb);
	ninfo->gso_size = skb_shinfo(skb)->gso_size;
	ninfo->gso_type = skb_shinfo(skb)->gso_type;

	offset = headlen;
	len = skb->len - headlen;

	nskb->len = skb->len;
	nskb->data_len = len;
	nskb->truesize += len;

	while (len) {
		struct page *page;
		int copy;
		int zero;

		if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
			dump_stack();
			goto err_free;
		}

		copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
		zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
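		/*
		 * Only the final, partially-filled chunk is allocated with
		 * __GFP_ZERO: the whole page is later handed to the frontend
		 * by grant transfer, so its unwritten tail must not leak
		 * stale data.
		 */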
		page = alloc_page(GFP_ATOMIC | zero);
		if (unlikely(!page))
			goto err_free;

		ret = skb_copy_bits(skb, offset, page_address(page), copy);
		BUG_ON(ret);

		ninfo->frags[ninfo->nr_frags].page = page;
		ninfo->frags[ninfo->nr_frags].page_offset = 0;
		ninfo->frags[ninfo->nr_frags].size = copy;
		ninfo->nr_frags++;

		offset += copy;
		len -= copy;
	}

	offset = nskb->data - skb->data;

	nskb->h.raw = skb->h.raw + offset;
	nskb->nh.raw = skb->nh.raw + offset;
	nskb->mac.raw = skb->mac.raw + offset;

	return nskb;

 err_free:
	kfree_skb(nskb);
 err:
	return NULL;
}

static inline int netbk_max_required_rx_slots(netif_t *netif)
{
	if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
		return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
	return 1; /* all in one */
}

static inline int netbk_queue_full(netif_t *netif)
{
	RING_IDX peek   = netif->rx_req_cons_peek;
	RING_IDX needed = netbk_max_required_rx_slots(netif);

	return ((netif->rx.sring->req_prod - peek) < needed) ||
	       ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
}

int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	netif_t *netif = netdev_priv(dev);

	BUG_ON(skb->dev != dev);

	/* Drop the packet if the target domain has no receive buffers. */
	if (unlikely(!netif_running(dev) || !netif_carrier_ok(dev)))
		goto drop;

	if (unlikely(netbk_queue_full(netif))) {
		/* Not a BUG_ON() -- misbehaving netfront can trigger this. */
		if (netbk_can_queue(dev))
			DPRINTK("Queue full but not stopped!\n");
		goto drop;
	}

	/*
	 * We do not copy the packet unless:
	 *  1. The data is shared;
	 *  2. The data is fragmented; or
	 *  3. The data is not allocated from our special cache.
	 */
	if (skb_cloned(skb) || skb_is_nonlinear(skb) || !is_xen_skb(skb)) {
		struct sk_buff *nskb = netbk_copy_skb(skb);
		if (unlikely(nskb == NULL))
			goto drop;
		/* Copy only the header fields we use in this driver. */
		nskb->dev = skb->dev;
		nskb->ip_summed = skb->ip_summed;
		nskb->proto_data_valid = skb->proto_data_valid;
		dev_kfree_skb(skb);
		skb = nskb;
	}

	netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
				   !!skb_shinfo(skb)->gso_size;
	netif_get(netif);

	if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
		netif->rx.sring->req_event = netif->rx_req_cons_peek +
			netbk_max_required_rx_slots(netif);
		mb(); /* request notification /then/ check & stop the queue */
		if (netbk_queue_full(netif))
			netif_stop_queue(dev);
	}

	skb_queue_tail(&rx_queue, skb);
	tasklet_schedule(&net_rx_tasklet);

	return 0;

 drop:
	netif->stats.tx_dropped++;
	dev_kfree_skb(skb);
	return 0;
}

#if 0
static void xen_network_done_notify(void)
{
	static struct net_device *eth0_dev = NULL;
	if (unlikely(eth0_dev == NULL))
		eth0_dev = __dev_get_by_name("eth0");
	netif_rx_schedule(eth0_dev);
}
/*
 * Add following to poll() function in NAPI driver (Tigon3 is example):
 *  if ( xen_network_done() )
 *      tg3_enable_ints(tp);
 */
int xen_network_done(void)
{
	return skb_queue_empty(&rx_queue);
}
#endif

static u16 netbk_gop_frag(netif_t *netif, struct page *page, int count, int i)
{
	multicall_entry_t *mcl = rx_mcl + count;
	mmu_update_t *mmu = rx_mmu + count;
	gnttab_transfer_t *gop = grant_rx_op + count;
	netif_rx_request_t *req;
	unsigned long old_mfn, new_mfn;

	old_mfn = virt_to_mfn(page_address(page));
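	/*
	 * In the non-auto-translated case the data page is handed to the
	 * frontend by grant transfer: a fresh MFN from the balloon pool
	 * replaces the old one in our P2M mapping, and the old MFN (still
	 * holding the packet data) is transferred to the guest via the
	 * GNTTABOP_transfer batch issued from net_rx_action().
	 */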
	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
		new_mfn = alloc_mfn();

		/*
		 * Set the new P2M table entry before reassigning
		 * the old data page. Heed the comment in
		 * pgtable-2level.h:pte_page(). :-)
		 */
		set_phys_to_machine(page_to_pfn(page), new_mfn);

		MULTI_update_va_mapping(mcl, (unsigned long)page_address(page),
					pfn_pte_ma(new_mfn, PAGE_KERNEL), 0);

		mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
			   MMU_MACHPHYS_UPDATE;
		mmu->val = page_to_pfn(page);
	}

	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
	gop->mfn = old_mfn;
	gop->domid = netif->domid;
	gop->ref = req->gref;
	return req->id;
}

static void netbk_gop_skb(struct sk_buff *skb,
			  struct netbk_rx_meta *meta, int count)
{
	netif_t *netif = netdev_priv(skb->dev);
	int nr_frags = skb_shinfo(skb)->nr_frags;
	int i;
	int extra;

	meta[count].frag.page_offset = skb_shinfo(skb)->gso_type;
	meta[count].frag.size = skb_shinfo(skb)->gso_size;
	extra = !!meta[count].frag.size + 1;

	for (i = 0; i < nr_frags; i++) {
		meta[++count].frag = skb_shinfo(skb)->frags[i];
		meta[count].id = netbk_gop_frag(netif, meta[count].frag.page,
						count, i + extra);
	}

	/*
	 * This must occur at the end to ensure that we don't trash
	 * skb_shinfo until we're done.
	 */
	meta[count - nr_frags].id = netbk_gop_frag(netif,
						   virt_to_page(skb->data),
						   count - nr_frags, 0);
	netif->rx.req_cons += nr_frags + extra;
}

static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
{
	int i;

	for (i = 0; i < nr_frags; i++)
		put_page(meta[i].frag.page);
}

static int netbk_check_gop(int nr_frags, domid_t domid, int count)
{
	multicall_entry_t *mcl = rx_mcl + count;
	gnttab_transfer_t *gop = grant_rx_op + count;
	int status = NETIF_RSP_OKAY;
	int i;

	for (i = 0; i <= nr_frags; i++) {
		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
			/* The update_va_mapping() must not fail. */
			BUG_ON(mcl->result != 0);
			mcl++;
		}

		/* Check the reassignment error code. */
		if (gop->status != 0) {
			DPRINTK("Bad status %d from grant transfer to DOM%u\n",
				gop->status, domid);
			/*
			 * Page no longer belongs to us unless GNTST_bad_page,
			 * but that should be a fatal error anyway.
			 */
			BUG_ON(gop->status == GNTST_bad_page);
			status = NETIF_RSP_ERROR;
		}
		gop++;
	}

	return status;
}

static void netbk_add_frag_responses(netif_t *netif, int status,
				     struct netbk_rx_meta *meta, int nr_frags)
{
	int i;

	for (i = 0; i < nr_frags; i++) {
		int id = meta[i].id;
		int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;

		make_rx_response(netif, id, status, meta[i].frag.page_offset,
				 meta[i].frag.size, flags);
	}
}

static void net_rx_action(unsigned long unused)
{
	netif_t *netif = NULL;
	s8 status;
	u16 id, irq, flags;
	netif_rx_response_t *resp;
	struct netif_extra_info *extra;
	multicall_entry_t *mcl;
	struct sk_buff_head rxq;
	struct sk_buff *skb;
	int notify_nr = 0;
	int ret;
	int nr_frags;
	int count;

	/*
	 * Putting hundreds of bytes on the stack is considered rude.
	 * Static works because a tasklet can only be on one CPU at any time.
	 */
	static u16 notify_list[NET_RX_RING_SIZE];
	static struct netbk_rx_meta meta[NET_RX_RING_SIZE];

	skb_queue_head_init(&rxq);

	count = 0;

	while ((skb = skb_dequeue(&rx_queue)) != NULL) {
		nr_frags = skb_shinfo(skb)->nr_frags;
		*(int *)skb->cb = nr_frags;

		if (!xen_feature(XENFEAT_auto_translated_physmap) &&
		    check_mfn(nr_frags + 1)) {
			/* Memory squeeze? Back off for an arbitrary while. */
			if (net_ratelimit())
				WPRINTK("Memory squeeze in netback "
					"driver.\n");
			mod_timer(&net_timer, jiffies + HZ);
			skb_queue_head(&rx_queue, skb);
			break;
		}

		netbk_gop_skb(skb, meta, count);

		count += nr_frags + 1;

		__skb_queue_tail(&rxq, skb);
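		/*
		 * Keep at least one worst-case skb's worth of slots
		 * (MAX_SKB_FRAGS + 1) in reserve, so the static rx_mcl[],
		 * rx_mmu[], grant_rx_op[] and meta[] arrays sized to
		 * NET_RX_RING_SIZE cannot be overrun on the next iteration.
		 */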
		/* Filled the batch queue? */
		if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
			break;
	}

	if (!count)
		return;

	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
		mcl = rx_mcl + count;

		mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;

		mcl->op = __HYPERVISOR_mmu_update;
		mcl->args[0] = (unsigned long)rx_mmu;
		mcl->args[1] = count;
		mcl->args[2] = 0;
		mcl->args[3] = DOMID_SELF;

		ret = HYPERVISOR_multicall(rx_mcl, count + 1);
		BUG_ON(ret != 0);
	}

	ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, count);
	BUG_ON(ret != 0);

	count = 0;
	while ((skb = __skb_dequeue(&rxq)) != NULL) {
		nr_frags = *(int *)skb->cb;

		atomic_set(&(skb_shinfo(skb)->dataref), 1);
		skb_shinfo(skb)->nr_frags = 0;
		skb_shinfo(skb)->frag_list = NULL;

		netif = netdev_priv(skb->dev);
		netif->stats.tx_bytes += skb->len;
		netif->stats.tx_packets++;

		netbk_free_pages(nr_frags, meta + count + 1);
		status = netbk_check_gop(nr_frags, netif->domid, count);

		id = meta[count].id;
		flags = nr_frags ? NETRXF_more_data : 0;

		if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
			flags |= NETRXF_csum_blank | NETRXF_data_validated;
		else if (skb->proto_data_valid) /* remote but checksummed? */
			flags |= NETRXF_data_validated;

		resp = make_rx_response(netif, id, status,
					offset_in_page(skb->data),
					skb_headlen(skb), flags);

		extra = NULL;

		if (meta[count].frag.size) {
			struct netif_extra_info *gso =
				(struct netif_extra_info *)
				RING_GET_RESPONSE(&netif->rx,
						  netif->rx.rsp_prod_pvt++);

			if (extra)
				extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
			else
				resp->flags |= NETRXF_extra_info;

			gso->u.gso.size = meta[count].frag.size;
			gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
			gso->u.gso.pad = 0;
			gso->u.gso.features = 0;

			gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
			gso->flags = 0;
			extra = gso;
		}

		netbk_add_frag_responses(netif, status,
					 meta + count + 1, nr_frags);

		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
		irq = netif->irq;
		if (ret && !rx_notify[irq]) {
			rx_notify[irq] = 1;
			notify_list[notify_nr++] = irq;
		}

		if (netif_queue_stopped(netif->dev) &&
		    !netbk_queue_full(netif))
			netif_wake_queue(netif->dev);

		netif_put(netif);
		dev_kfree_skb(skb);
		count += nr_frags + 1;
	}
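	/*
	 * rx_notify[] de-duplicates event-channel kicks: each frontend irq is
	 * recorded at most once above and signalled exactly once below, after
	 * all of its responses have been pushed onto the ring.
	 */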
	while (notify_nr != 0) {
		irq = notify_list[--notify_nr];
		rx_notify[irq] = 0;
		notify_remote_via_irq(irq);
	}

	/* More work to do? */
	if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
		tasklet_schedule(&net_rx_tasklet);
#if 0
	else
		xen_network_done_notify();
#endif
}

static void net_alarm(unsigned long unused)
{
	tasklet_schedule(&net_rx_tasklet);
}

struct net_device_stats *netif_be_get_stats(struct net_device *dev)
{
	netif_t *netif = netdev_priv(dev);
	return &netif->stats;
}

static int __on_net_schedule_list(netif_t *netif)
{
	return netif->list.next != NULL;
}

static void remove_from_net_schedule_list(netif_t *netif)
{
	spin_lock_irq(&net_schedule_list_lock);
	if (likely(__on_net_schedule_list(netif))) {
		list_del(&netif->list);
		netif->list.next = NULL;
		netif_put(netif);
	}
	spin_unlock_irq(&net_schedule_list_lock);
}

static void add_to_net_schedule_list_tail(netif_t *netif)
{
	if (__on_net_schedule_list(netif))
		return;

	spin_lock_irq(&net_schedule_list_lock);
	if (!__on_net_schedule_list(netif) &&
	    likely(netif_running(netif->dev) &&
		   netif_carrier_ok(netif->dev))) {
		list_add_tail(&netif->list, &net_schedule_list);
		netif_get(netif);
	}
	spin_unlock_irq(&net_schedule_list_lock);
}

/*
 * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
 * If this driver is pipelining transmit requests then we can be very
 * aggressive in avoiding new-packet notifications -- the frontend only needs
 * to send a notification if there are no outstanding unreceived responses.
 * If we may be buffering transmit buffers for any reason then we must be
 * rather more conservative and treat this as the final check for pending
 * work.
 */
void netif_schedule_work(netif_t *netif)
{
	int more_to_do;

#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
	more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
#else
	RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
#endif

	if (more_to_do) {
		add_to_net_schedule_list_tail(netif);
		maybe_schedule_tx_action();
	}
}

void netif_deschedule_work(netif_t *netif)
{
	remove_from_net_schedule_list(netif);
}

static void tx_credit_callback(unsigned long data)
{
	netif_t *netif = (netif_t *)data;
	netif->remaining_credit = netif->credit_bytes;
	netif_schedule_work(netif);
}

static inline void net_tx_action_dealloc(void)
{
	gnttab_unmap_grant_ref_t *gop;
	u16 pending_idx;
	PEND_RING_IDX dc, dp;
	netif_t *netif;
	int ret;

	dc = dealloc_cons;
	dp = dealloc_prod;
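	/*
	 * The smp_rmb() below pairs with the smp_wmb() in netif_idx_release():
	 * it orders the reads of dealloc_ring[] after the read of dealloc_prod
	 * above, so every index published up to 'dp' is seen here.
	 */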
	/* Ensure we see all indexes enqueued by netif_idx_release(). */
	smp_rmb();

	/*
	 * Free up any grants we have finished using.
	 */
	gop = tx_unmap_ops;
	while (dc != dp) {
		pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
		gnttab_set_unmap_op(gop, MMAP_VADDR(pending_idx),
				    GNTMAP_host_map,
				    grant_tx_handle[pending_idx]);
		gop++;
	}
	ret = HYPERVISOR_grant_table_op(
		GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
	BUG_ON(ret);

	while (dealloc_cons != dp) {
		pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];

		netif = pending_tx_info[pending_idx].netif;

		make_tx_response(netif, &pending_tx_info[pending_idx].req,
				 NETIF_RSP_OKAY);

		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;

		netif_put(netif);
	}
}

static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
{
	RING_IDX cons = netif->tx.req_cons;

	do {
		make_tx_response(netif, txp, NETIF_RSP_ERROR);
		if (cons >= end)
			break;
		txp = RING_GET_REQUEST(&netif->tx, cons++);
	} while (1);
	netif->tx.req_cons = cons;
	netif_schedule_work(netif);
	netif_put(netif);
}

static int netbk_count_requests(netif_t *netif, netif_tx_request_t *txp,
				int work_to_do)
{
	netif_tx_request_t *first = txp;
	RING_IDX cons = netif->tx.req_cons;
	int frags = 0;

	while (txp->flags & NETTXF_more_data) {
		if (frags >= work_to_do) {
			DPRINTK("Need more frags\n");
			return -frags;
		}

		txp = RING_GET_REQUEST(&netif->tx, cons + frags);
		if (txp->size > first->size) {
			DPRINTK("Frags galore\n");
			return -frags;
		}

		first->size -= txp->size;
		frags++;

		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
			DPRINTK("txp->offset: %x, size: %u\n",
				txp->offset, txp->size);
			return -frags;
		}
	}

	return frags;
}

static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
						  struct sk_buff *skb,
						  gnttab_map_grant_ref_t *mop)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	skb_frag_t *frags = shinfo->frags;
	netif_tx_request_t *txp;
	unsigned long pending_idx = *((u16 *)skb->data);
	RING_IDX cons = netif->tx.req_cons;
	int i, start;

	/* Skip first skb fragment if it is on same page as header fragment. */
	start = ((unsigned long)shinfo->frags[0].page == pending_idx);

	for (i = start; i < shinfo->nr_frags; i++) {
		txp = RING_GET_REQUEST(&netif->tx, cons++);
		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];

		gnttab_set_map_op(mop++, MMAP_VADDR(pending_idx),
				  GNTMAP_host_map | GNTMAP_readonly,
				  txp->gref, netif->domid);

		memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
		netif_get(netif);
		pending_tx_info[pending_idx].netif = netif;
		frags[i].page = (void *)pending_idx;
	}

	return mop;
}
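/*
 * While a TX request is in flight its skb fragment does not yet point at a
 * struct page: netbk_get_requests() stashes the pending_idx in frag->page,
 * and netbk_fill_frags() swaps in the real mapped page (plus size and offset)
 * only after the grant map operations have been checked below.
 */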
static int netbk_tx_check_mop(struct sk_buff *skb,
			      gnttab_map_grant_ref_t **mopp)
{
	gnttab_map_grant_ref_t *mop = *mopp;
	int pending_idx = *((u16 *)skb->data);
	netif_t *netif = pending_tx_info[pending_idx].netif;
	netif_tx_request_t *txp;
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int nr_frags = shinfo->nr_frags;
	int i, err, start;

	/* Check status of header. */
	err = mop->status;
	if (unlikely(err)) {
		txp = &pending_tx_info[pending_idx].req;
		make_tx_response(netif, txp, NETIF_RSP_ERROR);
		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
		netif_put(netif);
	} else {
		set_phys_to_machine(
			__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT,
			FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
		grant_tx_handle[pending_idx] = mop->handle;
	}

	/* Skip first skb fragment if it is on same page as header fragment. */
	start = ((unsigned long)shinfo->frags[0].page == pending_idx);

	for (i = start; i < nr_frags; i++) {
		int j, newerr;

		pending_idx = (unsigned long)shinfo->frags[i].page;

		/* Check error status: if okay then remember grant handle. */
		newerr = (++mop)->status;
		if (likely(!newerr)) {
			set_phys_to_machine(
				__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT,
				FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
			grant_tx_handle[pending_idx] = mop->handle;
			/* Had a previous error? Invalidate this fragment. */
			if (unlikely(err))
				netif_idx_release(pending_idx);
			continue;
		}

		/* Error on this fragment: respond to client with an error. */
		txp = &pending_tx_info[pending_idx].req;
		make_tx_response(netif, txp, NETIF_RSP_ERROR);
		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
		netif_put(netif);

		/* Not the first error? Preceding frags already invalidated. */
		if (err)
			continue;

		/* First error: invalidate header and preceding fragments. */
		pending_idx = *((u16 *)skb->data);
		netif_idx_release(pending_idx);
		for (j = start; j < i; j++) {
			pending_idx = (unsigned long)shinfo->frags[j].page;
			netif_idx_release(pending_idx);
		}

		/* Remember the error: invalidate all subsequent fragments. */
		err = newerr;
	}

	*mopp = mop + 1;
	return err;
}

static void netbk_fill_frags(struct sk_buff *skb)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int nr_frags = shinfo->nr_frags;
	int i;

	for (i = 0; i < nr_frags; i++) {
		skb_frag_t *frag = shinfo->frags + i;
		netif_tx_request_t *txp;
		unsigned long pending_idx;

		pending_idx = (unsigned long)frag->page;
		txp = &pending_tx_info[pending_idx].req;
		frag->page = virt_to_page(MMAP_VADDR(pending_idx));
		frag->size = txp->size;
		frag->page_offset = txp->offset;

		skb->len += txp->size;
		skb->data_len += txp->size;
		skb->truesize += txp->size;
	}
}

int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
		     int work_to_do)
{
	struct netif_extra_info *extra;
	RING_IDX cons = netif->tx.req_cons;

	do {
		if (unlikely(work_to_do-- <= 0)) {
			DPRINTK("Missing extra info\n");
			return -EBADR;
		}

		extra = (struct netif_extra_info *)
			RING_GET_REQUEST(&netif->tx, cons);
		if (unlikely(!extra->type ||
			     extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
			netif->tx.req_cons = ++cons;
			DPRINTK("Invalid extra type: %d\n", extra->type);
			return -EINVAL;
		}

		memcpy(&extras[extra->type - 1], extra, sizeof(*extra));
		netif->tx.req_cons = ++cons;
	} while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);

	return work_to_do;
}

static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
{
	if (!gso->u.gso.size) {
		DPRINTK("GSO size must not be zero.\n");
		return -EINVAL;
	}

	/* Currently only TCPv4 S.O. is supported. */
	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
		DPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
		return -EINVAL;
	}

	skb_shinfo(skb)->gso_size = gso->u.gso.size;
	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;

	/* Header must be checked, and gso_segs computed. */
	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
	skb_shinfo(skb)->gso_segs = 0;

	return 0;
}

/* Called after netfront has transmitted */
static void net_tx_action(unsigned long unused)
{
	struct list_head *ent;
	struct sk_buff *skb;
	netif_t *netif;
	netif_tx_request_t txreq;
	struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
	u16 pending_idx;
	RING_IDX i;
	gnttab_map_grant_ref_t *mop;
	unsigned int data_len;
	int ret, work_to_do;

	if (dealloc_cons != dealloc_prod)
		net_tx_action_dealloc();

	mop = tx_map_ops;
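	/*
	 * Stop consuming new requests once a worst-case packet (one header
	 * slot plus up to MAX_SKB_FRAGS fragment slots) could no longer fit
	 * into the pool of MAX_PENDING_REQS pending slots.
	 */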
	while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
	       !list_empty(&net_schedule_list)) {
		/* Get a netif from the list with work to do. */
		ent = net_schedule_list.next;
		netif = list_entry(ent, netif_t, list);
		netif_get(netif);
		remove_from_net_schedule_list(netif);

		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
		if (!work_to_do) {
			netif_put(netif);
			continue;
		}

		i = netif->tx.req_cons;
		rmb(); /* Ensure that we see the request before we copy it. */
		memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));

		/* Credit-based scheduling. */
		if (txreq.size > netif->remaining_credit) {
			unsigned long now = jiffies;
			unsigned long next_credit =
				netif->credit_timeout.expires +
				msecs_to_jiffies(netif->credit_usec / 1000);

			/* Timer could already be pending in rare cases. */
			if (timer_pending(&netif->credit_timeout))
				break;

			/* Passed the point where we can replenish credit? */
			if (time_after_eq(now, next_credit)) {
				netif->credit_timeout.expires = now;
				netif->remaining_credit = netif->credit_bytes;
			}

			/* Still too big to send right now? Set a callback. */
			if (txreq.size > netif->remaining_credit) {
				netif->remaining_credit = 0;
				netif->credit_timeout.data =
					(unsigned long)netif;
				netif->credit_timeout.function =
					tx_credit_callback;
				__mod_timer(&netif->credit_timeout,
					    next_credit);
				break;
			}
		}
		netif->remaining_credit -= txreq.size;

		work_to_do--;
		netif->tx.req_cons = ++i;

		memset(extras, 0, sizeof(extras));
		if (txreq.flags & NETTXF_extra_info) {
			work_to_do = netbk_get_extras(netif, extras,
						      work_to_do);
			i = netif->tx.req_cons;
			if (unlikely(work_to_do < 0)) {
				netbk_tx_err(netif, &txreq, i);
				continue;
			}
		}

		ret = netbk_count_requests(netif, &txreq, work_to_do);
		if (unlikely(ret < 0)) {
			netbk_tx_err(netif, &txreq, i - ret);
			continue;
		}
		i += ret;

		if (unlikely(ret > MAX_SKB_FRAGS)) {
			DPRINTK("Too many frags\n");
			netbk_tx_err(netif, &txreq, i);
			continue;
		}

		if (unlikely(txreq.size < ETH_HLEN)) {
			DPRINTK("Bad packet size: %d\n", txreq.size);
			netbk_tx_err(netif, &txreq, i);
			continue;
		}

		/* No crossing a page boundary as the payload mustn't fragment. */
		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
			DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
				txreq.offset, txreq.size,
				(txreq.offset & ~PAGE_MASK) + txreq.size);
			netbk_tx_err(netif, &txreq, i);
			continue;
		}

		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];

		data_len = (txreq.size > PKT_PROT_LEN &&
			    ret < MAX_SKB_FRAGS) ?
			PKT_PROT_LEN : txreq.size;
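		/*
		 * Only the first PKT_PROT_LEN bytes (enough for the protocol
		 * headers) are copied into the linear skb area; any remainder
		 * stays in the granted page and is attached as frags[0] by
		 * netbk_fill_frags() once the page has been mapped.
		 */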
		skb = alloc_skb(data_len + 16, GFP_ATOMIC);
		if (unlikely(skb == NULL)) {
			DPRINTK("Can't allocate a skb in start_xmit.\n");
			netbk_tx_err(netif, &txreq, i);
			break;
		}

		/* Packets passed to netif_rx() must have some headroom. */
		skb_reserve(skb, 16);

		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
			struct netif_extra_info *gso;
			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];

			if (netbk_set_skb_gso(skb, gso)) {
				kfree_skb(skb);
				netbk_tx_err(netif, &txreq, i);
				continue;
			}
		}

		gnttab_set_map_op(mop, MMAP_VADDR(pending_idx),
				  GNTMAP_host_map | GNTMAP_readonly,
				  txreq.gref, netif->domid);
		mop++;

		memcpy(&pending_tx_info[pending_idx].req,
		       &txreq, sizeof(txreq));
		pending_tx_info[pending_idx].netif = netif;
		*((u16 *)skb->data) = pending_idx;

		__skb_put(skb, data_len);

		skb_shinfo(skb)->nr_frags = ret;
		if (data_len < txreq.size) {
			skb_shinfo(skb)->nr_frags++;
			skb_shinfo(skb)->frags[0].page =
				(void *)(unsigned long)pending_idx;
		}

		__skb_queue_tail(&tx_queue, skb);

		pending_cons++;

		mop = netbk_get_requests(netif, skb, mop);

		netif->tx.req_cons = i;
		netif_schedule_work(netif);

		if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
			break;
	}

	if (mop == tx_map_ops)
		return;

	ret = HYPERVISOR_grant_table_op(
		GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
	BUG_ON(ret);

	mop = tx_map_ops;
	while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
		netif_tx_request_t *txp;

		pending_idx = *((u16 *)skb->data);
		netif       = pending_tx_info[pending_idx].netif;
		txp         = &pending_tx_info[pending_idx].req;

		/* Check the remap error code. */
		if (unlikely(netbk_tx_check_mop(skb, &mop))) {
			printk(KERN_ALERT "#### netback grant fails\n");
			skb_shinfo(skb)->nr_frags = 0;
			kfree_skb(skb);
			continue;
		}

		data_len = skb->len;
		memcpy(skb->data,
		       (void *)(MMAP_VADDR(pending_idx)|txp->offset),
		       data_len);
		if (data_len < txp->size) {
			/* Append the packet payload as a fragment. */
			txp->offset += data_len;
			txp->size -= data_len;
		} else {
			/* Schedule a response immediately. */
			netif_idx_release(pending_idx);
		}

		/*
		 * Old frontends do not assert data_validated but we
		 * can infer it from csum_blank so test both flags.
		 */
		if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			skb->proto_data_valid = 1;
		} else {
			skb->ip_summed = CHECKSUM_NONE;
			skb->proto_data_valid = 0;
		}
		skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank);

		netbk_fill_frags(skb);

		skb->dev      = netif->dev;
		skb->protocol = eth_type_trans(skb, skb->dev);

		netif->stats.rx_bytes += skb->len;
		netif->stats.rx_packets++;

		netif_rx(skb);
		netif->dev->last_rx = jiffies;
	}
}

static void netif_idx_release(u16 pending_idx)
{
	static DEFINE_SPINLOCK(_lock);
	unsigned long flags;

	spin_lock_irqsave(&_lock, flags);
	dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx;
	/* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
	smp_wmb();
	dealloc_prod++;
	spin_unlock_irqrestore(&_lock, flags);

	tasklet_schedule(&net_tx_tasklet);
}
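/*
 * Page-release handler registered with SetPageForeign() in netback_init():
 * when one of our mapped TX pages is freed it resets the page count and
 * recycles the corresponding pending slot.
 */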
static void netif_page_release(struct page *page)
{
	u16 pending_idx = page - virt_to_page(mmap_vstart);

	/* Ready for next use. */
	init_page_count(page);

	netif_idx_release(pending_idx);
}

irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
{
	netif_t *netif = dev_id;

	add_to_net_schedule_list_tail(netif);
	maybe_schedule_tx_action();

	if (netif_queue_stopped(netif->dev) && !netbk_queue_full(netif))
		netif_wake_queue(netif->dev);

	return IRQ_HANDLED;
}

static void make_tx_response(netif_t *netif,
			     netif_tx_request_t *txp,
			     s8 st)
{
	RING_IDX i = netif->tx.rsp_prod_pvt;
	netif_tx_response_t *resp;
	int notify;

	resp = RING_GET_RESPONSE(&netif->tx, i);
	resp->id     = txp->id;
	resp->status = st;

	if (txp->flags & NETTXF_extra_info)
		RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;

	netif->tx.rsp_prod_pvt = ++i;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
	if (notify)
		notify_remote_via_irq(netif->irq);

#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
	if (i == netif->tx.req_cons) {
		int more_to_do;
		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
		if (more_to_do)
			add_to_net_schedule_list_tail(netif);
	}
#endif
}

static netif_rx_response_t *make_rx_response(netif_t *netif,
					     u16 id,
					     s8 st,
					     u16 offset,
					     u16 size,
					     u16 flags)
{
	RING_IDX i = netif->rx.rsp_prod_pvt;
	netif_rx_response_t *resp;

	resp = RING_GET_RESPONSE(&netif->rx, i);
	resp->offset = offset;
	resp->flags  = flags;
	resp->id     = id;
	resp->status = (s16)size;
	if (st < 0)
		resp->status = (s16)st;

	netif->rx.rsp_prod_pvt = ++i;

	return resp;
}

#ifdef NETBE_DEBUG_INTERRUPT
static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
{
	struct list_head *ent;
	netif_t *netif;
	int i = 0;

	printk(KERN_ALERT "netif_schedule_list:\n");
	spin_lock_irq(&net_schedule_list_lock);

	list_for_each (ent, &net_schedule_list) {
		netif = list_entry(ent, netif_t, list);
		printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
		       "rx_resp_prod=%08x\n",
		       i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
		printk(KERN_ALERT "   tx_req_cons=%08x tx_resp_prod=%08x)\n",
		       netif->tx.req_cons, netif->tx.rsp_prod_pvt);
		printk(KERN_ALERT "   shared(rx_req_prod=%08x "
		       "rx_resp_prod=%08x\n",
		       netif->rx.sring->req_prod, netif->rx.sring->rsp_prod);
		printk(KERN_ALERT "   rx_event=%08x tx_req_prod=%08x\n",
		       netif->rx.sring->rsp_event, netif->tx.sring->req_prod);
		printk(KERN_ALERT "   tx_resp_prod=%08x, tx_event=%08x)\n",
		       netif->tx.sring->rsp_prod, netif->tx.sring->rsp_event);
		i++;
	}

	spin_unlock_irq(&net_schedule_list_lock);
	printk(KERN_ALERT " ** End of netif_schedule_list **\n");

	return IRQ_HANDLED;
}
#endif

static int __init netback_init(void)
{
	int i;
	struct page *page;

	if (!is_running_on_xen())
		return -ENODEV;

	/* We can increase reservation by this much in net_rx_action(). */
	balloon_update_driver_allowance(NET_RX_RING_SIZE);

	skb_queue_head_init(&rx_queue);
	skb_queue_head_init(&tx_queue);

	init_timer(&net_timer);
	net_timer.data = 0;
	net_timer.function = net_alarm;

	page = balloon_alloc_empty_page_range(MAX_PENDING_REQS);
	if (page == NULL)
		return -ENOMEM;

	mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));

	for (i = 0; i < MAX_PENDING_REQS; i++) {
		page = virt_to_page(MMAP_VADDR(i));
		init_page_count(page);
		SetPageForeign(page, netif_page_release);
	}

	pending_cons = 0;
	pending_prod = MAX_PENDING_REQS;
	for (i = 0; i < MAX_PENDING_REQS; i++)
		pending_ring[i] = i;

	spin_lock_init(&net_schedule_list_lock);
	INIT_LIST_HEAD(&net_schedule_list);

	netif_xenbus_init();

#ifdef NETBE_DEBUG_INTERRUPT
	(void)bind_virq_to_irqhandler(
		VIRQ_DEBUG,
		0,
		netif_be_dbg,
		SA_SHIRQ,
		"net-be-dbg",
		&netif_be_dbg);
#endif

	return 0;
}

module_init(netback_init);

MODULE_LICENSE("Dual BSD/GPL");