/******************************************************************************
 * Virtual network driver for conversing with remote driver backends.
 *
 * Copyright (c) 2002-2004, K A Fraser
 *
 * This file may be distributed separately from the Linux kernel, or
 * incorporated into other software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <linux/config.h>
#include <linux/module.h>
#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/arp.h>
#include <net/route.h>
#include <asm/io.h>
#include <asm/page.h>
#include <asm-xen/evtchn.h>
#include <asm-xen/ctrl_if.h>
#include <asm-xen/xen-public/io/netif.h>
#include <asm-xen/balloon.h>
#ifndef __GFP_NOWARN
#define __GFP_NOWARN 0
#endif
#define alloc_xen_skb(_l) __dev_alloc_skb((_l), GFP_ATOMIC|__GFP_NOWARN)
#define init_skb_shinfo(_skb)                        \
    do {                                             \
        atomic_set(&(skb_shinfo(_skb)->dataref), 1); \
        skb_shinfo(_skb)->nr_frags = 0;              \
        skb_shinfo(_skb)->frag_list = NULL;          \
    } while (0)
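
/*
 * Illustrative sketch (not from the original source): the receive path in
 * netif_poll() below rewrites an skb's data/len/tail in place, after which
 * the shared info area living at skb->end must be rebuilt before the skb
 * is handed up the stack:
 */
#if 0
    skb->data = skb->head + (rx->addr & ~PAGE_MASK);
    skb->len  = rx->status;
    skb->tail = skb->data + skb->len;
    init_skb_shinfo(skb); /* dataref = 1, no frags, no frag list */
#endif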
/* Allow headroom on each rx pkt for Ethernet header, alignment padding, ... */
#define RX_HEADROOM 200
/*
 * If the backend driver is pipelining transmit requests then we can be very
 * aggressive in avoiding new-packet notifications -- only need to send a
 * notification if there are no outstanding unreceived responses.
 * If the backend may be buffering our transmit buffers for any reason then we
 * are rather more conservative.
 */
#ifdef CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER
#define TX_TEST_IDX resp_prod /* aggressive: any outstanding responses? */
#else
#define TX_TEST_IDX req_cons  /* conservative: not seen all our requests? */
#endif
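
/*
 * Illustrative sketch (not from the original source): the transmit path
 * below applies this heuristic after queueing a request at ring slot 'i',
 * kicking the backend only when the chosen index has caught up with it:
 */
#if 0
    mb(); /* order our ring update against reading the test index */
    if (np->tx->TX_TEST_IDX == i) /* nothing else pending at the backend */
        notify_via_evtchn(np->evtchn);
#endif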
static void network_tx_buf_gc(struct net_device *dev);
static void network_alloc_rx_buffers(struct net_device *dev);

static unsigned long rx_pfn_array[NETIF_RX_RING_SIZE];
static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE+1];
static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];

static struct list_head dev_list;
struct net_private
{
    struct list_head list;
    struct net_device *dev;

    struct net_device_stats stats;
    NETIF_RING_IDX rx_resp_cons, tx_resp_cons;
    unsigned int tx_full;

    netif_tx_interface_t *tx;
    netif_rx_interface_t *rx;

    spinlock_t   tx_lock;
    spinlock_t   rx_lock;

    unsigned int handle;
    unsigned int evtchn;
    unsigned int irq;

    /* What is the status of our connection to the remote backend? */
#define BEST_CLOSED       0
#define BEST_DISCONNECTED 1
#define BEST_CONNECTED    2
    unsigned int backend_state;

    /* Is this interface open or closed (down or up)? */
#define UST_CLOSED 0
#define UST_OPEN   1
    unsigned int user_state;

    /* Receive-ring batched refills. */
#define RX_MIN_TARGET 8
#define RX_MAX_TARGET NETIF_RX_RING_SIZE
    int rx_target;
    struct sk_buff_head rx_batch;

    /*
     * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
     * array is an index into a chain of free entries.
     */
    struct sk_buff *tx_skbs[NETIF_TX_RING_SIZE+1];
    struct sk_buff *rx_skbs[NETIF_RX_RING_SIZE+1];
};
/* Access macros for acquiring/freeing slots in {tx,rx}_skbs[]. */
#define ADD_ID_TO_FREELIST(_list, _id)             \
    (_list)[(_id)] = (_list)[0];                   \
    (_list)[0]     = (void *)(unsigned long)(_id);
#define GET_ID_FROM_FREELIST(_list)                \
 ({ unsigned long _id = (unsigned long)(_list)[0]; \
    (_list)[0]  = (_list)[_id];                    \
    (unsigned short)_id; })
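
/*
 * Illustrative sketch (not from the original source): entry 0 heads the
 * free chain, so a slot is claimed and released like this:
 */
#if 0
    unsigned short id = GET_ID_FROM_FREELIST(np->tx_skbs); /* pop free slot */
    np->tx_skbs[id] = skb;                  /* slot now holds a real skb */
    /* ... once the backend's response for 'id' arrives ... */
    ADD_ID_TO_FREELIST(np->tx_skbs, id);    /* push slot back on the chain */
#endif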
static char *status_name[] = {
    [NETIF_INTERFACE_STATUS_CLOSED]       = "closed",
    [NETIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
    [NETIF_INTERFACE_STATUS_CONNECTED]    = "connected",
    [NETIF_INTERFACE_STATUS_CHANGED]      = "changed",
};
static char *be_state_name[] = {
    [BEST_CLOSED]       = "closed",
    [BEST_DISCONNECTED] = "disconnected",
    [BEST_CONNECTED]    = "connected",
};
#if DEBUG
#define DPRINTK(fmt, args...) \
    printk(KERN_ALERT "xen_net (%s:%d) " fmt, __FUNCTION__, __LINE__, ##args)
#else
#define DPRINTK(fmt, args...) ((void)0)
#endif
#define IPRINTK(fmt, args...) \
    printk(KERN_INFO "xen_net: " fmt, ##args)
#define WPRINTK(fmt, args...) \
    printk(KERN_WARNING "xen_net: " fmt, ##args)
static struct net_device *find_dev_by_handle(unsigned int handle)
{
    struct list_head *ent;
    struct net_private *np;
    list_for_each (ent, &dev_list) {
        np = list_entry(ent, struct net_private, list);
        if (np->handle == handle)
            return np->dev;
    }
    return NULL;
}
/** Network interface info. */
struct netif_ctrl {
    /** Number of interfaces. */
    int interface_n;
    /** Number of connected interfaces. */
    int connected_n;
    /** Error code. */
    int err;
    /** Driver status: up or down. */
    int up;
};

static struct netif_ctrl netctrl;
static void netctrl_init(void)
{
    memset(&netctrl, 0, sizeof(netctrl));
    netctrl.up = NETIF_DRIVER_STATUS_DOWN;
}
/** Get or set a network interface error. */
static int netctrl_err(int err)
{
    if ((err < 0) && !netctrl.err)
        netctrl.err = err;
    return netctrl.err;
}
/** Test if all network interfaces are connected.
 *
 * @return 1 if all connected, 0 if not, negative error code otherwise
 */
static int netctrl_connected(void)
{
    int ok;

    if (netctrl.err)
        ok = netctrl.err;
    else if (netctrl.up == NETIF_DRIVER_STATUS_UP)
        ok = (netctrl.connected_n == netctrl.interface_n);
    else
        ok = 0;

    return ok;
}
/** Count the connected network interfaces.
 *
 * @return connected count
 */
static int netctrl_connected_count(void)
{
    struct list_head *ent;
    struct net_private *np;
    unsigned int connected;

    connected = 0;

    list_for_each(ent, &dev_list) {
        np = list_entry(ent, struct net_private, list);
        if (np->backend_state == BEST_CONNECTED)
            connected++;
    }

    netctrl.connected_n = connected;
    DPRINTK("> connected_n=%d interface_n=%d\n",
            netctrl.connected_n, netctrl.interface_n);
    return connected;
}
/** Send a packet on a net device to encourage switches to learn the
 * MAC. We send a fake ARP reply.
 *
 * @param dev device
 * @return 0 on success, error code otherwise
 */
static int send_fake_arp(struct net_device *dev)
{
    struct sk_buff *skb;
    u32             src_ip, dst_ip;

    dst_ip = INADDR_BROADCAST;
    src_ip = inet_select_addr(dev, dst_ip, RT_SCOPE_LINK);

    /* No IP? Then nothing to do. */
    if (src_ip == 0)
        return 0;

    skb = arp_create(ARPOP_REPLY, ETH_P_ARP,
                     dst_ip, dev, src_ip,
                     /*dst_hw*/ NULL, /*src_hw*/ NULL,
                     /*target_hw*/ dev->dev_addr);
    if (skb == NULL)
        return -ENOMEM;

    return dev_queue_xmit(skb);
}
static int network_open(struct net_device *dev)
{
    struct net_private *np = netdev_priv(dev);

    memset(&np->stats, 0, sizeof(np->stats));

    np->user_state = UST_OPEN;

    network_alloc_rx_buffers(dev);
    np->rx->event = np->rx_resp_cons + 1;

    netif_start_queue(dev);

    return 0;
}
static void network_tx_buf_gc(struct net_device *dev)
{
    NETIF_RING_IDX i, prod;
    unsigned short id;
    struct net_private *np = netdev_priv(dev);
    struct sk_buff *skb;

    if (np->backend_state != BEST_CONNECTED)
        return;

    do {
        prod = np->tx->resp_prod;
        rmb(); /* Ensure we see responses up to 'prod'. */

        for (i = np->tx_resp_cons; i != prod; i++) {
            id  = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id;
            skb = np->tx_skbs[id];
            ADD_ID_TO_FREELIST(np->tx_skbs, id);
            dev_kfree_skb_irq(skb);
        }

        np->tx_resp_cons = prod;

        /*
         * Set a new event, then check for race with update of tx_cons. Note
         * that it is essential to schedule a callback, no matter how few
         * buffers are pending. Even if there is space in the transmit ring,
         * higher layers may be blocked because too much data is outstanding:
         * in such cases notification from Xen is likely to be the only kick
         * that we'll get.
         */
        np->tx->event =
            prod + ((np->tx->req_prod - prod) >> 1) + 1;
        mb();
    } while (prod != np->tx->resp_prod);

    if (np->tx_full && ((np->tx->req_prod - prod) < NETIF_TX_RING_SIZE)) {
        np->tx_full = 0;
        if (np->user_state == UST_OPEN)
            netif_wake_queue(dev);
    }
}
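
/*
 * Worked example (illustrative, not from the original source): with
 * resp_prod == 10 and req_prod == 18 there are eight requests in flight,
 * so the GC above sets tx->event = 10 + (8 >> 1) + 1 = 15. The backend
 * then interrupts us when it produces response 15, i.e. roughly halfway
 * through the outstanding work, rather than on every single response.
 */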
static void network_alloc_rx_buffers(struct net_device *dev)
{
    unsigned short id;
    struct net_private *np = netdev_priv(dev);
    struct sk_buff *skb;
    int i, batch_target;
    NETIF_RING_IDX req_prod = np->rx->req_prod;

    if (unlikely(np->backend_state != BEST_CONNECTED))
        return;

    /*
     * Allocate skbuffs greedily, even though we batch updates to the
     * receive ring. This creates a less bursty demand on the memory
     * allocator, so should reduce the chance of failed allocation requests
     * both for ourself and for other kernel subsystems.
     */
    batch_target = np->rx_target - (req_prod - np->rx_resp_cons);
    for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
        if (unlikely((skb = alloc_xen_skb(dev->mtu + RX_HEADROOM)) == NULL))
            break;
        __skb_queue_tail(&np->rx_batch, skb);
    }

    /* Is the batch large enough to be worthwhile? */
    if (i < (np->rx_target/2))
        return;

    for (i = 0; ; i++) {
        if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
            break;

        skb->dev = dev;

        id = GET_ID_FROM_FREELIST(np->rx_skbs);

        np->rx_skbs[id] = skb;

        np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id;

        rx_pfn_array[i] = virt_to_machine(skb->head) >> PAGE_SHIFT;

        /* Remove this page from pseudo phys map before passing back to Xen. */
        phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT]
            = INVALID_P2M_ENTRY;

        rx_mcl[i].op      = __HYPERVISOR_update_va_mapping;
        rx_mcl[i].args[0] = (unsigned long)skb->head;
        rx_mcl[i].args[1] = 0;
        rx_mcl[i].args[2] = 0;
    }

    /* After all PTEs have been zapped we blow away stale TLB entries. */
    rx_mcl[i-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;

    /* Give away a batch of pages. */
    rx_mcl[i].op      = __HYPERVISOR_dom_mem_op;
    rx_mcl[i].args[0] = MEMOP_decrease_reservation;
    rx_mcl[i].args[1] = (unsigned long)rx_pfn_array;
    rx_mcl[i].args[2] = (unsigned long)i;
    rx_mcl[i].args[3] = 0;
    rx_mcl[i].args[4] = DOMID_SELF;

    /* Tell the balloon driver what is going on. */
    balloon_update_driver_allowance(i);

    /* Zap PTEs and give away pages in one big multicall. */
    (void)HYPERVISOR_multicall(rx_mcl, i+1);

    /* Check return status of HYPERVISOR_dom_mem_op(). */
    if (unlikely(rx_mcl[i].args[5] != i))
        panic("Unable to reduce memory reservation\n");

    /* Above is a suitable barrier to ensure backend will see requests. */
    np->rx->req_prod = req_prod + i;

    /* Adjust our floating fill target if we risked running out of buffers. */
    if (((req_prod - np->rx->resp_prod) < (np->rx_target / 4)) &&
        ((np->rx_target *= 2) > RX_MAX_TARGET))
        np->rx_target = RX_MAX_TARGET;
}
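
/*
 * Illustrative note (not from the original source): rx_target floats
 * between RX_MIN_TARGET (8) and RX_MAX_TARGET. The test above doubles it
 * whenever fewer than a quarter of rx_target buffers remain outstanding
 * after a refill, while netif_poll() below decrements it by one on each
 * near-empty callback: e.g. 8 -> 16 -> 32 as load builds, then
 * 32 -> 31 -> 30 -> ... as it subsides.
 */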
static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
    unsigned short id;
    struct net_private *np = netdev_priv(dev);
    netif_tx_request_t *tx;
    NETIF_RING_IDX i;

    if (unlikely(np->tx_full)) {
        printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
        netif_stop_queue(dev);
        goto drop;
    }

    /* Each tx request describes one machine page, so linearise any packet
     * whose data would straddle a page boundary into a fresh skb. */
    if (unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >=
                 PAGE_SIZE)) {
        struct sk_buff *nskb;
        if (unlikely((nskb = alloc_xen_skb(skb->len)) == NULL))
            goto drop;
        skb_put(nskb, skb->len);
        memcpy(nskb->data, skb->data, skb->len);
        nskb->dev = skb->dev;
        dev_kfree_skb(skb);
        skb = nskb;
    }

    spin_lock_irq(&np->tx_lock);

    if (np->backend_state != BEST_CONNECTED) {
        spin_unlock_irq(&np->tx_lock);
        goto drop;
    }

    i = np->tx->req_prod;

    id = GET_ID_FROM_FREELIST(np->tx_skbs);
    np->tx_skbs[id] = skb;

    tx = &np->tx->ring[MASK_NETIF_TX_IDX(i)].req;

    tx->id   = id;
    tx->addr = virt_to_machine(skb->data);
    tx->size = skb->len;

    wmb(); /* Ensure that backend will see the request. */
    np->tx->req_prod = i + 1;

    network_tx_buf_gc(dev);

    if ((i - np->tx_resp_cons) == (NETIF_TX_RING_SIZE - 1)) {
        np->tx_full = 1;
        netif_stop_queue(dev);
    }

    spin_unlock_irq(&np->tx_lock);

    np->stats.tx_bytes += skb->len;
    np->stats.tx_packets++;

    /* Only notify Xen if we really have to. */
    mb();
    if (np->tx->TX_TEST_IDX == i)
        notify_via_evtchn(np->evtchn);

    return 0;

 drop:
    np->stats.tx_dropped++;
    dev_kfree_skb(skb);
    return 0;
}
static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
{
    struct net_device *dev = dev_id;
    struct net_private *np = netdev_priv(dev);
    unsigned long flags;

    spin_lock_irqsave(&np->tx_lock, flags);
    network_tx_buf_gc(dev);
    spin_unlock_irqrestore(&np->tx_lock, flags);

    if ((np->rx_resp_cons != np->rx->resp_prod) && (np->user_state == UST_OPEN))
        netif_rx_schedule(dev);

    return IRQ_HANDLED;
}
static int netif_poll(struct net_device *dev, int *pbudget)
{
    struct net_private *np = netdev_priv(dev);
    struct sk_buff *skb, *nskb;
    netif_rx_response_t *rx;
    NETIF_RING_IDX i, rp;
    mmu_update_t *mmu = rx_mmu;
    multicall_entry_t *mcl = rx_mcl;
    int work_done, budget, more_to_do = 1;
    struct sk_buff_head rxq;
    unsigned long flags;

    spin_lock(&np->rx_lock);

    if (np->backend_state != BEST_CONNECTED) {
        spin_unlock(&np->rx_lock);
        return 0;
    }

    skb_queue_head_init(&rxq);

    if ((budget = *pbudget) > dev->quota)
        budget = dev->quota;

    rp = np->rx->resp_prod;
    rmb(); /* Ensure we see queued responses up to 'rp'. */

    for (i = np->rx_resp_cons, work_done = 0;
         (i != rp) && (work_done < budget);
         i++, work_done++) {
        rx = &np->rx->ring[MASK_NETIF_RX_IDX(i)].resp;

        /*
         * An error here is very odd. It usually indicates a backend bug,
         * a low-memory condition, or that we didn't have reservation
         * headroom.
         */
        if (unlikely(rx->status <= 0)) {
            if (net_ratelimit())
                printk(KERN_WARNING "Bad rx buffer (memory squeeze?).\n");
            np->rx->ring[MASK_NETIF_RX_IDX(np->rx->req_prod)].req.id = rx->id;
            wmb();
            np->rx->req_prod++;
            work_done--;
            continue;
        }

        skb = np->rx_skbs[rx->id];
        ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);

        /* NB. We handle skb overflow later. */
        skb->data = skb->head + (rx->addr & ~PAGE_MASK);
        skb->len  = rx->status;
        skb->tail = skb->data + skb->len;

        np->stats.rx_packets++;
        np->stats.rx_bytes += rx->status;
        /* Remap the page. */
        mmu->ptr = (rx->addr & PAGE_MASK) | MMU_MACHPHYS_UPDATE;
        mmu->val = __pa(skb->head) >> PAGE_SHIFT;
        mmu++;
        mcl->op      = __HYPERVISOR_update_va_mapping;
        mcl->args[0] = (unsigned long)skb->head;
        mcl->args[1] = (rx->addr & PAGE_MASK) | __PAGE_KERNEL;
        mcl->args[2] = 0;
        mcl++;

        phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] =
            rx->addr >> PAGE_SHIFT;

        __skb_queue_tail(&rxq, skb);
    }
    /* Some pages are no longer absent... */
    balloon_update_driver_allowance(-work_done);

    /* Do all the remapping work, and M->P updates, in one big hypercall. */
    if (likely((mcl - rx_mcl) != 0)) {
        mcl->op      = __HYPERVISOR_mmu_update;
        mcl->args[0] = (unsigned long)rx_mmu;
        mcl->args[1] = mmu - rx_mmu;
        mcl->args[2] = 0;
        mcl->args[3] = DOMID_SELF;
        mcl++;
        (void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
    }
    while ((skb = __skb_dequeue(&rxq)) != NULL) {
        /*
         * Enough room in skbuff for the data we were passed? Also, Linux
         * expects at least 16 bytes headroom in each receive buffer.
         */
        if (unlikely(skb->tail > skb->end) ||
            unlikely((skb->data - skb->head) < 16)) {
            nskb = NULL;

            /* Only copy the packet if it fits in the current MTU. */
            if (skb->len <= (dev->mtu + ETH_HLEN)) {
                if ((skb->tail > skb->end) && net_ratelimit())
                    printk(KERN_INFO "Received packet needs %d bytes more "
                           "headroom.\n", skb->tail - skb->end);

                if ((nskb = alloc_xen_skb(skb->len + 2)) != NULL) {
                    skb_reserve(nskb, 2);
                    skb_put(nskb, skb->len);
                    memcpy(nskb->data, skb->data, skb->len);
                    nskb->dev = skb->dev;
                }
            }
            else if (net_ratelimit())
                printk(KERN_INFO "Received packet too big for MTU "
                       "(%d > %d)\n", skb->len - ETH_HLEN, dev->mtu);

            /* Reinitialise and then destroy the old skbuff. */
            skb->len  = 0;
            skb->tail = skb->data;
            init_skb_shinfo(skb);
            dev_kfree_skb(skb);

            /* Switch old for new, if we copied the buffer. */
            if ((skb = nskb) == NULL)
                continue;
        }

        /* Set the shared-info area, which is hidden behind the real data. */
        init_skb_shinfo(skb);

        /* Ethernet-specific work. Delayed to here as it peeks the header. */
        skb->protocol = eth_type_trans(skb, dev);

        /* Pass it up. */
        netif_receive_skb(skb);
        dev->last_rx = jiffies;
    }
    np->rx_resp_cons = i;

    /* If we get a callback with very few responses, reduce fill target. */
    /* NB. Note exponential increase, linear decrease. */
    if (((np->rx->req_prod - np->rx->resp_prod) > ((3*np->rx_target) / 4)) &&
        (--np->rx_target < RX_MIN_TARGET))
        np->rx_target = RX_MIN_TARGET;

    network_alloc_rx_buffers(dev);

    *pbudget   -= work_done;
    dev->quota -= work_done;

    if (work_done < budget) {
        local_irq_save(flags);

        np->rx->event = i + 1;

        /* Deal with hypervisor racing our resetting of rx_event. */
        mb();
        if (np->rx->resp_prod == i) {
            __netif_rx_complete(dev);
            more_to_do = 0;
        }

        local_irq_restore(flags);
    }

    spin_unlock(&np->rx_lock);

    return more_to_do;
}
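
/*
 * Illustrative note (not from the original source): under the NAPI
 * contract assumed here, a nonzero return keeps this device on the poll
 * list; __netif_rx_complete() above takes it off once the ring is drained
 * and rx->event has been re-armed.
 */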
static int network_close(struct net_device *dev)
{
    struct net_private *np = netdev_priv(dev);
    np->user_state = UST_CLOSED;
    netif_stop_queue(np->dev);
    return 0;
}
static struct net_device_stats *network_get_stats(struct net_device *dev)
{
    struct net_private *np = netdev_priv(dev);
    return &np->stats;
}
static void network_connect(struct net_device *dev,
                            netif_fe_interface_status_t *status)
{
    struct net_private *np;
    int i, requeue_idx;
    netif_tx_request_t *tx;

    np = netdev_priv(dev);
    spin_lock_irq(&np->tx_lock);
    spin_lock(&np->rx_lock);

    /* Recovery procedure: */

    /* Step 1: Reinitialise variables. */
    np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
    np->rx->event = np->tx->event = 1;

    /* Step 2: Rebuild the RX and TX ring contents.
     * NB. We could just free the queued TX packets now but we hope
     * that sending them out might do some good. We have to rebuild
     * the RX ring because some of our pages are currently flipped out
     * so we can't just free the RX skbs.
     * NB2. Freelist index entries are always going to be less than
     * __PAGE_OFFSET, whereas pointers to skbs will always be equal or
     * greater than __PAGE_OFFSET: we use this property to distinguish
     * them.
     */

    /* Rebuild the TX buffer freelist and the TX ring itself.
     * NB. This reorders packets. We could keep more private state
     * to avoid this but maybe it doesn't matter so much given the
     * interface has been down.
     */
    for (requeue_idx = 0, i = 1; i <= NETIF_TX_RING_SIZE; i++) {
        if ((unsigned long)np->tx_skbs[i] >= __PAGE_OFFSET) {
            struct sk_buff *skb = np->tx_skbs[i];

            tx = &np->tx->ring[requeue_idx++].req;

            tx->id   = i;
            tx->addr = virt_to_machine(skb->data);
            tx->size = skb->len;

            np->stats.tx_bytes += skb->len;
            np->stats.tx_packets++;
        }
    }
    wmb();
    np->tx->req_prod = requeue_idx;

    /* Rebuild the RX buffer freelist and the RX ring itself. */
    for (requeue_idx = 0, i = 1; i <= NETIF_RX_RING_SIZE; i++)
        if ((unsigned long)np->rx_skbs[i] >= __PAGE_OFFSET)
            np->rx->ring[requeue_idx++].req.id = i;
    wmb();
    np->rx->req_prod = requeue_idx;

    /* Step 3: All public and private state should now be sane. Get
     * ready to start sending and receiving packets and give the driver
     * domain a kick because we've probably just requeued some
     * packets.
     */
    np->backend_state = BEST_CONNECTED;
    wmb();
    notify_via_evtchn(status->evtchn);
    network_tx_buf_gc(dev);

    if (np->user_state == UST_OPEN)
        netif_start_queue(dev);

    spin_unlock(&np->rx_lock);
    spin_unlock_irq(&np->tx_lock);
}
static void vif_show(struct net_private *np)
{
#if DEBUG
    if (np) {
        IPRINTK("<vif handle=%u %s(%s) evtchn=%u irq=%u tx=%p rx=%p>\n",
                np->handle,
                be_state_name[np->backend_state],
                np->user_state ? "open" : "closed",
                np->evtchn,
                np->irq,
                np->tx,
                np->rx);
    } else {
        IPRINTK("<vif NULL>\n");
    }
#endif
}
/* Send a connect message to xend to tell it to bring up the interface. */
static void send_interface_connect(struct net_private *np)
{
    ctrl_msg_t cmsg = {
        .type    = CMSG_NETIF_FE,
        .subtype = CMSG_NETIF_FE_INTERFACE_CONNECT,
        .length  = sizeof(netif_fe_interface_connect_t),
    };
    netif_fe_interface_connect_t *msg = (void*)cmsg.msg;

    msg->handle = np->handle;
    msg->tx_shmem_frame = (virt_to_machine(np->tx) >> PAGE_SHIFT);
    msg->rx_shmem_frame = (virt_to_machine(np->rx) >> PAGE_SHIFT);

    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
}
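
/*
 * Worked example (illustrative, not from the original source): with 4KB
 * pages (PAGE_SHIFT == 12), a tx ring page whose machine address is
 * 0x12345000 is advertised to xend as tx_shmem_frame
 * 0x12345000 >> 12 = 0x12345; the backend maps that machine frame to
 * share the ring.
 */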
/* Send a driver status notification to the domain controller. */
static int send_driver_status(int ok)
{
    int err = 0;
    ctrl_msg_t cmsg = {
        .type    = CMSG_NETIF_FE,
        .subtype = CMSG_NETIF_FE_DRIVER_STATUS,
        .length  = sizeof(netif_fe_driver_status_t),
    };
    netif_fe_driver_status_t *msg = (void*)cmsg.msg;

    msg->status = (ok ? NETIF_DRIVER_STATUS_UP : NETIF_DRIVER_STATUS_DOWN);
    err = ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
    return err;
}
/* Stop network device and free tx/rx queues and irq. */
static void vif_release(struct net_private *np)
{
    /* Stop old i/f to prevent errors whilst we rebuild the state. */
    spin_lock_irq(&np->tx_lock);
    spin_lock(&np->rx_lock);
    netif_stop_queue(np->dev);
    /* np->backend_state = BEST_DISCONNECTED; */
    spin_unlock(&np->rx_lock);
    spin_unlock_irq(&np->tx_lock);

    /* Free resources. */
    if (np->tx != NULL) {
        free_irq(np->irq, np->dev);
        unbind_evtchn_from_irq(np->evtchn);
        free_page((unsigned long)np->tx);
        free_page((unsigned long)np->rx);
        np->irq = 0;
        np->evtchn = 0;
        np->tx = NULL;
        np->rx = NULL;
    }
}
/* Release vif resources and close it down completely. */
static void vif_close(struct net_private *np)
{
    WPRINTK("Unexpected netif-CLOSED message in state %s\n",
            be_state_name[np->backend_state]);
    vif_release(np);
    np->backend_state = BEST_CLOSED;
    /* todo: take dev down and free. */
    vif_show(np);
}
/* Move the vif into disconnected state.
 * Allocates tx/rx pages.
 * Sends connect message to xend.
 */
static void vif_disconnect(struct net_private *np)
{
    /* Before this point, np->tx and np->rx had better be null. */
    if (np->tx) free_page((unsigned long)np->tx);
    if (np->rx) free_page((unsigned long)np->rx);
    np->tx = (netif_tx_interface_t *)__get_free_page(GFP_KERNEL);
    np->rx = (netif_rx_interface_t *)__get_free_page(GFP_KERNEL);
    memset(np->tx, 0, PAGE_SIZE);
    memset(np->rx, 0, PAGE_SIZE);
    np->backend_state = BEST_DISCONNECTED;
    send_interface_connect(np);
    vif_show(np);
}
/* Begin interface recovery.
 *
 * NB. Whilst we're recovering, we turn the carrier state off. We
 * take measures to ensure that this device isn't used for
 * anything. We also stop the queue for this device. Various
 * different approaches (e.g. continuing to buffer packets) have
 * been tested but don't appear to improve the overall impact on
 * TCP connections.
 *
 * TODO: (MAW) Change the Xend<->Guest protocol so that a recovery
 * is initiated by a special "RESET" message - disconnect could
 * just mean we're not allowed to use this interface any more.
 */
static void vif_reset(struct net_private *np)
{
    IPRINTK("Attempting to reconnect network interface: handle=%u\n",
            np->handle);
    vif_release(np);
    vif_disconnect(np);
    vif_show(np);
}
/* Move the vif into connected state.
 * Sets the mac and event channel from the message.
 * Binds the irq to the event channel.
 */
static void
vif_connect(struct net_private *np, netif_fe_interface_status_t *status)
{
    struct net_device *dev = np->dev;
    memcpy(dev->dev_addr, status->mac, ETH_ALEN);
    network_connect(dev, status);
    np->evtchn = status->evtchn;
    np->irq = bind_evtchn_to_irq(np->evtchn);
    (void)request_irq(np->irq, netif_int, SA_SAMPLE_RANDOM, dev->name, dev);
    netctrl_connected_count();
    (void)send_fake_arp(dev);
    vif_show(np);
}
/** Create a network device.
 * @param handle device handle
 * @param val return parameter for created device
 * @return 0 on success, error code otherwise
 */
static int create_netdev(int handle, struct net_device **val)
{
    int i, err = 0;
    struct net_device *dev = NULL;
    struct net_private *np = NULL;

    if ((dev = alloc_etherdev(sizeof(struct net_private))) == NULL) {
        printk(KERN_WARNING "%s> alloc_etherdev failed.\n", __FUNCTION__);
        err = -ENOMEM;
        goto exit;
    }

    np = netdev_priv(dev);
    np->backend_state = BEST_CLOSED;
    np->user_state    = UST_CLOSED;
    np->handle        = handle;

    spin_lock_init(&np->tx_lock);
    spin_lock_init(&np->rx_lock);

    skb_queue_head_init(&np->rx_batch);
    np->rx_target = RX_MIN_TARGET;

    /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
    for (i = 0; i <= NETIF_TX_RING_SIZE; i++)
        np->tx_skbs[i] = (void *)(i+1);
    for (i = 0; i <= NETIF_RX_RING_SIZE; i++)
        np->rx_skbs[i] = (void *)(i+1);

    dev->open            = network_open;
    dev->hard_start_xmit = network_start_xmit;
    dev->stop            = network_close;
    dev->get_stats       = network_get_stats;
    dev->poll            = netif_poll;
    dev->weight          = 64;

    if ((err = register_netdev(dev)) != 0) {
        printk(KERN_WARNING "%s> register_netdev err=%d\n", __FUNCTION__, err);
        goto exit;
    }
    np->dev = dev;
    list_add(&np->list, &dev_list);

  exit:
    if ((err != 0) && (dev != NULL))
        kfree(dev);
    else if (val != NULL)
        *val = dev;
    return err;
}
/* Get the target interface for a status message.
 * Creates the interface when it makes sense.
 * The returned interface may be null when there is no error.
 *
 * @param status status message
 * @param np return parameter for interface state
 * @return 0 on success, error code otherwise
 */
static int
target_vif(netif_fe_interface_status_t *status, struct net_private **np)
{
    int err = 0;
    struct net_device *dev = NULL;

    DPRINTK("> handle=%d\n", status->handle);
    if (status->handle < 0) {
        err = -EINVAL;
        goto exit;
    }

    if ((dev = find_dev_by_handle(status->handle)) != NULL)
        goto exit;

    if (status->status == NETIF_INTERFACE_STATUS_CLOSED)
        goto exit;
    if (status->status == NETIF_INTERFACE_STATUS_CHANGED)
        goto exit;

    /* It's a new interface in a good state - create it. */
    DPRINTK("> create device...\n");
    if ((err = create_netdev(status->handle, &dev)) != 0)
        goto exit;

    netctrl.interface_n++;

  exit:
    if (np != NULL)
        *np = ((dev && !err) ? netdev_priv(dev) : NULL);
    DPRINTK("< err=%d\n", err);
    return err;
}
/* Handle an interface status message. */
static void netif_interface_status(netif_fe_interface_status_t *status)
{
    int err = 0;
    struct net_private *np = NULL;

    DPRINTK("> status=%s handle=%d\n",
            status_name[status->status], status->handle);

    if ((err = target_vif(status, &np)) != 0) {
        WPRINTK("Invalid netif: handle=%u\n", status->handle);
        return;
    }

    if (np == NULL) {
        DPRINTK("> no vif\n");
        return;
    }

    switch (status->status) {
    case NETIF_INTERFACE_STATUS_CLOSED:
        switch (np->backend_state) {
        case BEST_CLOSED:
        case BEST_DISCONNECTED:
        case BEST_CONNECTED:
            vif_close(np);
            break;
        }
        break;

    case NETIF_INTERFACE_STATUS_DISCONNECTED:
        switch (np->backend_state) {
        case BEST_CLOSED:
            vif_disconnect(np);
            break;
        case BEST_DISCONNECTED:
        case BEST_CONNECTED:
            vif_reset(np);
            break;
        }
        break;

    case NETIF_INTERFACE_STATUS_CONNECTED:
        switch (np->backend_state) {
        case BEST_CLOSED:
            WPRINTK("Unexpected netif status %s in state %s\n",
                    status_name[status->status],
                    be_state_name[np->backend_state]);
            vif_disconnect(np);
            vif_connect(np, status);
            break;
        case BEST_DISCONNECTED:
            vif_connect(np, status);
            break;
        }
        break;

    case NETIF_INTERFACE_STATUS_CHANGED:
        /*
         * The domain controller is notifying us that a device has been
         * added to the list of existing device addresses.
         */
        vif_show(np);
        break;

    default:
        WPRINTK("Invalid netif status code %d\n", status->status);
        break;
    }
}
/* Handle a driver status message.
 * Initialize the network control interface.
 */
static void netif_driver_status(netif_fe_driver_status_t *status)
{
    netctrl.up = status->status;
    netctrl_connected_count();
}
/* Receive handler for control messages. */
static void netif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
{
    switch (msg->subtype) {
    case CMSG_NETIF_FE_INTERFACE_STATUS:
        netif_interface_status((netif_fe_interface_status_t *) &msg->msg[0]);
        break;

    case CMSG_NETIF_FE_DRIVER_STATUS:
        netif_driver_status((netif_fe_driver_status_t *) &msg->msg[0]);
        break;

    default:
        msg->length = 0;
        break;
    }

    ctrl_if_send_response(msg);
}
#if 1
/* Wait for all interfaces to be connected.
 *
 * This works OK, but we'd like to use the probing mode (see below).
 */
static int probe_interfaces(void)
{
    int err = 0, conn = 0;
    int wait_i, wait_n = 100;

    DPRINTK(">\n");

    for (wait_i = 0; wait_i < wait_n; wait_i++) {
        DPRINTK("> wait_i=%d\n", wait_i);
        conn = netctrl_connected();
        if (conn)
            break;
        DPRINTK("> schedule_timeout...\n");
        set_current_state(TASK_INTERRUPTIBLE);
        schedule_timeout(10);
    }

    DPRINTK("> wait finished...\n");
    if (conn <= 0) {
        err = netctrl_err(-ENETDOWN);
        WPRINTK("Failed to connect all virtual interfaces: err=%d\n", err);
    }

    DPRINTK("< err=%d\n", err);

    return err;
}
#else
/* Probe for interfaces until no more are found.
 *
 * This is the mode we'd like to use, but at the moment it panics the kernel.
 */
static int probe_interfaces(void)
{
    int err = 0;
    int wait_i, wait_n = 100;
    ctrl_msg_t cmsg = {
        .type    = CMSG_NETIF_FE,
        .subtype = CMSG_NETIF_FE_INTERFACE_STATUS,
        .length  = sizeof(netif_fe_interface_status_t),
    };
    netif_fe_interface_status_t msg = {};
    ctrl_msg_t rmsg = {};
    netif_fe_interface_status_t *reply = (void*)rmsg.msg;
    int state = TASK_UNINTERRUPTIBLE;
    u32 query = -1;

    DPRINTK(">\n");

    netctrl.interface_n = 0;
    for (wait_i = 0; wait_i < wait_n; wait_i++) {
        DPRINTK("> wait_i=%d query=%d\n", wait_i, query);
        msg.handle = query;
        memcpy(cmsg.msg, &msg, sizeof(msg));
        DPRINTK("> set_current_state...\n");
        set_current_state(state);
        DPRINTK("> rmsg=%p msg=%p, reply=%p\n", &rmsg, rmsg.msg, reply);
        DPRINTK("> sending...\n");
        err = ctrl_if_send_message_and_get_response(&cmsg, &rmsg, state);
        DPRINTK("> err=%d\n", err);
        if (err)
            goto exit;
        DPRINTK("> rmsg=%p msg=%p, reply=%p\n", &rmsg, rmsg.msg, reply);
        if ((int)reply->handle < 0) {
            /* No more interfaces. */
            break;
        }
        query = -reply->handle - 2;
        DPRINTK("> netif_interface_status ...\n");
        netif_interface_status(reply);
    }

  exit:
    if (err) {
        err = netctrl_err(-ENETDOWN);
        WPRINTK("Connecting virtual network interfaces failed: err=%d\n", err);
    }

    DPRINTK("< err=%d\n", err);
    return err;
}
#endif
/*
 * We use this notifier to send out a fake ARP reply to reset switches and
 * router ARP caches when an IP interface is brought up on a VIF.
 */
static int
inetdev_notify(struct notifier_block *this, unsigned long event, void *ptr)
{
    struct in_ifaddr   *ifa = (struct in_ifaddr *)ptr;
    struct net_device  *dev = ifa->ifa_dev->dev;
    struct list_head   *ent;
    struct net_private *np;

    if (event != NETDEV_UP)
        goto out;

    list_for_each (ent, &dev_list) {
        np = list_entry(ent, struct net_private, list);
        if (np->dev == dev)
            (void)send_fake_arp(dev);
    }

 out:
    return NOTIFY_DONE;
}

static struct notifier_block notifier_inetdev = {
    .notifier_call = inetdev_notify,
    .next          = NULL,
    .priority      = 0
};
static int __init netif_init(void)
{
    int err = 0;

    if (xen_start_info.flags & SIF_INITDOMAIN)
        return 0;

    IPRINTK("Initialising virtual ethernet driver.\n");
    INIT_LIST_HEAD(&dev_list);
    (void)register_inetaddr_notifier(&notifier_inetdev);
    netctrl_init();
    (void)ctrl_if_register_receiver(CMSG_NETIF_FE, netif_ctrlif_rx,
                                    CALLBACK_IN_BLOCKING_CONTEXT);
    send_driver_status(1);
    err = probe_interfaces();
    if (err)
        ctrl_if_unregister_receiver(CMSG_NETIF_FE, netif_ctrlif_rx);

    DPRINTK("< err=%d\n", err);
    return err;
}
static void vif_suspend(struct net_private *np)
{
    /* Avoid having tx/rx stuff happen until we're ready. */
    free_irq(np->irq, np->dev);
    unbind_evtchn_from_irq(np->evtchn);
}
static void vif_resume(struct net_private *np)
{
    /*
     * Connect regardless of whether IFF_UP flag set.
     * Stop bad things from happening until we're back up.
     */
    np->backend_state = BEST_DISCONNECTED;
    memset(np->tx, 0, PAGE_SIZE);
    memset(np->rx, 0, PAGE_SIZE);

    send_interface_connect(np);
}
void netif_suspend(void)
{
    struct list_head *ent;
    struct net_private *np;

    list_for_each (ent, &dev_list) {
        np = list_entry(ent, struct net_private, list);
        vif_suspend(np);
    }
}
void netif_resume(void)
{
    struct list_head *ent;
    struct net_private *np;

    list_for_each (ent, &dev_list) {
        np = list_entry(ent, struct net_private, list);
        vif_resume(np);
    }
}
module_init(netif_init);