/******************************************************************************
 * XenLinux virtual block-device tap.
 * Block request routing data path.
 *
 * Copyright (c) 2004, Andrew Warfield
 * -- see full header in blktap.c
 */

#include "blktap.h"          /* blkif_t, active_req_t, mode flags, DPRINTK */
#include <asm-xen/evtchn.h>

/*-----[ The data paths ]-------------------------------------------------*/

/* Connection to a single backend domain. */
blkif_front_ring_t blktap_be_ring;
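
/*
 * A sketch of the routing this file implements (requests flow left to
 * right, responses right to left):
 *
 *   frontend ring (per blkif_t)  -->  [ tap ]  -->  blktap_be_ring
 *                                       |
 *                                       +--> user-space rings (UFERing /
 *                                            UBERing) when one of the
 *                                            BLKTAP_MODE_INTERCEPT_* or
 *                                            BLKTAP_MODE_COPY_* modes is
 *                                            set; see do_block_io_op()
 *                                            and blkif_ptbe_int() below.
 */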

/*-----[ Tracking active requests ]---------------------------------------*/

/* This must be the same as MAX_PENDING_REQS in blkback.c. */
#define MAX_ACTIVE_REQS ((ACTIVE_RING_IDX)64U)

active_req_t     active_reqs[MAX_ACTIVE_REQS];
ACTIVE_RING_IDX  active_req_ring[MAX_ACTIVE_REQS];
spinlock_t       active_req_lock = SPIN_LOCK_UNLOCKED;
ACTIVE_RING_IDX  active_prod, active_cons;
#define MASK_ACTIVE_IDX(_i) ((_i)&(MAX_ACTIVE_REQS-1))
#define ACTIVE_IDX(_ar)     (_ar - active_reqs)
#define NR_ACTIVE_REQS      (MAX_ACTIVE_REQS - active_prod + active_cons)
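
/*
 * active_req_ring is a free-list of slot indices into active_reqs[]:
 * free_active_req() returns a slot at active_prod, get_active_req()
 * takes one at active_cons, so (active_prod - active_cons) counts FREE
 * slots and NR_ACTIVE_REQS counts slots currently in flight.  After
 * active_reqs_init() below: prod == 64, cons == 0, NR_ACTIVE_REQS == 0.
 */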

inline active_req_t *get_active_req(void)
{
    ACTIVE_RING_IDX idx;
    active_req_t *ar;
    unsigned long flags;

    ASSERT(active_cons != active_prod);

    spin_lock_irqsave(&active_req_lock, flags);
    idx = active_req_ring[MASK_ACTIVE_IDX(active_cons++)];
    ar = &active_reqs[idx];
    spin_unlock_irqrestore(&active_req_lock, flags);

    return ar;
}

inline void free_active_req(active_req_t *ar)
{
    unsigned long flags;

    spin_lock_irqsave(&active_req_lock, flags);
    active_req_ring[MASK_ACTIVE_IDX(active_prod++)] = ACTIVE_IDX(ar);
    spin_unlock_irqrestore(&active_req_lock, flags);
}

active_req_t *lookup_active_req(ACTIVE_RING_IDX idx)
{
    return &active_reqs[idx];
}

void active_reqs_init(void)
{
    ACTIVE_RING_IDX i;

    active_cons = 0;
    active_prod = MAX_ACTIVE_REQS;
    memset(active_reqs, 0, sizeof(active_reqs));
    for ( i = 0; i < MAX_ACTIVE_REQS; i++ )
        active_req_ring[i] = i;
}

/* Requests passing through the tap to the backend hijack the id field
 * in the request message.  In it we put the AR index _AND_ the FE domid.
 * The domid is used by the backend to map the pages properly.
 */
static inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx)
{
    return ( (fe_dom << 16) | MASK_ACTIVE_IDX(idx) );
}
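
/*
 * The matching unpack macros live outside this file (blktap.h in this
 * tree); a minimal sketch of what they are assumed to look like, given
 * the packing above:
 *
 *   #define ID_TO_DOM(_id) ((_id) >> 16)
 *   #define ID_TO_IDX(_id) ((_id) & 0xffff)
 */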

/*-----[ Ring helpers ]---------------------------------------------------*/

inline int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp)
{
    blkif_response_t *resp_d;
    active_req_t *ar;

    ar = &active_reqs[ID_TO_IDX(rsp->id)];
    rsp->id = ar->id; /* restore the id the frontend originally assigned */

    resp_d = RING_GET_RESPONSE(&blkif->blk_ring,
                               blkif->blk_ring.rsp_prod_pvt);
    memcpy(resp_d, rsp, sizeof(blkif_response_t));
    blkif->blk_ring.rsp_prod_pvt++;

    free_active_req(ar); /* the response retires this request's slot */
    return 0;
}

inline int write_req_to_be_ring(blkif_request_t *req)
{
    blkif_request_t *req_d;

    if ( blktap_be_state != BLKIF_STATE_CONNECTED ) {
        WPRINTK("Tap trying to access an unconnected backend!\n");
        return 0;
    }

    req_d = RING_GET_REQUEST(&blktap_be_ring,
                             blktap_be_ring.req_prod_pvt);
    memcpy(req_d, req, sizeof(blkif_request_t));
    blktap_be_ring.req_prod_pvt++;

    return 0;
}

void kick_fe_domain(blkif_t *blkif)
{
    RING_PUSH_RESPONSES(&blkif->blk_ring);
    notify_via_evtchn(blkif->evtchn);
    DPRINTK("notified FE(dom %u)\n", blkif->domid);
}

void kick_be_domain(void)
{
    if ( blktap_be_state != BLKIF_STATE_CONNECTED )
        return;

    wmb(); /* Ensure that the backend can see the requests. */
    RING_PUSH_REQUESTS(&blktap_be_ring);
    notify_via_evtchn(blktap_be_evtchn);
    DPRINTK("notified BE\n");
}

/*-----[ Data to/from Frontend (client) VMs ]-----------------------------*/

/*-----[ Scheduler list maint - from blkback ]--- */

static struct list_head blkio_schedule_list;
static spinlock_t blkio_schedule_list_lock;

static int __on_blkdev_list(blkif_t *blkif)
{
    return blkif->blkdev_list.next != NULL;
}

static void remove_from_blkdev_list(blkif_t *blkif)
{
    unsigned long flags;

    if ( !__on_blkdev_list(blkif) ) return;

    /* Re-check under the lock: another CPU may have removed it already. */
    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
    if ( __on_blkdev_list(blkif) )
    {
        list_del(&blkif->blkdev_list);
        blkif->blkdev_list.next = NULL;
    }
    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
}

static void add_to_blkdev_list_tail(blkif_t *blkif)
{
    unsigned long flags;

    if ( __on_blkdev_list(blkif) ) return;

    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
    if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
    {
        list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
    }
    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
}

/*-----[ Scheduler functions - from blkback ]--- */

static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait);

static int do_block_io_op(blkif_t *blkif, int max_to_do);

static int blkio_schedule(void *arg)
{
    DECLARE_WAITQUEUE(wq, current);

    blkif_t          *blkif;
    struct list_head *ent;

    for ( ; ; )
    {
        /* Wait for work to do. */
        add_wait_queue(&blkio_schedule_wait, &wq);
        set_current_state(TASK_INTERRUPTIBLE);
        if ( (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) ||
             list_empty(&blkio_schedule_list) )
            schedule();
        __set_current_state(TASK_RUNNING);
        remove_wait_queue(&blkio_schedule_wait, &wq);

        /* Queue up a batch of requests. */
        while ( (NR_ACTIVE_REQS < MAX_ACTIVE_REQS) &&
                !list_empty(&blkio_schedule_list) )
        {
            ent = blkio_schedule_list.next;
            blkif = list_entry(ent, blkif_t, blkdev_list);
            remove_from_blkdev_list(blkif);
            if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
                add_to_blkdev_list_tail(blkif);
        }

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
        /* Push the batch through to disc. */
        run_task_queue(&tq_disk);
#endif
    }
}

static void maybe_trigger_blkio_schedule(void)
{
    /*
     * Needed so that two processes, which together make the following
     * predicate true, don't both read stale values and evaluate the
     * predicate incorrectly.  Incredibly unlikely to stall the scheduler
     * on x86, but...
     */
    smp_mb();

    if ( (NR_ACTIVE_REQS < MAX_ACTIVE_REQS) && /* XXX: was MAX_ACTIVE_REQS/2 */
         !list_empty(&blkio_schedule_list) )
        wake_up(&blkio_schedule_wait);
}

void blkif_deschedule(blkif_t *blkif)
{
    remove_from_blkdev_list(blkif);
}

void __init blkdev_schedule_init(void)
{
    spin_lock_init(&blkio_schedule_list_lock);
    INIT_LIST_HEAD(&blkio_schedule_list);

    if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
        BUG();
}

/*-----[ Interrupt entry from a frontend ]------ */

irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)
{
    /* A frontend pushed requests: schedule this blkif for servicing. */
    blkif_t *blkif = dev_id;

    add_to_blkdev_list_tail(blkif);
    maybe_trigger_blkio_schedule();
    return IRQ_HANDLED;
}

/*-----[ Other Frontend Ring functions ]-------- */

static int do_block_io_op(blkif_t *blkif, int max_to_do)
{
    /* We have pending messages from the real frontend. */

    blkif_request_t *req_s;
    RING_IDX i, rp;
    unsigned long flags;
    active_req_t *ar;
    int more_to_do = 0;
    int notify_be = 0, notify_user = 0;

    DPRINTK("PT got FE interrupt.\n");

    if (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) return 1;

    /* lock both rings */
    spin_lock_irqsave(&blkif_io_lock, flags);

    rp = blkif->blk_ring.sring->req_prod;
    rmb(); /* pick up the requests the frontend published before req_prod */

    for ( i = blkif->blk_ring.req_cons;
          (i != rp) &&
              !RING_REQUEST_CONS_OVERFLOW(&blkif->blk_ring, i);
          i++ )
    {
        if ((--max_to_do == 0) || (NR_ACTIVE_REQS == MAX_ACTIVE_REQS))
        {
            more_to_do = 1;
            break;
        }

        req_s = RING_GET_REQUEST(&blkif->blk_ring, i);

        /* This is a new request:
         * Assign an active request record, and remap the id.
         */
        ar = get_active_req();
        ar->id = req_s->id; /* stash the FE's id so the response can restore it */
        ar->nr_pages = req_s->nr_segments;
        ar->blkif = blkif;
        req_s->id = MAKE_ID(blkif->domid, ACTIVE_IDX(ar));
        /* WPRINTK("%3u < %3lu\n", ID_TO_IDX(req_s->id), ar->id); */

        /* FE -> BE interposition point is here. */

        /* ------------------------------------------------------------- */
        /* BLKIF_OP_PROBE_HACK:                                           */
        /* Signal to the backend that we are a tap domain.                */

        if (req_s->operation == BLKIF_OP_PROBE) {
            DPRINTK("Adding BLKTAP_COOKIE to PROBE request.\n");
            req_s->frame_and_sects[1] = BLKTAP_COOKIE;
        }
        /* ------------------------------------------------------------- */

        /* If we are in MODE_INTERCEPT_FE or MODE_COPY_FE: */
        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
             (blktap_mode & BLKTAP_MODE_COPY_FE) ) {

            /* Copy the request message to the UFERing.                    */
            /* In MODE_INTERCEPT_FE, map attached pages into the app vma.  */
            /* In MODE_COPY_FE_PAGES, copy attached pages into the app vma.*/

            DPRINTK("req->UFERing\n");
            blktap_write_fe_ring(req_s);
            notify_user = 1; /* wake the tap application below */
        }

        /* If we are not in MODE_INTERCEPT_FE or MODE_INTERCEPT_BE: */
        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
               (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) {

            /* (BE included to prevent noise from the FE when it's off.) */
            /* Copy the request message to the BERing.                   */

            DPRINTK("blktap: FERing[%u] -> BERing[%u]\n",
                    (unsigned)i & (RING_SIZE(&blktap_be_ring)-1),
                    (unsigned)blktap_be_ring.req_prod_pvt &
                    (RING_SIZE(&blktap_be_ring)-1));

            write_req_to_be_ring(req_s);
            notify_be = 1;
        }
    }

    blkif->blk_ring.req_cons = i;

    /* unlock rings */
    spin_unlock_irqrestore(&blkif_io_lock, flags);

    if (notify_user)
        blktap_kick_user(); /* wake the user-space tap if we queued to it */
    if (notify_be)
        kick_be_domain();

    return more_to_do;
}

/*-----[ Data to/from Backend (server) VM ]------------------------------*/

irqreturn_t blkif_ptbe_int(int irq, void *dev_id,
                           struct pt_regs *ptregs)
{
    blkif_response_t *resp_s;
    blkif_t *blkif;
    RING_IDX rp, i;
    unsigned long flags;

    DPRINTK("PT got BE interrupt.\n");

    /* lock both rings */
    spin_lock_irqsave(&blkif_io_lock, flags);

    rp = blktap_be_ring.sring->rsp_prod;
    rmb(); /* pick up the responses the backend published before rsp_prod */

    for ( i = blktap_be_ring.rsp_cons; i != rp; i++ )
    {
        resp_s = RING_GET_RESPONSE(&blktap_be_ring, i);

        /* BE -> FE interposition point is here. */

        blkif = active_reqs[ID_TO_IDX(resp_s->id)].blkif;

        /* If we are in MODE_INTERCEPT_BE or MODE_COPY_BE: */
        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
             (blktap_mode & BLKTAP_MODE_COPY_BE) ) {

            /* Copy the response message to the UBERing.                   */
            /* In MODE_INTERCEPT_BE, map attached pages into the app vma.  */
            /* In MODE_COPY_BE_PAGES, copy attached pages into the app vma.*/

            DPRINTK("rsp->UBERing\n");
            blktap_write_be_ring(resp_s);
        }

        /* If we are NOT in MODE_INTERCEPT_BE or MODE_INTERCEPT_FE: */
        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
               (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) {

            /* (FE included to prevent random interference from the BE.) */
            /* Copy the response message to the FERing.                  */

            DPRINTK("blktap: BERing[%u] -> FERing[%u]\n",
                    (unsigned)i & (RING_SIZE(&blkif->blk_ring)-1),
                    (unsigned)blkif->blk_ring.rsp_prod_pvt &
                    (RING_SIZE(&blkif->blk_ring)-1));

            write_resp_to_fe_ring(blkif, resp_s);
            kick_fe_domain(blkif);
        }
    }

    blktap_be_ring.rsp_cons = i;

    spin_unlock_irqrestore(&blkif_io_lock, flags);

    return IRQ_HANDLED;
}

/* Debug: print the current ring indices. */

void print_vm_ring_idxs(void)
{
    int i;
    blkif_t *blkif;

    WPRINTK("FE Rings: \n---------\n");
    for ( i = 0; i < 50; i++ ) {
        blkif = blkif_find_by_handle((domid_t)i, 0);
        if (blkif == NULL)
            continue;
        if (blkif->blk_ring.sring != NULL) {
            WPRINTK("%2d: req_cons: %2d, rsp_prod_pvt: %2d "
                    "| req_prod: %2d, rsp_prod: %2d\n", i,
                    blkif->blk_ring.req_cons,
                    blkif->blk_ring.rsp_prod_pvt,
                    blkif->blk_ring.sring->req_prod,
                    blkif->blk_ring.sring->rsp_prod);
        } else {
            WPRINTK("%2d: [no device channel yet]\n", i);
        }
    }

    if (blktap_be_ring.sring != NULL) {
        WPRINTK("BE Ring: \n--------\n");
        WPRINTK("BE: rsp_cons: %2d, req_prod_pvt: %2d "
                "| req_prod: %2d, rsp_prod: %2d\n",
                blktap_be_ring.rsp_cons,
                blktap_be_ring.req_prod_pvt,
                blktap_be_ring.sring->req_prod,
                blktap_be_ring.sring->rsp_prod);
    }
}