/******************************************************************************
 * arch/xen/drivers/blkif/backend/main.c
 * 
 * Back-end of the driver for virtual block devices. This portion of the
 * driver exports a 'unified' block-device interface that can be accessed
 * by any operating system that implements a compatible front end. A 
 * reference front-end implementation can be found in:
 *  arch/xen/drivers/blkif/frontend
 * 
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Copyright (c) 2005, Christopher Clark
 */

#include "common.h"
#include <asm-xen/evtchn.h>
#ifdef CONFIG_XEN_BLKDEV_GRANT
#include <asm-xen/xen-public/grant_table.h>
#endif

/*
 * These are rather arbitrary. They are fairly large because adjacent requests
 * pulled from a communication ring are quite likely to end up being part of
 * the same scatter/gather request at the disc.
 * 
 * ** TRY INCREASING 'MAX_PENDING_REQS' IF WRITE SPEEDS SEEM TOO LOW **
 * This will increase the chances of being able to write whole tracks.
 * 64 should be enough to keep us competitive with Linux.
 */
#define MAX_PENDING_REQS 64
#define BATCH_PER_DOMAIN 16

static unsigned long mmap_vstart;
#define MMAP_PAGES                                              \
    (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
#define MMAP_VADDR(_req,_seg)                                   \
    (mmap_vstart +                                              \
     ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +    \
     ((_seg) * PAGE_SIZE))
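
/*
 * Layout note: the mmap area is effectively a flat two-dimensional array of
 * page slots indexed [request][segment]. MMAP_VADDR(req, seg) selects slot
 * number (req * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg), so each in-flight
 * request owns a contiguous run of BLKIF_MAX_SEGMENTS_PER_REQUEST virtual
 * pages into which its guest buffers are mapped.
 */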

/*
 * Each outstanding request that we've passed to the lower device layers has a 
 * 'pending_req' allocated to it. Each buffer_head that completes decrements 
 * the pendcnt towards zero. When it hits zero, the specified domain has a 
 * response queued for it, with the saved 'id' passed back.
 */
typedef struct {
    blkif_t       *blkif;
    unsigned long  id;
    int            nr_pages;
    atomic_t       pendcnt;
    unsigned short operation;
    int            status;
} pending_req_t;
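
/*
 * Lifecycle sketch: dispatch_rw_block_io() fills in a pending_req_t, sets
 * pendcnt to the number of buffer_heads/bios it submits, and takes a blkif
 * reference. Each completion callback funnels into __end_block_io_op(),
 * which decrements pendcnt; the final decrement unmaps the foreign pages,
 * queues the response and recycles the slot index onto pending_ring[].
 */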

/*
 * We can't allocate pending_req's in order, since they may complete out of 
 * order. We therefore maintain an allocation ring. This ring also indicates 
 * when enough work has been passed down -- at that point the allocation ring 
 * will be empty.
 */
static pending_req_t pending_reqs[MAX_PENDING_REQS];
static unsigned char pending_ring[MAX_PENDING_REQS];
static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
/* NB. We use a different index type to differentiate from shared blk rings. */
typedef unsigned int PEND_RING_IDX;
#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
static PEND_RING_IDX pending_prod, pending_cons;
#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
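
/*
 * Allocation protocol: a free slot index is taken from
 * pending_ring[MASK_PEND_IDX(pending_cons++)] at dispatch time, and returned
 * via pending_ring[MASK_PEND_IDX(pending_prod++)] (under pend_prod_lock) on
 * completion. Both indices are free-running, so NR_PENDING_REQS counts the
 * requests currently in flight; pending_prod is initialised a whole ring
 * ahead of pending_cons, meaning every slot starts out free.
 */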

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
static kmem_cache_t *buffer_head_cachep;
#else
static request_queue_t *plugged_queue;
static inline void flush_plugged_queue(void)
{
    request_queue_t *q = plugged_queue;
    if ( q != NULL )
    {
        if ( q->unplug_fn != NULL )
            q->unplug_fn(q);
        blk_put_queue(q);
        plugged_queue = NULL;
    }
}
#endif

#ifdef CONFIG_XEN_BLKDEV_GRANT
/* When using grant tables to map a frame for device access then the
 * handle returned must be used to unmap the frame. This is needed to
 * drop the ref count on the frame.
 */
static u16 pending_grant_handles[MMAP_PAGES];
#define pending_handle(_idx, _i) \
    (pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)])
#define BLKBACK_INVALID_HANDLE (0xFFFF)
#endif
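
/*
 * Note that pending_handle(idx, i) flattens its [request][segment] index the
 * same way as MMAP_VADDR(), so there is exactly one grant-handle slot per
 * mapped page in the mmap area; BLKBACK_INVALID_HANDLE marks slots with no
 * live mapping.
 */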

#ifdef CONFIG_XEN_BLKDEV_TAP_BE
/*
 * If the tap driver is used, we may get pages belonging to either the tap
 * or (more likely) the real frontend.  The backend must specify which domain
 * a given page belongs to in update_va_mapping though.  For the moment, 
 * the tap rewrites the ID field of the request to contain the request index
 * and the id of the real front end domain.
 */
#define BLKTAP_COOKIE 0xbeadfeed
static inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); }
#endif
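
/*
 * (Presumably the tap packs the ID as (real_domid << 16) | request_index;
 * only the domid extraction in ID_TO_DOM() is relied upon here, and the low
 * bits are handed back to the tap untouched via make_response().)
 */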

static int do_block_io_op(blkif_t *blkif, int max_to_do);
static void dispatch_probe(blkif_t *blkif, blkif_request_t *req);
static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req);
static void make_response(blkif_t *blkif, unsigned long id, 
                          unsigned short op, int st);

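/*
 * fast_flush_area() tears down the foreign mappings covering the first
 * nr_pages page slots of request 'idx': with grant tables it batches one
 * unmap operation per slot that still holds a valid handle; otherwise it
 * zaps the PTEs with a multicall, requesting a TLB flush only on the final
 * entry.
 */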
static void fast_flush_area(int idx, int nr_pages)
{
#ifdef CONFIG_XEN_BLKDEV_GRANT
    gnttab_op_t       aop[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    unsigned int      i, invcount = 0;
    u16               handle;

    for ( i = 0; i < nr_pages; i++ )
    {
        if ( BLKBACK_INVALID_HANDLE != ( handle = pending_handle(idx, i) ) )
        {
            aop[i].u.unmap_grant_ref.host_virt_addr = MMAP_VADDR(idx, i);
            aop[i].u.unmap_grant_ref.dev_bus_addr   = 0;
            aop[i].u.unmap_grant_ref.handle         = handle;
            pending_handle(idx, i) = BLKBACK_INVALID_HANDLE;
            invcount++;
        }
    }
    if ( unlikely(HYPERVISOR_grant_table_op(
                    GNTTABOP_unmap_grant_ref, aop, invcount)))
        BUG();
#else

    multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    int               i;

    for ( i = 0; i < nr_pages; i++ )
    {
        mcl[i].op = __HYPERVISOR_update_va_mapping;
        mcl[i].args[0] = MMAP_VADDR(idx, i);
        mcl[i].args[1] = 0;
        mcl[i].args[2] = 0;
    }

    mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
    if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
        BUG();
#endif
}


/******************************************************************
 * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE
 */

static struct list_head blkio_schedule_list;
static spinlock_t blkio_schedule_list_lock;

static int __on_blkdev_list(blkif_t *blkif)
{
    return blkif->blkdev_list.next != NULL;
}

static void remove_from_blkdev_list(blkif_t *blkif)
{
    unsigned long flags;
    if ( !__on_blkdev_list(blkif) ) return;
    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
    if ( __on_blkdev_list(blkif) )
    {
        list_del(&blkif->blkdev_list);
        blkif->blkdev_list.next = NULL;
        blkif_put(blkif);
    }
    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
}

static void add_to_blkdev_list_tail(blkif_t *blkif)
{
    unsigned long flags;
    if ( __on_blkdev_list(blkif) ) return;
    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
    if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
    {
        list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
        blkif_get(blkif);
    }
    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
}


/******************************************************************
 * SCHEDULER FUNCTIONS
 */

static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait);

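/*
 * blkio_schedule() is the body of the 'xenblkd' kernel thread. It sleeps
 * until there is both room in the pending-request pool and at least one
 * interface on blkio_schedule_list, then round-robins over the list, pulling
 * at most BATCH_PER_DOMAIN requests from each interface per pass so that one
 * busy domain cannot starve the others. Interfaces with work left over are
 * re-queued at the tail, and the accumulated batch is then pushed down to
 * the disc in one go.
 */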
static int blkio_schedule(void *arg)
{
    DECLARE_WAITQUEUE(wq, current);

    blkif_t          *blkif;
    struct list_head *ent;

    daemonize(
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
        "xenblkd"
#endif
        );

    for ( ; ; )
    {
        /* Wait for work to do. */
        add_wait_queue(&blkio_schedule_wait, &wq);
        set_current_state(TASK_INTERRUPTIBLE);
        if ( (NR_PENDING_REQS == MAX_PENDING_REQS) || 
             list_empty(&blkio_schedule_list) )
            schedule();
        __set_current_state(TASK_RUNNING);
        remove_wait_queue(&blkio_schedule_wait, &wq);

        /* Queue up a batch of requests. */
        while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
                !list_empty(&blkio_schedule_list) )
        {
            ent = blkio_schedule_list.next;
            blkif = list_entry(ent, blkif_t, blkdev_list);
            blkif_get(blkif);
            remove_from_blkdev_list(blkif);
            if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
                add_to_blkdev_list_tail(blkif);
            blkif_put(blkif);
        }

        /* Push the batch through to disc. */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
        run_task_queue(&tq_disk);
#else
        flush_plugged_queue();
#endif
    }
}

static void maybe_trigger_blkio_schedule(void)
{
    /*
     * Needed so that two processes, who together make the following predicate
     * true, don't both read stale values and evaluate the predicate
     * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
     */
    smp_mb();

    if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
         !list_empty(&blkio_schedule_list) )
        wake_up(&blkio_schedule_wait);
}



/******************************************************************
 * COMPLETION CALLBACK -- Called as bh->b_end_io()
 */

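/*
 * __end_block_io_op() runs once per completed buffer_head/bio. Any error
 * fails the whole pending request; the final completion (pendcnt reaching
 * zero) unmaps the request's pages, sends the response to the frontend,
 * drops the blkif reference, returns the slot index to pending_ring[] and
 * pokes the scheduler in case it was waiting for a free slot.
 */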
static void __end_block_io_op(pending_req_t *pending_req, int uptodate)
{
    unsigned long flags;

    /* An error fails the entire request. */
    if ( !uptodate )
    {
        DPRINTK("Buffer not up-to-date at end of operation\n");
        pending_req->status = BLKIF_RSP_ERROR;
    }

    if ( atomic_dec_and_test(&pending_req->pendcnt) )
    {
        int pending_idx = pending_req - pending_reqs;
        fast_flush_area(pending_idx, pending_req->nr_pages);
        make_response(pending_req->blkif, pending_req->id,
                      pending_req->operation, pending_req->status);
        blkif_put(pending_req->blkif);
        spin_lock_irqsave(&pend_prod_lock, flags);
        pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
        spin_unlock_irqrestore(&pend_prod_lock, flags);
        maybe_trigger_blkio_schedule();
    }
}

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
static void end_block_io_op(struct buffer_head *bh, int uptodate)
{
    __end_block_io_op(bh->b_private, uptodate);
    kmem_cache_free(buffer_head_cachep, bh);
}
#else
static int end_block_io_op(struct bio *bio, unsigned int done, int error)
{
    if ( done || error )
        __end_block_io_op(bio->bi_private, (done && !error));
    bio_put(bio);
    return error;
}
#endif


/******************************************************************************
 * NOTIFICATION FROM GUEST OS.
 */

irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
{
    blkif_t *blkif = dev_id;
    add_to_blkdev_list_tail(blkif);
    maybe_trigger_blkio_schedule();
    return IRQ_HANDLED;
}



/******************************************************************
 * DOWNWARD CALLS -- These interface with the block-device layer proper.
 */

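/*
 * do_block_io_op() consumes up to max_to_do requests from the shared ring,
 * dispatching reads/writes and probes, and failing anything it does not
 * recognise. It stops early if the batch quota or the pending-request pool
 * is exhausted; its return value ("more to do") tells the scheduler whether
 * to re-queue this interface.
 */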
static int do_block_io_op(blkif_t *blkif, int max_to_do)
{
    blkif_back_ring_t *blk_ring = &blkif->blk_ring;
    blkif_request_t *req;
    RING_IDX i, rp;
    int more_to_do = 0;

    rp = blk_ring->sring->req_prod;
    rmb(); /* Ensure we see queued requests up to 'rp'. */

    for ( i = blk_ring->req_cons; 
         (i != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, i);
          i++ )
    {
        if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
        {
            more_to_do = 1;
            break;
        }
        
        req = RING_GET_REQUEST(blk_ring, i);
        switch ( req->operation )
        {
        case BLKIF_OP_READ:
        case BLKIF_OP_WRITE:
            dispatch_rw_block_io(blkif, req);
            break;

        case BLKIF_OP_PROBE:
            dispatch_probe(blkif, req);
            break;

        default:
            DPRINTK("error: unknown block io operation [%d]\n",
                    req->operation);
            make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
            break;
        }
    }

    blk_ring->req_cons = i;
    return more_to_do;
}

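/*
 * dispatch_probe() answers a BLKIF_OP_PROBE request: the frontend supplies a
 * single page-sized buffer, which is temporarily mapped here and filled by
 * vbd_probe() with an array of vdisk_t records describing the virtual disks
 * exported to that domain. The mapping is torn down again before the
 * response is sent.
 */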
static void dispatch_probe(blkif_t *blkif, blkif_request_t *req)
{
    int rsp = BLKIF_RSP_ERROR;
    int pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];

    /* We expect one buffer only. */
    if ( unlikely(req->nr_segments != 1) )
        goto out;

    /* Make sure the buffer is page-sized. */
    if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
         (blkif_last_sect(req->frame_and_sects[0]) != 7) )
        goto out;

#ifdef CONFIG_XEN_BLKDEV_GRANT
    {
        gnttab_op_t     op;

        op.u.map_grant_ref.host_virt_addr = MMAP_VADDR(pending_idx, 0);
        op.u.map_grant_ref.flags = GNTMAP_host_map;
        op.u.map_grant_ref.ref = blkif_gref_from_fas(req->frame_and_sects[0]);
        op.u.map_grant_ref.dom = blkif->domid;

        if ( unlikely(HYPERVISOR_grant_table_op(
                        GNTTABOP_map_grant_ref, &op, 1)))
            BUG();

        if ( op.u.map_grant_ref.handle < 0 )
            goto out;

        pending_handle(pending_idx, 0) = op.u.map_grant_ref.handle;
    }
#else /* else CONFIG_XEN_BLKDEV_GRANT */

#ifdef CONFIG_XEN_BLKDEV_TAP_BE
    /* Grab the real frontend out of the probe message. */
    if (req->frame_and_sects[1] == BLKTAP_COOKIE) 
        blkif->is_blktap = 1;
#endif


#ifdef CONFIG_XEN_BLKDEV_TAP_BE
    if ( HYPERVISOR_update_va_mapping_otherdomain(
        MMAP_VADDR(pending_idx, 0),
        (pte_t) { (req->frame_and_sects[0] & PAGE_MASK) | __PAGE_KERNEL },
        0, (blkif->is_blktap ? ID_TO_DOM(req->id) : blkif->domid) ) )
        
        goto out;
#else
    if ( HYPERVISOR_update_va_mapping_otherdomain(
        MMAP_VADDR(pending_idx, 0),
        (pte_t) { (req->frame_and_sects[0] & PAGE_MASK) | __PAGE_KERNEL },
        0, blkif->domid) ) 
        
        goto out;
#endif
#endif /* endif CONFIG_XEN_BLKDEV_GRANT */
   
    rsp = vbd_probe(blkif, (vdisk_t *)MMAP_VADDR(pending_idx, 0), 
                    PAGE_SIZE / sizeof(vdisk_t));

 out:
    fast_flush_area(pending_idx, 1);
    make_response(blkif, req->id, req->operation, rsp);
}

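/*
 * dispatch_rw_block_io() handles a read or write: it validates the segment
 * descriptors, maps the guest's buffer pages into the local mmap area (via
 * grant table or update_va_mapping_otherdomain, depending on configuration),
 * translates the virtual-device sector range with vbd_translate(), and then
 * submits the I/O -- as buffer_heads on 2.4 kernels, or as one or more bios
 * on 2.6. Completion is tracked through the pending_req_t allocated for the
 * request.
 */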
static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
{
    extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
    int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
    unsigned long fas = 0;
    int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
    pending_req_t *pending_req;
#ifdef CONFIG_XEN_BLKDEV_GRANT
    gnttab_op_t       aop[BLKIF_MAX_SEGMENTS_PER_REQUEST];
#else
    unsigned long remap_prot;
    multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST];
#endif
    struct phys_req preq;
    struct { 
        unsigned long buf; unsigned int nsec;
    } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    unsigned int nseg;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
    struct buffer_head *bh;
#else
    struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    int nbio = 0;
    request_queue_t *q;
#endif

    /* Check that number of segments is sane. */
    nseg = req->nr_segments;
    if ( unlikely(nseg == 0) || 
         unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) )
    {
        DPRINTK("Bad number of segments in request (%d)\n", nseg);
        goto bad_descriptor;
    }

    preq.dev           = req->device;
    preq.sector_number = req->sector_number;
    preq.nr_sects      = 0;

#ifdef CONFIG_XEN_BLKDEV_GRANT
    for ( i = 0; i < nseg; i++ )
    {
        fas         = req->frame_and_sects[i];
        seg[i].nsec = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;

        if ( seg[i].nsec <= 0 )
            goto bad_descriptor;
        preq.nr_sects += seg[i].nsec;

        aop[i].u.map_grant_ref.host_virt_addr = MMAP_VADDR(pending_idx, i);

        aop[i].u.map_grant_ref.dom = blkif->domid;
        aop[i].u.map_grant_ref.ref = blkif_gref_from_fas(fas);
        aop[i].u.map_grant_ref.flags = ( GNTMAP_host_map   |
                                       ( ( operation == READ ) ?
                                             0 : GNTMAP_readonly ) );
    }

    if ( unlikely(HYPERVISOR_grant_table_op(
                    GNTTABOP_map_grant_ref, aop, nseg)))
        BUG();

    for ( i = 0; i < nseg; i++ )
    {
        if ( unlikely(aop[i].u.map_grant_ref.handle < 0) )
        {
            DPRINTK("invalid buffer -- could not remap it\n");
            fast_flush_area(pending_idx, nseg);
            goto bad_descriptor;
        }

        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
            FOREIGN_FRAME(aop[i].u.map_grant_ref.dev_bus_addr);

        pending_handle(pending_idx, i) = aop[i].u.map_grant_ref.handle;
    }
#endif

    for ( i = 0; i < nseg; i++ )
    {
        fas         = req->frame_and_sects[i];
#ifdef CONFIG_XEN_BLKDEV_GRANT
        seg[i].buf  = (aop[i].u.map_grant_ref.dev_bus_addr << PAGE_SHIFT) |
                      (blkif_first_sect(fas) << 9);
#else
        seg[i].buf  = (fas & PAGE_MASK) | (blkif_first_sect(fas) << 9);
        seg[i].nsec = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;
        if ( seg[i].nsec <= 0 )
            goto bad_descriptor;
        preq.nr_sects += seg[i].nsec;
#endif
    }

    if ( vbd_translate(&preq, blkif, operation) != 0 )
    {
        DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", 
                operation == READ ? "read" : "write", preq.sector_number,
                preq.sector_number + preq.nr_sects, preq.dev); 
        goto bad_descriptor;
    }

#ifndef CONFIG_XEN_BLKDEV_GRANT
    if ( operation == READ )
        remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW;
    else
        remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED;

    for ( i = 0; i < nseg; i++ )
    {
        mcl[i].op = __HYPERVISOR_update_va_mapping_otherdomain;
        mcl[i].args[0] = MMAP_VADDR(pending_idx, i);
        mcl[i].args[1] = (seg[i].buf & PAGE_MASK) | remap_prot;
        mcl[i].args[2] = 0;
        mcl[i].args[3] = blkif->domid;
#ifdef CONFIG_XEN_BLKDEV_TAP_BE
        if ( blkif->is_blktap )
            mcl[i].args[3] = ID_TO_DOM(req->id);
#endif
        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
            FOREIGN_FRAME(seg[i].buf >> PAGE_SHIFT);
    }

    BUG_ON(HYPERVISOR_multicall(mcl, nseg) != 0);

    for ( i = 0; i < nseg; i++ )
    {
        if ( unlikely(mcl[i].args[5] != 0) )
        {
            DPRINTK("invalid buffer -- could not remap it\n");
            fast_flush_area(pending_idx, nseg);
            goto bad_descriptor;
        }
    }
#endif /* end ifndef CONFIG_XEN_BLKDEV_GRANT */

    pending_req = &pending_reqs[pending_idx];
    pending_req->blkif     = blkif;
    pending_req->id        = req->id;
    pending_req->operation = operation;
    pending_req->status    = BLKIF_RSP_OKAY;
    pending_req->nr_pages  = nseg;

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)

    atomic_set(&pending_req->pendcnt, nseg);
    pending_cons++;
    blkif_get(blkif);

    for ( i = 0; i < nseg; i++ )
    {
        bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL);
        if ( unlikely(bh == NULL) )
        {
            __end_block_io_op(pending_req, 0);
            continue;
        }

        memset(bh, 0, sizeof (struct buffer_head));

        init_waitqueue_head(&bh->b_wait);
        bh->b_size          = seg[i].nsec << 9;
        bh->b_dev           = preq.dev;
        bh->b_rdev          = preq.dev;
        bh->b_rsector       = (unsigned long)preq.sector_number;
        bh->b_data          = (char *)MMAP_VADDR(pending_idx, i) +
            (seg[i].buf & ~PAGE_MASK);
        bh->b_page          = virt_to_page(MMAP_VADDR(pending_idx, i));
        bh->b_end_io        = end_block_io_op;
        bh->b_private       = pending_req;

        bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock) | 
            (1 << BH_Req) | (1 << BH_Launder);
        if ( operation == WRITE )
            bh->b_state |= (1 << BH_JBD) | (1 << BH_Req) | (1 << BH_Uptodate);

        atomic_set(&bh->b_count, 1);

        /* Dispatch a single request. We'll flush it to disc later. */
        generic_make_request(operation, bh);

        preq.sector_number += seg[i].nsec;
    }

#else

    for ( i = 0; i < nseg; i++ )
    {
        while ( (bio == NULL) ||
                (bio_add_page(bio,
                              virt_to_page(MMAP_VADDR(pending_idx, i)),
                              seg[i].nsec << 9,
                              seg[i].buf & ~PAGE_MASK) == 0) )
        {
            bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, nseg-i);
            if ( unlikely(bio == NULL) )
            {
                for ( i = 0; i < (nbio-1); i++ )
                    bio_put(biolist[i]);
                fast_flush_area(pending_idx, nseg);
                goto bad_descriptor;
            }
                
            bio->bi_bdev    = preq.bdev;
            bio->bi_private = pending_req;
            bio->bi_end_io  = end_block_io_op;
            bio->bi_sector  = preq.sector_number;
        }

        preq.sector_number += seg[i].nsec;
    }

    if ( (q = bdev_get_queue(bio->bi_bdev)) != plugged_queue )
    {
        flush_plugged_queue();
        blk_get_queue(q);
        plugged_queue = q;
    }

    atomic_set(&pending_req->pendcnt, nbio);
    pending_cons++;
    blkif_get(blkif);

    for ( i = 0; i < nbio; i++ )
        submit_bio(operation, biolist[i]);

#endif

    return;

 bad_descriptor:
    make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
}



/******************************************************************
 * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
 */


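/*
 * make_response() places a single response on the interface's shared ring
 * (under blk_ring_lock, since responses can be queued both from the xenblkd
 * thread and from I/O completion callbacks) and then notifies the frontend
 * through its event channel.
 */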
static void make_response(blkif_t *blkif, unsigned long id, 
                          unsigned short op, int st)
{
    blkif_response_t *resp;
    unsigned long     flags;
    blkif_back_ring_t *blk_ring = &blkif->blk_ring;

    /* Place on the response ring for the relevant domain. */ 
    spin_lock_irqsave(&blkif->blk_ring_lock, flags);
    resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
    resp->id        = id;
    resp->operation = op;
    resp->status    = st;
    wmb(); /* Ensure other side can see the response fields. */
    blk_ring->rsp_prod_pvt++;
    RING_PUSH_RESPONSES(blk_ring);
    spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);

    /* Kick the relevant domain. */
    notify_via_evtchn(blkif->evtchn);
}

void blkif_deschedule(blkif_t *blkif)
{
    remove_from_blkdev_list(blkif);
}

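/*
 * blkif_init() only does real work in a domain that may act as a block
 * backend. It reserves the empty lowmem region used for mapping guest pages,
 * marks every pending-request slot free (pending_prod starts a full ring
 * ahead of pending_cons), starts the xenblkd thread and registers the
 * control interface.
 */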
static int __init blkif_init(void)
{
    int i;

    if ( !(xen_start_info.flags & SIF_INITDOMAIN) &&
         !(xen_start_info.flags & SIF_BLK_BE_DOMAIN) )
        return 0;

    blkif_interface_init();

    if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 )
        BUG();

    pending_cons = 0;
    pending_prod = MAX_PENDING_REQS;
    memset(pending_reqs, 0, sizeof(pending_reqs));
    for ( i = 0; i < MAX_PENDING_REQS; i++ )
        pending_ring[i] = i;
    
    spin_lock_init(&blkio_schedule_list_lock);
    INIT_LIST_HEAD(&blkio_schedule_list);

    if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
        BUG();

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
    buffer_head_cachep = kmem_cache_create(
        "buffer_head_cache", sizeof(struct buffer_head),
        0, SLAB_HWCACHE_ALIGN, NULL, NULL);
#endif

    blkif_ctrlif_init();
    
#ifdef CONFIG_XEN_BLKDEV_GRANT
    /* memset() cannot store a 16-bit pattern, and would only cover half the
     * array anyway; mark each handle invalid explicitly. */
    for ( i = 0; i < MMAP_PAGES; i++ )
        pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
    printk(KERN_ALERT "Blkif backend is using grant tables.\n");
#endif

#ifdef CONFIG_XEN_BLKDEV_TAP_BE
    printk(KERN_ALERT "NOTE: Blkif backend is running with tap support on!\n");
#endif

    return 0;
}

__initcall(blkif_init);