fs/block_dev.c

   1 /*
   2  *  linux/fs/block_dev.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  *  Copyright (C) 2001  Andrea Arcangeli <andrea@suse.de> SuSE
   6  */
   7
   8 #include <linux/config.h>
   9 #include <linux/init.h>
  10 #include <linux/mm.h>
  11 #include <linux/fcntl.h>
  12 #include <linux/slab.h>
  13 #include <linux/kmod.h>
  14 #include <linux/major.h>
  15 #include <linux/devfs_fs_kernel.h>
  16 #include <linux/smp_lock.h>
  17 #include <linux/highmem.h>
  18 #include <linux/blkdev.h>
  19 #include <linux/module.h>
  20 #include <linux/blkpg.h>
  21 #include <linux/buffer_head.h>
  22 #include <linux/mpage.h>
  23 #include <linux/mount.h>
  24 #include <linux/uio.h>
  25 #include <linux/namei.h>
  26 #include <asm/uaccess.h>
  27
  28 struct bdev_inode {
  29         struct block_device bdev;
  30         struct inode vfs_inode;
  31 };
  32
  33 static inline struct bdev_inode *BDEV_I(struct inode *inode)
  34 {
  35         return container_of(inode, struct bdev_inode, vfs_inode);
  36 }
  37
  38 inline struct block_device *I_BDEV(struct inode *inode)
  39 {
  40         return &BDEV_I(inode)->bdev;
  41 }
  42
  43 EXPORT_SYMBOL(I_BDEV);
  44
  45 static sector_t max_block(struct block_device *bdev)
  46 {
  47         sector_t retval = ~((sector_t)0);
  48         loff_t sz = i_size_read(bdev->bd_inode);
  49
  50         if (sz) {
  51                 unsigned int size = block_size(bdev);
  52                 unsigned int sizebits = blksize_bits(size);
  53                 retval = (sz >> sizebits);
  54         }
  55         return retval;
  56 }
  57
  58 /* Kill _all_ buffers, dirty or not.. */
  59 static void kill_bdev(struct block_device *bdev)
  60 {
  61         invalidate_bdev(bdev, 1);
  62         truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
  63 }
  64
  65 int set_blocksize(struct block_device *bdev, int size)
  66 {
  67         int oldsize;
  68
  69         /* Size must be a power of two, and between 512 and PAGE_SIZE */
  70         if (size > PAGE_SIZE || size < 512 || (size & (size-1)))
  71                 return -EINVAL;
  72
  73         /* Size cannot be smaller than the size supported by the device */
  74         if (size < bdev_hardsect_size(bdev))
  75                 return -EINVAL;
  76
  77         oldsize = bdev->bd_block_size;
  78         if (oldsize == size)
  79                 return 0;
  80
  81         /* Ok, we're actually changing the blocksize.. */
  82         sync_blockdev(bdev);
  83         bdev->bd_block_size = size;
  84         bdev->bd_inode->i_blkbits = blksize_bits(size);
  85         kill_bdev(bdev);
  86         return 0;
  87 }
  88
  89 EXPORT_SYMBOL(set_blocksize);
  90
  91 int sb_set_blocksize(struct super_block *sb, int size)
  92 {
  93         int bits;
  94         if (set_blocksize(sb->s_bdev, size) < 0)
  95                 return 0;
  96         sb->s_blocksize = size;
  97         for (bits = 9, size >>= 9; size >>= 1; bits++)
  98                 ;
  99         sb->s_blocksize_bits = bits;
 100         return sb->s_blocksize;
 101 }
 102
 103 EXPORT_SYMBOL(sb_set_blocksize);
 104
 105 int sb_min_blocksize(struct super_block *sb, int size)
 106 {
 107         int minsize = bdev_hardsect_size(sb->s_bdev);
 108         if (size < minsize)
 109                 size = minsize;
 110         return sb_set_blocksize(sb, size);
 111 }
 112
 113 EXPORT_SYMBOL(sb_min_blocksize);
 114
 115 static int
 116 blkdev_get_block(struct inode *inode, sector_t iblock,
 117                 struct buffer_head *bh, int create)
 118 {
 119         if (iblock >= max_block(I_BDEV(inode))) {
 120                 if (create)
 121                         return -EIO;
 122
 123                 /*
 124                  * for reads, we're just trying to fill a partial page.
 125                  * return a hole, they will have to call get_block again
 126                  * before they can fill it, and they will get -EIO at that
 127                  * time
 128                  */
 129                 return 0;
 130         }
 131         bh->b_bdev = I_BDEV(inode);
 132         bh->b_blocknr = iblock;
 133         set_buffer_mapped(bh);
 134         return 0;
 135 }
 136
 137 static int
 138 blkdev_get_blocks(struct inode *inode, sector_t iblock,
 139                 unsigned long max_blocks, struct buffer_head *bh, int create)
 140 {
 141         if ((iblock + max_blocks) > max_block(I_BDEV(inode)))
 142                 return -EIO;
 143
 144         bh->b_bdev = I_BDEV(inode);
 145         bh->b_blocknr = iblock;
 146         bh->b_size = max_blocks << inode->i_blkbits;
 147         set_buffer_mapped(bh);
 148         return 0;
 149 }
 150
 151 static ssize_t
 152 blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 153                         loff_t offset, unsigned long nr_segs)
 154 {
 155         struct file *file = iocb->ki_filp;
 156         struct inode *inode = file->f_mapping->host;
 157
 158         return blockdev_direct_IO_no_locking(rw, iocb, inode, I_BDEV(inode),
 159                                 iov, offset, nr_segs, blkdev_get_blocks, NULL);
 160 }
 161
 162 static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
 163 {
 164         return block_write_full_page(page, blkdev_get_block, wbc);
 165 }
 166
 167 static int blkdev_readpage(struct file * file, struct page * page)
 168 {
 169         return block_read_full_page(page, blkdev_get_block);
 170 }
 171
 172 static int blkdev_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
 173 {
 174         return block_prepare_write(page, from, to, blkdev_get_block);
 175 }
 176
 177 static int blkdev_commit_write(struct file *file, struct page *page, unsigned from, unsigned to)
 178 {
 179         return block_commit_write(page, from, to);
 180 }
 181
 182 /*
 183  * private llseek:
 184  * for a block special file file->f_dentry->d_inode->i_size is zero
 185  * so we compute the size by hand (just as in block_read/write above)
 186  */
 187 static loff_t block_llseek(struct file *file, loff_t offset, int origin)
 188 {
 189         struct inode *bd_inode = file->f_mapping->host;
 190         loff_t size;
 191         loff_t retval;
 192
 193         down(&bd_inode->i_sem);
 194         size = i_size_read(bd_inode);
 195
 196         switch (origin) {
 197                 case 2:
 198                         offset += size;
 199                         break;
 200                 case 1:
 201                         offset += file->f_pos;
 202         }
 203         retval = -EINVAL;
 204         if (offset >= 0 && offset <= size) {
 205                 if (offset != file->f_pos) {
 206                         file->f_pos = offset;
 207                 }
 208                 retval = offset;
 209         }
 210         up(&bd_inode->i_sem);
 211         return retval;
 212 }
 213
 214 /*
 215  *      Filp is never NULL; the only case when ->fsync() is called with
 216  *      NULL first argument is nfsd_sync_dir() and that's not a directory.
 217  */
 218
 219 static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
 220 {
 221         return sync_blockdev(I_BDEV(filp->f_mapping->host));
 222 }
 223
 224 /*
 225  * pseudo-fs
 226  */
 227
 228 static spinlock_t bdev_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
 229 static kmem_cache_t * bdev_cachep;
 230
 231 static struct inode *bdev_alloc_inode(struct super_block *sb)
 232 {
 233         struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, SLAB_KERNEL);
 234         if (!ei)
 235                 return NULL;
 236         return &ei->vfs_inode;
 237 }
 238
 239 static void bdev_destroy_inode(struct inode *inode)
 240 {
 241         kmem_cache_free(bdev_cachep, BDEV_I(inode));
 242 }
 243
 244 static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
 245 {
 246         struct bdev_inode *ei = (struct bdev_inode *) foo;
 247         struct block_device *bdev = &ei->bdev;
 248
 249         if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
 250             SLAB_CTOR_CONSTRUCTOR)
 251         {
 252                 memset(bdev, 0, sizeof(*bdev));
 253                 sema_init(&bdev->bd_sem, 1);
 254                 sema_init(&bdev->bd_mount_sem, 1);
 255                 INIT_LIST_HEAD(&bdev->bd_inodes);
 256                 INIT_LIST_HEAD(&bdev->bd_list);
 257                 inode_init_once(&ei->vfs_inode);
 258         }
 259 }
 260
 261 static inline void __bd_forget(struct inode *inode)
 262 {
 263         list_del_init(&inode->i_devices);
 264         inode->i_bdev = NULL;
 265         inode->i_mapping = &inode->i_data;
 266 }
 267
 268 static void bdev_clear_inode(struct inode *inode)
 269 {
 270         struct block_device *bdev = &BDEV_I(inode)->bdev;
 271         struct list_head *p;
 272         spin_lock(&bdev_lock);
 273         while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
 274                 __bd_forget(list_entry(p, struct inode, i_devices));
 275         }
 276         list_del_init(&bdev->bd_list);
 277         spin_unlock(&bdev_lock);
 278 }
 279
 280 static struct super_operations bdev_sops = {
 281         .statfs = simple_statfs,
 282         .alloc_inode = bdev_alloc_inode,
 283         .destroy_inode = bdev_destroy_inode,
 284         .drop_inode = generic_delete_inode,
 285         .clear_inode = bdev_clear_inode,
 286 };
 287
 288 static struct super_block *bd_get_sb(struct file_system_type *fs_type,
 289         int flags, const char *dev_name, void *data)
 290 {
 291         return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576);
 292 }
 293
 294 static struct file_system_type bd_type = {
 295         .name           = "bdev",
 296         .get_sb         = bd_get_sb,
 297         .kill_sb        = kill_anon_super,
 298 };
 299
 300 static struct vfsmount *bd_mnt;
 301 struct super_block *blockdev_superblock;
 302
 303 void __init bdev_cache_init(void)
 304 {
 305         int err;
 306         bdev_cachep = kmem_cache_create("bdev_cache",
 307                                         sizeof(struct bdev_inode),
 308                                         0,
 309                                         SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT,
 310                                         init_once,
 311                                         NULL);
 312         if (!bdev_cachep)
 313                 panic("Cannot create bdev_cache SLAB cache");
 314         err = register_filesystem(&bd_type);
 315         if (err)
 316                 panic("Cannot register bdev pseudo-fs");
 317         bd_mnt = kern_mount(&bd_type);
 318         err = PTR_ERR(bd_mnt);
 319         if (IS_ERR(bd_mnt))
 320                 panic("Cannot create bdev pseudo-fs");
 321         blockdev_superblock = bd_mnt->mnt_sb;   /* For writeback */
 322 }
 323
 324 /*
 325  * Most likely _very_ bad one - but then it's hardly critical for small
 326  * /dev and can be fixed when somebody will need really large one.
 327  * Keep in mind that it will be fed through icache hash function too.
 328  */
 329 static inline unsigned long hash(dev_t dev)
 330 {
 331         return MAJOR(dev)+MINOR(dev);
 332 }
 333
 334 static int bdev_test(struct inode *inode, void *data)
 335 {
 336         return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
 337 }
 338
 339 static int bdev_set(struct inode *inode, void *data)
 340 {
 341         BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
 342         return 0;
 343 }
 344
 345 static LIST_HEAD(all_bdevs);
 346
 347 struct block_device *bdget(dev_t dev)
 348 {
 349         struct block_device *bdev;
 350         struct inode *inode;
 351
 352         inode = iget5_locked(bd_mnt->mnt_sb, hash(dev),
 353                         bdev_test, bdev_set, &dev);
 354
 355         if (!inode)
 356                 return NULL;
 357
 358         bdev = &BDEV_I(inode)->bdev;
 359
 360         if (inode->i_state & I_NEW) {
 361                 bdev->bd_contains = NULL;
 362                 bdev->bd_inode = inode;
 363                 bdev->bd_block_size = (1 << inode->i_blkbits);
 364                 bdev->bd_part_count = 0;
 365                 bdev->bd_invalidated = 0;
 366                 inode->i_mode = S_IFBLK;
 367                 inode->i_rdev = dev;
 368                 inode->i_bdev = bdev;
 369                 inode->i_data.a_ops = &def_blk_aops;
 370                 mapping_set_gfp_mask(&inode->i_data, GFP_USER);
 371                 inode->i_data.backing_dev_info = &default_backing_dev_info;
 372                 spin_lock(&bdev_lock);
 373                 list_add(&bdev->bd_list, &all_bdevs);
 374                 spin_unlock(&bdev_lock);
 375                 unlock_new_inode(inode);
 376         }
 377         return bdev;
 378 }
 379
 380 EXPORT_SYMBOL(bdget);
 381
 382 long nr_blockdev_pages(void)
 383 {
 384         struct list_head *p;
 385         long ret = 0;
 386         spin_lock(&bdev_lock);
 387         list_for_each(p, &all_bdevs) {
 388                 struct block_device *bdev;
 389                 bdev = list_entry(p, struct block_device, bd_list);
 390                 ret += bdev->bd_inode->i_mapping->nrpages;
 391         }
 392         spin_unlock(&bdev_lock);
 393         return ret;
 394 }
 395
 396 void bdput(struct block_device *bdev)
 397 {
 398         iput(bdev->bd_inode);
 399 }
 400
 401 EXPORT_SYMBOL(bdput);
 402
 403 static struct block_device *bd_acquire(struct inode *inode)
 404 {
 405         struct block_device *bdev;
 406         spin_lock(&bdev_lock);
 407         bdev = inode->i_bdev;
 408         if (bdev && igrab(bdev->bd_inode)) {
 409                 spin_unlock(&bdev_lock);
 410                 return bdev;
 411         }
 412         spin_unlock(&bdev_lock);
 413         bdev = bdget(inode->i_rdev);
 414         if (bdev) {
 415                 spin_lock(&bdev_lock);
 416                 if (inode->i_bdev)
 417                         __bd_forget(inode);
 418                 inode->i_bdev = bdev;
 419                 inode->i_mapping = bdev->bd_inode->i_mapping;
 420                 list_add(&inode->i_devices, &bdev->bd_inodes);
 421                 spin_unlock(&bdev_lock);
 422         }
 423         return bdev;
 424 }
 425
 426 /* Call when you free inode */
 427
 428 void bd_forget(struct inode *inode)
 429 {
 430         spin_lock(&bdev_lock);
 431         if (inode->i_bdev)
 432                 __bd_forget(inode);
 433         spin_unlock(&bdev_lock);
 434 }
 435
 436 int bd_claim(struct block_device *bdev, void *holder)
 437 {
 438         int res;
 439         spin_lock(&bdev_lock);
 440
 441         /* first decide result */
 442         if (bdev->bd_holder == holder)
 443                 res = 0;         /* already a holder */
 444         else if (bdev->bd_holder != NULL)
 445                 res = -EBUSY;    /* held by someone else */
 446         else if (bdev->bd_contains == bdev)
 447                 res = 0;         /* is a whole device which isn't held */
 448
 449         else if (bdev->bd_contains->bd_holder == bd_claim)
 450                 res = 0;         /* is a partition of a device that is being partitioned */
 451         else if (bdev->bd_contains->bd_holder != NULL)
 452                 res = -EBUSY;    /* is a partition of a held device */
 453         else
 454                 res = 0;         /* is a partition of an un-held device */
 455
 456         /* now impose change */
 457         if (res==0) {
 458                 /* note that for a whole device bd_holders
 459                  * will be incremented twice, and bd_holder will
 460                  * be set to bd_claim before being set to holder
 461                  */
 462                 bdev->bd_contains->bd_holders ++;
 463                 bdev->bd_contains->bd_holder = bd_claim;
 464                 bdev->bd_holders++;
 465                 bdev->bd_holder = holder;
 466         }
 467         spin_unlock(&bdev_lock);
 468         return res;
 469 }
 470
 471 EXPORT_SYMBOL(bd_claim);
 472
 473 void bd_release(struct block_device *bdev)
 474 {
 475         spin_lock(&bdev_lock);
 476         if (!--bdev->bd_contains->bd_holders)
 477                 bdev->bd_contains->bd_holder = NULL;
 478         if (!--bdev->bd_holders)
 479                 bdev->bd_holder = NULL;
 480         spin_unlock(&bdev_lock);
 481 }
 482
 483 EXPORT_SYMBOL(bd_release);
 484
 485 /*
 486  * Tries to open block device by device number.  Use it ONLY if you
 487  * really do not have anything better - i.e. when you are behind a
 488  * truly sucky interface and all you are given is a device number.  _Never_
 489  * to be used for internal purposes.  If you ever need it - reconsider
 490  * your API.
 491  */
 492 struct block_device *open_by_devnum(dev_t dev, unsigned mode)
 493 {
 494         struct block_device *bdev = bdget(dev);
 495         int err = -ENOMEM;
 496         int flags = mode & FMODE_WRITE ? O_RDWR : O_RDONLY;
 497         if (bdev)
 498                 err = blkdev_get(bdev, mode, flags);
 499         return err ? ERR_PTR(err) : bdev;
 500 }
 501
 502 EXPORT_SYMBOL(open_by_devnum);
 503
 504 /*
 505  * This routine checks whether a removable media has been changed,
 506  * and invalidates all buffer-cache-entries in that case. This
 507  * is a relatively slow routine, so we have to try to minimize using
 508  * it. Thus it is called only upon a 'mount' or 'open'. This
 509  * is the best way of combining speed and utility, I think.
 510  * People changing diskettes in the middle of an operation deserve
 511  * to lose :-)
 512  */
 513 int check_disk_change(struct block_device *bdev)
 514 {
 515         struct gendisk *disk = bdev->bd_disk;
 516         struct block_device_operations * bdops = disk->fops;
 517
 518         if (!bdops->media_changed)
 519                 return 0;
 520         if (!bdops->media_changed(bdev->bd_disk))
 521                 return 0;
 522
 523         if (__invalidate_device(bdev, 0))
 524                 printk("VFS: busy inodes on changed media.\n");
 525
 526         if (bdops->revalidate_disk)
 527                 bdops->revalidate_disk(bdev->bd_disk);
 528         if (bdev->bd_disk->minors > 1)
 529                 bdev->bd_invalidated = 1;
 530         return 1;
 531 }
 532
 533 EXPORT_SYMBOL(check_disk_change);
 534
 535 void bd_set_size(struct block_device *bdev, loff_t size)
 536 {
 537         unsigned bsize = bdev_hardsect_size(bdev);
 538
 539         bdev->bd_inode->i_size = size;
 540         while (bsize < PAGE_CACHE_SIZE) {
 541                 if (size & bsize)
 542                         break;
 543                 bsize <<= 1;
 544         }
 545         bdev->bd_block_size = bsize;
 546         bdev->bd_inode->i_blkbits = blksize_bits(bsize);
 547 }
 548 EXPORT_SYMBOL(bd_set_size);
 549
 550 static int do_open(struct block_device *bdev, struct file *file)
 551 {
 552         struct module *owner = NULL;
 553         struct gendisk *disk;
 554         int ret = -ENXIO;
 555         int part;
 556
 557         file->f_mapping = bdev->bd_inode->i_mapping;
 558         lock_kernel();
 559         disk = get_gendisk(bdev->bd_dev, &part);
 560         if (!disk) {
 561                 unlock_kernel();
 562                 bdput(bdev);
 563                 return ret;
 564         }
 565         owner = disk->fops->owner;
 566
 567         down(&bdev->bd_sem);
 568         if (!bdev->bd_openers) {
 569                 bdev->bd_disk = disk;
 570                 bdev->bd_contains = bdev;
 571                 if (!part) {
 572                         struct backing_dev_info *bdi;
 573                         if (disk->fops->open) {
 574                                 ret = disk->fops->open(bdev->bd_inode, file);
 575                                 if (ret)
 576                                         goto out_first;
 577                         }
 578                         if (!bdev->bd_openers) {
 579                                 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
 580                                 bdi = blk_get_backing_dev_info(bdev);
 581                                 if (bdi == NULL)
 582                                         bdi = &default_backing_dev_info;
 583                                 bdev->bd_inode->i_data.backing_dev_info = bdi;
 584                         }
 585                         if (bdev->bd_invalidated)
 586                                 rescan_partitions(disk, bdev);
 587                 } else {
 588                         struct hd_struct *p;
 589                         struct block_device *whole;
 590                         whole = bdget_disk(disk, 0);
 591                         ret = -ENOMEM;
 592                         if (!whole)
 593                                 goto out_first;
 594                         ret = blkdev_get(whole, file->f_mode, file->f_flags);
 595                         if (ret)
 596                                 goto out_first;
 597                         bdev->bd_contains = whole;
 598                         down(&whole->bd_sem);
 599                         whole->bd_part_count++;
 600                         p = disk->part[part - 1];
 601                         bdev->bd_inode->i_data.backing_dev_info =
 602                            whole->bd_inode->i_data.backing_dev_info;
 603                         if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) {
 604                                 whole->bd_part_count--;
 605                                 up(&whole->bd_sem);
 606                                 ret = -ENXIO;
 607                                 goto out_first;
 608                         }
 609                         kobject_get(&p->kobj);
 610                         bdev->bd_part = p;
 611                         bd_set_size(bdev, (loff_t) p->nr_sects << 9);
 612                         up(&whole->bd_sem);
 613                 }
 614         } else {
 615                 put_disk(disk);
 616                 module_put(owner);
 617                 if (bdev->bd_contains == bdev) {
 618                         if (bdev->bd_disk->fops->open) {
 619                                 ret = bdev->bd_disk->fops->open(bdev->bd_inode, file);
 620                                 if (ret)
 621                                         goto out;
 622                         }
 623                         if (bdev->bd_invalidated)
 624                                 rescan_partitions(bdev->bd_disk, bdev);
 625                 } else {
 626                         down(&bdev->bd_contains->bd_sem);
 627                         bdev->bd_contains->bd_part_count++;
 628                         up(&bdev->bd_contains->bd_sem);
 629                 }
 630         }
 631         bdev->bd_openers++;
 632         up(&bdev->bd_sem);
 633         unlock_kernel();
 634         return 0;
 635
 636 out_first:
 637         bdev->bd_disk = NULL;
 638         bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
 639         if (bdev != bdev->bd_contains)
 640                 blkdev_put(bdev->bd_contains);
 641         bdev->bd_contains = NULL;
 642         put_disk(disk);
 643         module_put(owner);
 644 out:
 645         up(&bdev->bd_sem);
 646         unlock_kernel();
 647         if (ret)
 648                 bdput(bdev);
 649         return ret;
 650 }
 651
 652 int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags)
 653 {
 654         /*
 655          * This crockload is due to bad choice of ->open() type.
 656          * It will go away.
 657          * For now, block device ->open() routine must _not_
 658          * examine anything in 'inode' argument except ->i_rdev.
 659          */
 660         struct file fake_file = {};
 661         struct dentry fake_dentry = {};
 662         fake_file.f_mode = mode;
 663         fake_file.f_flags = flags;
 664         fake_file.f_dentry = &fake_dentry;
 665         fake_dentry.d_inode = bdev->bd_inode;
 666
 667         return do_open(bdev, &fake_file);
 668 }
 669
 670 EXPORT_SYMBOL(blkdev_get);
 671
 672 int blkdev_open(struct inode * inode, struct file * filp)
 673 {
 674         struct block_device *bdev;
 675         int res;
 676
 677         /*
 678          * Preserve backwards compatibility and allow large file access
 679          * even if userspace doesn't ask for it explicitly. Some mkfs
 680          * binary needs it. We might want to drop this workaround
 681          * during an unstable branch.
 682          */
 683         filp->f_flags |= O_LARGEFILE;
 684
 685         bdev = bd_acquire(inode);
 686
 687         res = do_open(bdev, filp);
 688         if (res)
 689                 return res;
 690
 691         if (!(filp->f_flags & O_EXCL) )
 692                 return 0;
 693
 694         if (!(res = bd_claim(bdev, filp)))
 695                 return 0;
 696
 697         blkdev_put(bdev);
 698         return res;
 699 }
 700
 701 EXPORT_SYMBOL(blkdev_open);
 702
 703 int blkdev_put(struct block_device *bdev)
 704 {
 705         int ret = 0;
 706         struct inode *bd_inode = bdev->bd_inode;
 707         struct gendisk *disk = bdev->bd_disk;
 708
 709         down(&bdev->bd_sem);
 710         lock_kernel();
 711         if (!--bdev->bd_openers) {
 712                 sync_blockdev(bdev);
 713                 kill_bdev(bdev);
 714         }
 715         if (bdev->bd_contains == bdev) {
 716                 if (disk->fops->release)
 717                         ret = disk->fops->release(bd_inode, NULL);
 718         } else {
 719                 down(&bdev->bd_contains->bd_sem);
 720                 bdev->bd_contains->bd_part_count--;
 721                 up(&bdev->bd_contains->bd_sem);
 722         }
 723         if (!bdev->bd_openers) {
 724                 struct module *owner = disk->fops->owner;
 725
 726                 put_disk(disk);
 727                 module_put(owner);
 728
 729                 if (bdev->bd_contains != bdev) {
 730                         kobject_put(&bdev->bd_part->kobj);
 731                         bdev->bd_part = NULL;
 732                 }
 733                 bdev->bd_disk = NULL;
 734                 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
 735                 if (bdev != bdev->bd_contains) {
 736                         blkdev_put(bdev->bd_contains);
 737                 }
 738                 bdev->bd_contains = NULL;
 739         }
 740         unlock_kernel();
 741         up(&bdev->bd_sem);
 742         bdput(bdev);
 743         return ret;
 744 }
 745
 746 EXPORT_SYMBOL(blkdev_put);
 747
 748 static int blkdev_close(struct inode * inode, struct file * filp)
 749 {
 750         struct block_device *bdev = I_BDEV(filp->f_mapping->host);
 751         if (bdev->bd_holder == filp)
 752                 bd_release(bdev);
 753         return blkdev_put(bdev);
 754 }
 755
 756 static ssize_t blkdev_file_write(struct file *file, const char __user *buf,
 757                                    size_t count, loff_t *ppos)
 758 {
 759         struct iovec local_iov = { .iov_base = (void __user *)buf, .iov_len = count };
 760
 761         return generic_file_write_nolock(file, &local_iov, 1, ppos);
 762 }
 763
 764 static ssize_t blkdev_file_aio_write(struct kiocb *iocb, const char __user *buf,
 765                                    size_t count, loff_t pos)
 766 {
 767         struct iovec local_iov = { .iov_base = (void __user *)buf, .iov_len = count };
 768
 769         return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
 770 }
 771
 772 static int block_ioctl(struct inode *inode, struct file *file, unsigned cmd,
 773                         unsigned long arg)
 774 {
 775         return blkdev_ioctl(file->f_mapping->host, file, cmd, arg);
 776 }
 777
 778 struct address_space_operations def_blk_aops = {
 779         .readpage       = blkdev_readpage,
 780         .writepage      = blkdev_writepage,
 781         .sync_page      = block_sync_page,
 782         .prepare_write  = blkdev_prepare_write,
 783         .commit_write   = blkdev_commit_write,
 784         .writepages     = generic_writepages,
 785         .direct_IO      = blkdev_direct_IO,
 786 };
 787
 788 struct file_operations def_blk_fops = {
 789         .open           = blkdev_open,
 790         .release        = blkdev_close,
 791         .llseek         = block_llseek,
 792         .read           = generic_file_read,
 793         .write          = blkdev_file_write,
 794         .aio_read       = generic_file_aio_read,
 795         .aio_write      = blkdev_file_aio_write,
 796         .mmap           = generic_file_mmap,
 797         .fsync          = block_fsync,
 798         .ioctl          = block_ioctl,
 799         .readv          = generic_file_readv,
 800         .writev         = generic_file_write_nolock,
 801         .sendfile       = generic_file_sendfile,
 802 };
 803
 804 EXPORT_SYMBOL(def_blk_fops);
 805
 806 int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
 807 {
 808         int res;
 809         mm_segment_t old_fs = get_fs();
 810         set_fs(KERNEL_DS);
 811         res = blkdev_ioctl(bdev->bd_inode, NULL, cmd, arg);
 812         set_fs(old_fs);
 813         return res;
 814 }
 815
 816 EXPORT_SYMBOL(ioctl_by_bdev);
 817
 818 /**
 819  * lookup_bdev  - lookup a struct block_device by name
 820  *
 821  * @path:       special file representing the block device
 822  *
 823  * Get a reference to the blockdevice at @path in the current
 824  * namespace if possible and return it.  Return ERR_PTR(error)
 825  * otherwise.
 826  */
 827 struct block_device *lookup_bdev(const char *path)
 828 {
 829         struct block_device *bdev;
 830         struct inode *inode;
 831         struct nameidata nd;
 832         int error;
 833
 834         if (!path || !*path)
 835                 return ERR_PTR(-EINVAL);
 836
 837         error = path_lookup(path, LOOKUP_FOLLOW, &nd);
 838         if (error)
 839                 return ERR_PTR(error);
 840
 841         inode = nd.dentry->d_inode;
 842         error = -ENOTBLK;
 843         if (!S_ISBLK(inode->i_mode))
 844                 goto fail;
 845         error = -EACCES;
 846         if (nd.mnt->mnt_flags & MNT_NODEV)
 847                 goto fail;
 848         error = -ENOMEM;
 849         bdev = bd_acquire(inode);
 850         if (!bdev)
 851                 goto fail;
 852 out:
 853         path_release(&nd);
 854         return bdev;
 855 fail:
 856         bdev = ERR_PTR(error);
 857         goto out;
 858 }
 859
 860 /**
 861  * open_bdev_excl  -  open a block device by name and set it up for use
 862  *
 863  * @path:       special file representing the block device
 864  * @flags:      %MS_RDONLY for opening read-only
 865  * @holder:     owner for exclusion
 866  *
 867  * Open the blockdevice described by the special file at @path, claim it
 868  * for the @holder.
 869  */
 870 struct block_device *open_bdev_excl(const char *path, int flags, void *holder)
 871 {
 872         struct block_device *bdev;
 873         mode_t mode = FMODE_READ;
 874         int error = 0;
 875
 876         bdev = lookup_bdev(path);
 877         if (IS_ERR(bdev))
 878                 return bdev;
 879
 880         if (!(flags & MS_RDONLY))
 881                 mode |= FMODE_WRITE;
 882         error = blkdev_get(bdev, mode, 0);
 883         if (error)
 884                 return ERR_PTR(error);
 885         error = -EACCES;
 886         if (!(flags & MS_RDONLY) && bdev_read_only(bdev))
 887                 goto blkdev_put;
 888         error = bd_claim(bdev, holder);
 889         if (error)
 890                 goto blkdev_put;
 891
 892         return bdev;
 893
 894 blkdev_put:
 895         blkdev_put(bdev);
 896         return ERR_PTR(error);
 897 }
 898
 899 EXPORT_SYMBOL(open_bdev_excl);
 900
 901 /**
 902  * close_bdev_excl  -  release a blockdevice openen by open_bdev_excl()
 903  *
 904  * @bdev:       blockdevice to close
 905  *
 906  * This is the counterpart to open_bdev_excl().
 907  */
 908 void close_bdev_excl(struct block_device *bdev)
 909 {
 910         bd_release(bdev);
 911         blkdev_put(bdev);
 912 }
 913
 914 EXPORT_SYMBOL(close_bdev_excl);