X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=include%2Flinux%2Fraid%2Fmd_k.h;h=e2df61f5b09a464ae66d11a95dae453da4e258fc;hb=cc169158e75d370ff961e5653a326a7ee1688b6b;hp=c9a0d4013be7c0273a17424b0820cdd16e945fc8;hpb=207e0a826fdee4bfe853681aef2175a739c11286;p=linux-2.6.git diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index c9a0d4013..e2df61f5b 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -15,62 +15,22 @@ #ifndef _MD_K_H #define _MD_K_H -#define MD_RESERVED 0UL -#define LINEAR 1UL -#define RAID0 2UL -#define RAID1 3UL -#define RAID5 4UL -#define TRANSLUCENT 5UL -#define HSM 6UL -#define MULTIPATH 7UL -#define RAID6 8UL -#define RAID10 9UL -#define FAULTY 10UL -#define MAX_PERSONALITY 11UL +/* and dm-bio-list.h is not under include/linux because.... ??? */ +#include "../../../drivers/md/dm-bio-list.h" #define LEVEL_MULTIPATH (-4) #define LEVEL_LINEAR (-1) #define LEVEL_FAULTY (-5) +/* we need a value for 'no level specified' and 0 + * means 'raid0', so we need something else. 
This is + * for internal use only + */ +#define LEVEL_NONE (-1000000) + #define MaxSector (~(sector_t)0) #define MD_THREAD_NAME_MAX 14 -static inline int pers_to_level (int pers) -{ - switch (pers) { - case FAULTY: return LEVEL_FAULTY; - case MULTIPATH: return LEVEL_MULTIPATH; - case HSM: return -3; - case TRANSLUCENT: return -2; - case LINEAR: return LEVEL_LINEAR; - case RAID0: return 0; - case RAID1: return 1; - case RAID5: return 5; - case RAID6: return 6; - case RAID10: return 10; - } - BUG(); - return MD_RESERVED; -} - -static inline int level_to_pers (int level) -{ - switch (level) { - case LEVEL_FAULTY: return FAULTY; - case LEVEL_MULTIPATH: return MULTIPATH; - case -3: return HSM; - case -2: return TRANSLUCENT; - case LEVEL_LINEAR: return LINEAR; - case 0: return RAID0; - case 1: return RAID1; - case 4: - case 5: return RAID5; - case 6: return RAID6; - case 10: return RAID10; - } - return MD_RESERVED; -} - typedef struct mddev_s mddev_t; typedef struct mdk_rdev_s mdk_rdev_t; @@ -82,70 +42,6 @@ typedef struct mdk_rdev_s mdk_rdev_t; #define MAX_CHUNK_SIZE (4096*1024) -/* - * default readahead - */ - -static inline int disk_faulty(mdp_disk_t * d) -{ - return d->state & (1 << MD_DISK_FAULTY); -} - -static inline int disk_active(mdp_disk_t * d) -{ - return d->state & (1 << MD_DISK_ACTIVE); -} - -static inline int disk_sync(mdp_disk_t * d) -{ - return d->state & (1 << MD_DISK_SYNC); -} - -static inline int disk_spare(mdp_disk_t * d) -{ - return !disk_sync(d) && !disk_active(d) && !disk_faulty(d); -} - -static inline int disk_removed(mdp_disk_t * d) -{ - return d->state & (1 << MD_DISK_REMOVED); -} - -static inline void mark_disk_faulty(mdp_disk_t * d) -{ - d->state |= (1 << MD_DISK_FAULTY); -} - -static inline void mark_disk_active(mdp_disk_t * d) -{ - d->state |= (1 << MD_DISK_ACTIVE); -} - -static inline void mark_disk_sync(mdp_disk_t * d) -{ - d->state |= (1 << MD_DISK_SYNC); -} - -static inline void mark_disk_spare(mdp_disk_t * d) -{ - d->state = 0; -} - 
-static inline void mark_disk_removed(mdp_disk_t * d) -{ - d->state = (1 << MD_DISK_FAULTY) | (1 << MD_DISK_REMOVED); -} - -static inline void mark_disk_inactive(mdp_disk_t * d) -{ - d->state &= ~(1 << MD_DISK_ACTIVE); -} - -static inline void mark_disk_nonsync(mdp_disk_t * d) -{ - d->state &= ~(1 << MD_DISK_SYNC); -} - /* * MD's 'extended' device */ @@ -163,8 +59,11 @@ struct mdk_rdev_s int sb_loaded; sector_t data_offset; /* start of data in array */ sector_t sb_offset; + int sb_size; /* bytes in the superblock */ int preferred_minor; /* autorun support */ + struct kobject kobj; + /* A device can be in one of three states based on two flags: * Not working: faulty==1 in_sync==0 * Fully working: faulty==0 in_sync==1 @@ -175,24 +74,37 @@ struct mdk_rdev_s * It can never have faulty==1, in_sync==1 * This reduces the burden of testing multiple flags in many cases */ - int faulty; /* if faulty do not issue IO requests */ - int in_sync; /* device is a full member of the array */ + + unsigned long flags; +#define Faulty 1 /* device is known to have a fault */ +#define In_sync 2 /* device is in_sync with rest of array */ +#define WriteMostly 4 /* Avoid reading if at all possible */ +#define BarriersNotsupp 5 /* BIO_RW_BARRIER is not supported */ int desc_nr; /* descriptor index in the superblock */ int raid_disk; /* role of device in array */ + int saved_raid_disk; /* role that device used to have in the + * array and could again if we did a partial + * resync from the bitmap + */ atomic_t nr_pending; /* number of pending requests. * only maintained for arrays that * support hot removal */ + atomic_t read_errors; /* number of consecutive read errors that + * we have tried to ignore. + */ + atomic_t corrected_errors; /* number of corrected read errors, + * for reporting to userspace and storing + * in superblock. 
+ */ }; -typedef struct mdk_personality_s mdk_personality_t; - struct mddev_s { void *private; - mdk_personality_t *pers; + struct mdk_personality *pers; dev_t unit; int md_minor; struct list_head disks; @@ -201,6 +113,8 @@ struct mddev_s struct gendisk *gendisk; + struct kobject kobj; + /* Superblock information */ int major_version, minor_version, @@ -209,6 +123,7 @@ struct mddev_s int chunk_size; time_t ctime, utime; int level, layout; + char clevel[16]; int raid_disks; int max_disks; sector_t size; /* used size of component devices */ @@ -217,6 +132,14 @@ struct mddev_s char uuid[16]; + /* If the array is being reshaped, we need to record the + * new shape and an indication of where we are up to. + * This is written to the superblock. + * If reshape_position is MaxSector, then no reshape is happening (yet). + */ + sector_t reshape_position; + int delta_disks, new_level, new_layout, new_chunk; + struct mdk_thread_s *thread; /* management thread */ struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */ sector_t curr_resync; /* blocks scheduled */ @@ -224,6 +147,19 @@ struct mddev_s sector_t resync_mark_cnt;/* blocks written at resync_mark */ sector_t resync_max_sectors; /* may be set by personality */ + + sector_t resync_mismatches; /* count of sectors where + * parity/replica mismatch found + */ + + /* allow user-space to request suspension of IO to regions of the array */ + sector_t suspend_lo; + sector_t suspend_hi; + /* if zero, use the system-wide default */ + int sync_speed_min; + int sync_speed_max; + + int ok_start_degraded; /* recovery/resync flags * NEEDED: we might need to start a resync/recover * RUNNING: a thread is running, or about to be started @@ -231,6 +167,11 @@ struct mddev_s * ERR: and IO error was detected - abort the resync/recovery * INTR: someone requested a (clean) early abort. 
* DONE: thread is done and is waiting to be reaped + * REQUESTED: user-space has requested a sync (used with SYNC) + * CHECK: user-space request for check-only, no repair + * RESHAPE: A reshape is happening + * + * If neither SYNC nor RESHAPE is set, then it is a recovery. */ #define MD_RECOVERY_RUNNING 0 #define MD_RECOVERY_SYNC 1 @@ -238,20 +179,35 @@ struct mddev_s #define MD_RECOVERY_INTR 3 #define MD_RECOVERY_DONE 4 #define MD_RECOVERY_NEEDED 5 +#define MD_RECOVERY_REQUESTED 6 +#define MD_RECOVERY_CHECK 7 +#define MD_RECOVERY_RESHAPE 8 unsigned long recovery; int in_sync; /* know to not need resync */ - struct semaphore reconfig_sem; + struct mutex reconfig_mutex; atomic_t active; int changed; /* true if we might need to reread partition info */ int degraded; /* whether md should consider * adding a spare */ + int barriers_work; /* initialised to true, cleared as soon + * as a barrier request to slave + * fails. Only supported + */ + struct bio *biolist; /* bios that need to be retried + * because BIO_RW_BARRIER is not supported + */ atomic_t recovery_active; /* blocks scheduled, but not written */ wait_queue_head_t recovery_wait; sector_t recovery_cp; + + spinlock_t write_lock; + wait_queue_head_t sb_wait; /* for waiting on superblock updates */ + atomic_t pending_writes; /* number of active superblock writes */ + unsigned int safemode; /* if set, update "clean" superblock * when no writes pending. */ @@ -260,13 +216,27 @@ struct mddev_s atomic_t writes_pending; request_queue_t *queue; /* for plugging ... */ + atomic_t write_behind; /* outstanding async IO */ + unsigned int max_write_behind; /* 0 = sync */ + + struct bitmap *bitmap; /* the bitmap for the device */ + struct file *bitmap_file; /* the bitmap file */ + long bitmap_offset; /* offset from superblock of + * start of bitmap. May be + * negative, but not '0' + */ + long default_bitmap_offset; /* this is the offset to use when + * hot-adding a bitmap. It should + * eventually be settable by sysfs.
+ */ + struct list_head all_mddevs; }; static inline void rdev_dec_pending(mdk_rdev_t *rdev, mddev_t *mddev) { - int faulty = rdev->faulty; + int faulty = test_bit(Faulty, &rdev->flags); if (atomic_dec_and_test(&rdev->nr_pending) && faulty) set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } @@ -276,9 +246,11 @@ static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sect atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io); } -struct mdk_personality_s +struct mdk_personality { char *name; + int level; + struct list_head list; struct module *owner; int (*make_request)(request_queue_t *q, struct bio *bio); int (*run)(mddev_t *mddev); @@ -291,10 +263,24 @@ struct mdk_personality_s int (*hot_add_disk) (mddev_t *mddev, mdk_rdev_t *rdev); int (*hot_remove_disk) (mddev_t *mddev, int number); int (*spare_active) (mddev_t *mddev); - int (*sync_request)(mddev_t *mddev, sector_t sector_nr, int go_faster); + sector_t (*sync_request)(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster); int (*resize) (mddev_t *mddev, sector_t sectors); - int (*reshape) (mddev_t *mddev, int raid_disks); + int (*check_reshape) (mddev_t *mddev); + int (*start_reshape) (mddev_t *mddev); int (*reconfig) (mddev_t *mddev, int layout, int chunk_size); + /* quiesce moves between quiescence states + * 0 - fully active + * 1 - no new requests allowed + * others - reserved + */ + void (*quiesce) (mddev_t *mddev, int state); +}; + + +struct md_sysfs_entry { + struct attribute attr; + ssize_t (*show)(mddev_t *, char *); + ssize_t (*store)(mddev_t *, const char *, size_t); }; @@ -303,8 +289,6 @@ static inline char * mdname (mddev_t * mddev) return mddev->gendisk ? mddev->gendisk->disk_name : "mdX"; } -extern mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr); - /* * iterates through some rdev ringlist. It's safe to remove the * current 'rdev'. Dont touch 'tmp' though. 
@@ -332,9 +316,8 @@ typedef struct mdk_thread_s { mddev_t *mddev; wait_queue_head_t wqueue; unsigned long flags; - struct completion *event; struct task_struct *tsk; - const char *name; + unsigned long timeout; } mdk_thread_t; #define THREAD_WAKEUP 0 @@ -365,5 +348,10 @@ do { \ __wait_event_lock_irq(wq, condition, lock, cmd); \ } while (0) +static inline void safe_put_page(struct page *p) +{ + if (p) put_page(p); +} + #endif