drivers/md/bitmap.c  (+98 −68)

@@ -27,6 +27,7 @@
 #include <linux/mount.h>
 #include <linux/buffer_head.h>
 #include <linux/seq_file.h>
+#include <trace/events/block.h>
 #include "md.h"
 #include "bitmap.h"

@@ -208,11 +209,13 @@ static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mddev)
 static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
 {
-    struct md_rdev *rdev = NULL;
+    struct md_rdev *rdev;
     struct block_device *bdev;
     struct mddev *mddev = bitmap->mddev;
     struct bitmap_storage *store = &bitmap->storage;

+restart:
+    rdev = NULL;
     while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
         int size = PAGE_SIZE;
         loff_t offset = mddev->bitmap_info.offset;

@@ -268,8 +271,8 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
                page);
     }

-    if (wait)
-        md_super_wait(mddev);
+    if (wait && md_super_wait(mddev) < 0)
+        goto restart;
     return 0;

 bad_alignment:

@@ -405,7 +408,7 @@ static int read_page(struct file *file, unsigned long index,
     ret = -EIO;
 out:
     if (ret)
-        printk(KERN_ALERT "md: bitmap read error: (%dB @ %llu): %d\n",
+        pr_err("md: bitmap read error: (%dB @ %llu): %d\n",
                (int)PAGE_SIZE,
                (unsigned long long)index << PAGE_SHIFT,
                ret);

@@ -416,6 +419,28 @@ static int read_page(struct file *file, unsigned long index,
  * bitmap file superblock operations
  */

+/*
+ * bitmap_wait_writes() should be called before writing any bitmap
+ * blocks, to ensure previous writes, particularly from
+ * bitmap_daemon_work(), have completed.
+ */
+static void bitmap_wait_writes(struct bitmap *bitmap)
+{
+    if (bitmap->storage.file)
+        wait_event(bitmap->write_wait,
+                   atomic_read(&bitmap->pending_writes) == 0);
+    else
+        /* Note that we ignore the return value.  The writes
+         * might have failed, but that would just mean that
+         * some bits which should be cleared haven't been,
+         * which is safe.  The relevant bitmap blocks will
+         * probably get written again, but there is no great
+         * loss if they aren't.
+         */
+        md_super_wait(bitmap->mddev);
+}
+
 /* update the event counter and sync the superblock to disk */
 void bitmap_update_sb(struct bitmap *bitmap)
 {
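A note on the write_sb_page() change above: md_super_wait() now returns a status (its prototype goes from void to int in the md.h hunk at the end of this diff), so a synchronous caller can detect that a superblock write failed and restart the writeout from the first rdev. Below is a minimal userspace sketch of that restart pattern; queue_write() and wait_for_writes() are stand-ins for md_super_write()/md_super_wait(), not kernel APIs.

    #include <stdio.h>

    static int attempts;

    static void queue_write(int dev)
    {
        printf("queue write to dev %d\n", dev);
    }

    /* stand-in for md_super_wait(): fail the first pass, succeed on retry */
    static int wait_for_writes(void)
    {
        return ++attempts < 2 ? -1 : 0;
    }

    static int write_all(int ndevs, int wait)
    {
    restart:
        for (int dev = 0; dev < ndevs; dev++)
            queue_write(dev);
        if (wait && wait_for_writes() < 0)
            goto restart;   /* a queued write failed: redo the whole pass */
        return 0;
    }

    int main(void)
    {
        write_all(3, 1);
        printf("done after %d wait(s)\n", attempts);
        return 0;
    }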
@@ -455,24 +480,24 @@ void bitmap_print_sb(struct bitmap *bitmap)
     if (!bitmap || !bitmap->storage.sb_page)
         return;
     sb = kmap_atomic(bitmap->storage.sb_page);
-    printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap));
-    printk(KERN_DEBUG "         magic: %08x\n", le32_to_cpu(sb->magic));
-    printk(KERN_DEBUG "       version: %d\n", le32_to_cpu(sb->version));
-    printk(KERN_DEBUG "          uuid: %08x.%08x.%08x.%08x\n",
+    pr_debug("%s: bitmap file superblock:\n", bmname(bitmap));
+    pr_debug("         magic: %08x\n", le32_to_cpu(sb->magic));
+    pr_debug("       version: %d\n", le32_to_cpu(sb->version));
+    pr_debug("          uuid: %08x.%08x.%08x.%08x\n",
          *(__u32 *)(sb->uuid+0),
          *(__u32 *)(sb->uuid+4),
          *(__u32 *)(sb->uuid+8),
          *(__u32 *)(sb->uuid+12));
-    printk(KERN_DEBUG "        events: %llu\n",
+    pr_debug("        events: %llu\n",
          (unsigned long long) le64_to_cpu(sb->events));
-    printk(KERN_DEBUG "events cleared: %llu\n",
+    pr_debug("events cleared: %llu\n",
          (unsigned long long) le64_to_cpu(sb->events_cleared));
-    printk(KERN_DEBUG "         state: %08x\n", le32_to_cpu(sb->state));
-    printk(KERN_DEBUG "     chunksize: %d B\n", le32_to_cpu(sb->chunksize));
-    printk(KERN_DEBUG "  daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
-    printk(KERN_DEBUG "     sync size: %llu KB\n",
-         (unsigned long long)le64_to_cpu(sb->sync_size)/2);
-    printk(KERN_DEBUG "max write behind: %d\n", le32_to_cpu(sb->write_behind));
+    pr_debug("         state: %08x\n", le32_to_cpu(sb->state));
+    pr_debug("     chunksize: %d B\n", le32_to_cpu(sb->chunksize));
+    pr_debug("  daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
+    pr_debug("     sync size: %llu KB\n",
+         (unsigned long long)le64_to_cpu(sb->sync_size)/2);
+    pr_debug("max write behind: %d\n", le32_to_cpu(sb->write_behind));
     kunmap_atomic(sb);
 }

@@ -506,14 +531,14 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap)
     BUG_ON(!chunksize);
     if (!is_power_of_2(chunksize)) {
         kunmap_atomic(sb);
-        printk(KERN_ERR "bitmap chunksize not a power of 2\n");
+        pr_warn("bitmap chunksize not a power of 2\n");
         return -EINVAL;
     }
     sb->chunksize = cpu_to_le32(chunksize);

     daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep;
     if (!daemon_sleep || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
-        printk(KERN_INFO "Choosing daemon_sleep default (5 sec)\n");
+        pr_debug("Choosing daemon_sleep default (5 sec)\n");
         daemon_sleep = 5 * HZ;
     }
     sb->daemon_sleep = cpu_to_le32(daemon_sleep);

@@ -584,7 +609,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
         /* to 4k blocks */
         bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096);
         offset = bitmap->mddev->bitmap_info.offset + (bitmap->cluster_slot * (bm_blocks << 3));
-        pr_info("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__,
+        pr_debug("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__,
             bitmap->cluster_slot, offset);
     }

@@ -634,7 +659,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
     else if (write_behind > COUNTER_MAX)
         reason = "write-behind limit out of range (0 - 16383)";
     if (reason) {
-        printk(KERN_INFO "%s: invalid bitmap file superblock: %s\n",
+        pr_warn("%s: invalid bitmap file superblock: %s\n",
             bmname(bitmap), reason);
         goto out;
     }

@@ -648,16 +673,13 @@ static int bitmap_read_sb(struct bitmap *bitmap)
      * bitmap's UUID and event counter to the mddev's
      */
     if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) {
-        printk(KERN_INFO
-               "%s: bitmap superblock UUID mismatch\n",
+        pr_warn("%s: bitmap superblock UUID mismatch\n",
                bmname(bitmap));
         goto out;
     }
     events = le64_to_cpu(sb->events);
     if (!nodes && (events < bitmap->mddev->events)) {
-        printk(KERN_INFO
-               "%s: bitmap file is out of date (%llu < %llu) "
-               "-- forcing full recovery\n",
+        pr_warn("%s: bitmap file is out of date (%llu < %llu) -- forcing full recovery\n",
             bmname(bitmap), events,
             (unsigned long long) bitmap->mddev->events);
         set_bit(BITMAP_STALE, &bitmap->flags);
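The bitmap_read_sb() hunks above only change message severity; the validation logic they wrap is unchanged. For reference, a compact userspace sketch of those checks. The field names and the 16383 limit mirror the diff, but this struct is a stand-in, not the on-disk superblock layout, and in the kernel the out-of-date case sets BITMAP_STALE to force a full recovery rather than rejecting the bitmap.

    #include <stdio.h>
    #include <stdint.h>

    struct sb {
        uint32_t chunksize;
        uint32_t write_behind;
        uint64_t events;
    };

    static const char *check_sb(const struct sb *sb, uint64_t mddev_events)
    {
        /* power-of-2 test: exactly one bit set */
        if (sb->chunksize == 0 || (sb->chunksize & (sb->chunksize - 1)))
            return "bitmap chunksize not a power of 2";
        if (sb->write_behind > 16383)
            return "write-behind limit out of range (0 - 16383)";
        if (sb->events < mddev_events)
            return "bitmap file is out of date -- forcing full recovery";
        return NULL;
    }

    int main(void)
    {
        struct sb sb = { .chunksize = 4096, .write_behind = 256, .events = 10 };
        const char *reason = check_sb(&sb, 12);

        printf("%s\n", reason ? reason : "superblock ok");
        return 0;
    }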
@@ -679,7 +701,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
     if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
         err = md_setup_cluster(bitmap->mddev, nodes);
         if (err) {
-            pr_err("%s: Could not setup cluster service (%d)\n",
+            pr_warn("%s: Could not setup cluster service (%d)\n",
                    bmname(bitmap), err);
             goto out_no_sb;
         }

@@ -847,14 +869,12 @@ static void bitmap_file_kick(struct bitmap *bitmap)
             ptr = file_path(bitmap->storage.file,
                     path, PAGE_SIZE);

-            printk(KERN_ALERT
-                   "%s: kicking failed bitmap file %s from array!\n",
-                   bmname(bitmap), IS_ERR(ptr) ? "" : ptr);
+            pr_warn("%s: kicking failed bitmap file %s from array!\n",
+                bmname(bitmap), IS_ERR(ptr) ? "" : ptr);

             kfree(path);
         } else
-            printk(KERN_ALERT
-                   "%s: disabling internal bitmap due to errors\n",
-                   bmname(bitmap));
+            pr_warn("%s: disabling internal bitmap due to errors\n",
+                bmname(bitmap));
     }
 }

@@ -983,6 +1003,7 @@ void bitmap_unplug(struct bitmap *bitmap)
 {
     unsigned long i;
     int dirty, need_write;
+    int writing = 0;

     if (!bitmap || !bitmap->storage.filemap ||
         test_bit(BITMAP_STALE, &bitmap->flags))

@@ -997,15 +1018,19 @@ void bitmap_unplug(struct bitmap *bitmap)
         need_write = test_and_clear_page_attr(bitmap, i,
                               BITMAP_PAGE_NEEDWRITE);
         if (dirty || need_write) {
+            if (!writing) {
+                bitmap_wait_writes(bitmap);
+                if (bitmap->mddev->queue)
+                    blk_add_trace_msg(bitmap->mddev->queue,
+                              "md bitmap_unplug");
+            }
             clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING);
             write_page(bitmap, bitmap->storage.filemap[i], 0);
+            writing = 1;
         }
     }
-    if (bitmap->storage.file)
-        wait_event(bitmap->write_wait,
-               atomic_read(&bitmap->pending_writes)==0);
-    else
-        md_super_wait(bitmap->mddev);
+    if (writing)
+        bitmap_wait_writes(bitmap);

     if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
         bitmap_file_kick(bitmap);

@@ -1056,11 +1081,10 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
     outofdate = test_bit(BITMAP_STALE, &bitmap->flags);
     if (outofdate)
-        printk(KERN_INFO "%s: bitmap file is out of date, doing full "
-            "recovery\n", bmname(bitmap));
+        pr_warn("%s: bitmap file is out of date, doing full recovery\n",
+            bmname(bitmap));

     if (file && i_size_read(file->f_mapping->host) < store->bytes) {
-        printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
+        pr_warn("%s: bitmap file too short %lu < %lu\n",
                bmname(bitmap),
                (unsigned long) i_size_read(file->f_mapping->host),
                store->bytes);

@@ -1137,15 +1161,14 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
         offset = 0;
     }

-    printk(KERN_INFO "%s: bitmap initialized from disk: "
-           "read %lu pages, set %lu of %lu bits\n",
+    pr_debug("%s: bitmap initialized from disk: read %lu pages, set %lu of %lu bits\n",
            bmname(bitmap), store->file_pages,
            bit_cnt, chunks);

     return 0;

 err:
-    printk(KERN_INFO "%s: bitmap initialisation failed: %d\n",
+    pr_warn("%s: bitmap initialisation failed: %d\n",
            bmname(bitmap), ret);
     return ret;
 }

@@ -1225,6 +1248,10 @@ void bitmap_daemon_work(struct mddev *mddev)
     }
     bitmap->allclean = 1;

+    if (bitmap->mddev->queue)
+        blk_add_trace_msg(bitmap->mddev->queue,
+                  "md bitmap_daemon_work");
+
     /* Any file-page which is PENDING now needs to be written.
      * So set NEEDWRITE now, then after we make any last-minute changes
      * we will write it.
      */
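The bitmap_unplug() rework above changes the write ordering: wait once for previously in-flight writes, immediately before the first page written in this pass, then wait again at the end only if something was actually written (the new `writing` flag). A userspace sketch of that pattern; the wait/write stubs stand in for bitmap_wait_writes()/write_page().

    #include <stdio.h>
    #include <stdbool.h>

    static void wait_writes(void)     { printf("wait for pending writes\n"); }
    static void write_page_out(int i) { printf("write page %d\n", i); }

    static void unplug(const bool *dirty, int npages)
    {
        int writing = 0;

        for (int i = 0; i < npages; i++) {
            if (!dirty[i])
                continue;
            if (!writing)   /* first write: order after older writes */
                wait_writes();
            write_page_out(i);
            writing = 1;
        }
        if (writing)        /* wrote nothing: nothing to wait for */
            wait_writes();
    }

    int main(void)
    {
        bool dirty[5] = { false, true, false, true, false };

        unplug(dirty, 5);
        return 0;
    }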
@@ -1289,6 +1316,7 @@ void bitmap_daemon_work(struct mddev *mddev)
     }
     spin_unlock_irq(&counts->lock);

+    bitmap_wait_writes(bitmap);
     /* Now start writeout on any page in NEEDWRITE that isn't DIRTY.
      * DIRTY pages need to be written by bitmap_unplug so it can wait
      * for them.
      */

@@ -1595,7 +1623,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force)
            atomic_read(&bitmap->mddev->recovery_active) == 0);

     bitmap->mddev->curr_resync_completed = sector;
-    set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags);
+    set_bit(MD_SB_CHANGE_CLEAN, &bitmap->mddev->sb_flags);
     sector &= ~((1ULL << bitmap->counts.chunkshift) - 1);
     s = 0;
     while (s < sector && s < bitmap->mddev->resync_max_sectors) {

@@ -1825,7 +1853,7 @@ struct bitmap *bitmap_create(struct mddev *mddev, int slot)
     if (err)
         goto error;

-    printk(KERN_INFO "created bitmap (%lu pages) for device %s\n",
+    pr_debug("created bitmap (%lu pages) for device %s\n",
         bitmap->counts.pages, bmname(bitmap));

     err = test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0;

@@ -2029,8 +2057,10 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
                        !bitmap->mddev->bitmap_info.external,
                        mddev_is_clustered(bitmap->mddev)
                        ? bitmap->cluster_slot : 0);
-        if (ret)
+        if (ret) {
+            bitmap_file_unmap(&store);
             goto err;
+        }

         pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO);

@@ -2089,7 +2119,7 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
                     bitmap->mddev->bitmap_info.chunksize =
                         1 << (old_counts.chunkshift +
                               BITMAP_BLOCK_SHIFT);
                     blocks = old_counts.chunks << old_counts.chunkshift;
-                    pr_err("Could not pre-allocate in-memory bitmap for cluster raid\n");
+                    pr_warn("Could not pre-allocate in-memory bitmap for cluster raid\n");
                     break;
                 } else
                     bitmap->counts.bp[page].count += 1;

@@ -2266,7 +2296,7 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
             /* Ensure new bitmap info is stored in
              * metadata promptly.
              */
-            set_bit(MD_CHANGE_DEVS, &mddev->flags);
+            set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
             md_wakeup_thread(mddev->thread);
         }
         rv = 0;

drivers/md/dm-raid.c  (+2 −2)

@@ -2011,7 +2011,7 @@ static int super_load(struct md_rdev *rdev, struct md_rdev *refdev)
         sb->compat_features = cpu_to_le32(FEATURE_FLAG_SUPPORTS_V190);

         /* Force writing of superblocks to disk */
-        set_bit(MD_CHANGE_DEVS, &rdev->mddev->flags);
+        set_bit(MD_SB_CHANGE_DEVS, &rdev->mddev->sb_flags);

         /* Any superblock is better than none, choose that if given */
         return refdev ? 0 : 1;

@@ -3497,7 +3497,7 @@ static void rs_update_sbs(struct raid_set *rs)
     struct mddev *mddev = &rs->md;
     int ro = mddev->ro;

-    set_bit(MD_CHANGE_DEVS, &mddev->flags);
+    set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
     mddev->ro = 0;
     md_update_sb(mddev, 1);
     mddev->ro = ro;
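Both files above, like bitmap.c earlier, rename MD_CHANGE_* to MD_SB_CHANGE_* and move the bits out of mddev->flags into a new mddev->sb_flags word (declared in the md.h hunks below). A sketch of the upshot, assuming md.c's collapsed hunks test sb_flags directly: with the superblock-change bits in their own word, "does the superblock need writing?" no longer needs a mask like the old MD_UPDATE_SB_FLAGS. The set_bit() helper here is a simplified stand-in for the kernel's atomic bitop.

    #include <stdio.h>

    enum mddev_sb_flags {
        MD_SB_CHANGE_DEVS,
        MD_SB_CHANGE_CLEAN,
        MD_SB_CHANGE_PENDING,
        MD_SB_NEED_REWRITE,
    };

    struct mddev {
        unsigned long flags;    /* general state bits only */
        unsigned long sb_flags; /* only superblock-change bits */
    };

    static void set_bit(int bit, unsigned long *word) { *word |= 1UL << bit; }

    int main(void)
    {
        struct mddev mddev = { 0, 0 };

        set_bit(MD_SB_CHANGE_DEVS, &mddev.sb_flags);
        /* previously: mddev->flags & MD_UPDATE_SB_FLAGS, a hand-kept mask */
        if (mddev.sb_flags)
            printf("superblock update needed\n");
        return 0;
    }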
Aborting!\n", mdname(mddev)); goto out; } Loading @@ -123,7 +124,7 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) discard_supported = true; } if (cnt != raid_disks) { printk(KERN_ERR "md/linear:%s: not enough drives present. Aborting!\n", pr_warn("md/linear:%s: not enough drives present. Aborting!\n", mdname(mddev)); goto out; } Loading Loading @@ -227,22 +228,22 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio) } do { tmp_dev = which_dev(mddev, bio->bi_iter.bi_sector); sector_t bio_sector = bio->bi_iter.bi_sector; tmp_dev = which_dev(mddev, bio_sector); start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors; end_sector = tmp_dev->end_sector; data_offset = tmp_dev->rdev->data_offset; bio->bi_bdev = tmp_dev->rdev->bdev; if (unlikely(bio->bi_iter.bi_sector >= end_sector || bio->bi_iter.bi_sector < start_sector)) if (unlikely(bio_sector >= end_sector || bio_sector < start_sector)) goto out_of_bounds; if (unlikely(bio_end_sector(bio) > end_sector)) { /* This bio crosses a device boundary, so we have to * split it. */ split = bio_split(bio, end_sector - bio->bi_iter.bi_sector, split = bio_split(bio, end_sector - bio_sector, GFP_NOIO, fs_bio_set); bio_chain(split, bio); } else { Loading @@ -256,15 +257,18 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio) !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) { /* Just ignore it */ bio_endio(split); } else } else { if (mddev->gendisk) trace_block_bio_remap(bdev_get_queue(split->bi_bdev), split, disk_devt(mddev->gendisk), bio_sector); generic_make_request(split); } } while (split != bio); return; out_of_bounds: printk(KERN_ERR "md/linear:%s: make_request: Sector %llu out of bounds on " "dev %s: %llu sectors, offset %llu\n", pr_err("md/linear:%s: make_request: Sector %llu out of bounds on dev %s: %llu sectors, offset %llu\n", mdname(mddev), (unsigned long long)bio->bi_iter.bi_sector, bdevname(tmp_dev->rdev->bdev, b), Loading @@ -275,7 +279,6 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio) static void linear_status (struct seq_file *seq, struct mddev *mddev) { seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2); } Loading drivers/md/md.c +364 −337 File changed.Preview size limit exceeded, changes collapsed. Show changes drivers/md/md.h +67 −41 Original line number Diff line number Diff line Loading @@ -29,6 +29,16 @@ #define MaxSector (~(sector_t)0) /* * These flags should really be called "NO_RETRY" rather than * "FAILFAST" because they don't make any promise about time lapse, * only about the number of retries, which will be zero. * REQ_FAILFAST_DRIVER is not included because * Commit: 4a27446f3e39 ("[SCSI] modify scsi to handle new fail fast flags.") * seems to suggest that the errors it avoids retrying should usually * be retried. */ #define MD_FAILFAST (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT) /* * MD's 'extended' device */ Loading Loading @@ -168,6 +178,19 @@ enum flag_bits { * so it is safe to remove without * another synchronize_rcu() call. */ ExternalBbl, /* External metadata provides bad * block management for a disk */ FailFast, /* Minimal retries should be attempted on * this device, so use REQ_FAILFAST_DEV. * Also don't try to repair failed reads. * It is expects that no bad block log * is present. 
@@ -189,6 +212,31 @@ extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s,
                 int sectors, int is_new);
 struct md_cluster_info;

+enum mddev_flags {
+    MD_ARRAY_FIRST_USE, /* First use of array, needs initialization */
+    MD_CLOSING,     /* If set, we are closing the array, do not open
+                 * it then */
+    MD_JOURNAL_CLEAN,   /* A raid with journal is already clean */
+    MD_HAS_JOURNAL,     /* The raid array has journal feature set */
+    MD_RELOAD_SB,       /* Reload the superblock because another node
+                 * updated it.
+                 */
+    MD_CLUSTER_RESYNC_LOCKED, /* cluster raid only, which means node
+                   * already took resync lock, need to
+                   * release the lock */
+    MD_FAILFAST_SUPPORTED,  /* Using MD_FAILFAST on metadata writes is
+                 * supported as calls to md_error() will
+                 * never cause the array to become failed.
+                 */
+};
+
+enum mddev_sb_flags {
+    MD_SB_CHANGE_DEVS,  /* Some device status has changed */
+    MD_SB_CHANGE_CLEAN, /* transition to or from 'clean' */
+    MD_SB_CHANGE_PENDING,   /* switch from 'clean' to 'active' in progress */
+    MD_SB_NEED_REWRITE, /* metadata write needs to be repeated */
+};
+
 struct mddev {
     void                *private;
     struct md_personality       *pers;

@@ -196,21 +244,7 @@ struct mddev {
     int             md_minor;
     struct list_head        disks;
     unsigned long           flags;
-#define MD_CHANGE_DEVS  0   /* Some device status has changed */
-#define MD_CHANGE_CLEAN 1   /* transition to or from 'clean' */
-#define MD_CHANGE_PENDING 2 /* switch from 'clean' to 'active' in progress */
-#define MD_UPDATE_SB_FLAGS (1 | 2 | 4)  /* If these are set, md_update_sb needed */
-#define MD_ARRAY_FIRST_USE 3    /* First use of array, needs initialization */
-#define MD_CLOSING  4   /* If set, we are closing the array, do not open
-                 * it then */
-#define MD_JOURNAL_CLEAN 5  /* A raid with journal is already clean */
-#define MD_HAS_JOURNAL  6   /* The raid array has journal feature set */
-#define MD_RELOAD_SB    7   /* Reload the superblock because another node
-                 * updated it.
-                 */
-#define MD_CLUSTER_RESYNC_LOCKED 8 /* cluster raid only, which means node
-                    * already took resync lock, need to
-                    * release the lock */
+    unsigned long           sb_flags;
     int             suspended;
     atomic_t            active_io;

@@ -304,31 +338,6 @@ struct mddev {
     int             parallel_resync;

     int             ok_start_degraded;
-    /* recovery/resync flags
-     * NEEDED:   we might need to start a resync/recover
-     * RUNNING:  a thread is running, or about to be started
-     * SYNC:     actually doing a resync, not a recovery
-     * RECOVER:  doing recovery, or need to try it.
-     * INTR:     resync needs to be aborted for some reason
-     * DONE:     thread is done and is waiting to be reaped
-     * REQUEST:  user-space has requested a sync (used with SYNC)
-     * CHECK:    user-space request for check-only, no repair
-     * RESHAPE:  A reshape is happening
-     * ERROR:    sync-action interrupted because io-error
-     *
-     * If neither SYNC or RESHAPE are set, then it is a recovery.
-     */
-#define MD_RECOVERY_RUNNING 0
-#define MD_RECOVERY_SYNC    1
-#define MD_RECOVERY_RECOVER 2
-#define MD_RECOVERY_INTR    3
-#define MD_RECOVERY_DONE    4
-#define MD_RECOVERY_NEEDED  5
-#define MD_RECOVERY_REQUESTED   6
-#define MD_RECOVERY_CHECK   7
-#define MD_RECOVERY_RESHAPE 8
-#define MD_RECOVERY_FROZEN  9
-#define MD_RECOVERY_ERROR   10
-
     unsigned long           recovery;
     /* If a RAID personality determines that recovery (of a particular
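Converting the #define bit numbers to enums changes no values: enumerators without initializers count from 0, so every existing set_bit()/test_bit() call site keeps working unchanged. A two-assert check:

    #include <assert.h>
    #include <stdio.h>

    enum recovery_flags {
        MD_RECOVERY_RUNNING,    /* == 0, as the old #define said */
        MD_RECOVERY_SYNC,       /* == 1 */
        MD_RECOVERY_RECOVER,
        MD_RECOVERY_INTR,
        MD_RECOVERY_DONE,
        MD_RECOVERY_NEEDED,
        MD_RECOVERY_REQUESTED,
        MD_RECOVERY_CHECK,
        MD_RECOVERY_RESHAPE,
        MD_RECOVERY_FROZEN,
        MD_RECOVERY_ERROR,      /* == 10 */
    };

    int main(void)
    {
        assert(MD_RECOVERY_RUNNING == 0);
        assert(MD_RECOVERY_ERROR == 10);
        printf("enum bit numbers match the old defines\n");
        return 0;
    }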
@@ -442,6 +451,23 @@ struct mddev {
     unsigned int            good_device_nr; /* good device num within cluster raid */
 };

+enum recovery_flags {
+    /*
+     * If neither SYNC or RESHAPE are set, then it is a recovery.
+     */
+    MD_RECOVERY_RUNNING,    /* a thread is running, or about to be started */
+    MD_RECOVERY_SYNC,   /* actually doing a resync, not a recovery */
+    MD_RECOVERY_RECOVER,    /* doing recovery, or need to try it. */
+    MD_RECOVERY_INTR,   /* resync needs to be aborted for some reason */
+    MD_RECOVERY_DONE,   /* thread is done and is waiting to be reaped */
+    MD_RECOVERY_NEEDED, /* we might need to start a resync/recover */
+    MD_RECOVERY_REQUESTED,  /* user-space has requested a sync (used with SYNC) */
+    MD_RECOVERY_CHECK,  /* user-space request for check-only, no repair */
+    MD_RECOVERY_RESHAPE,    /* A reshape is happening */
+    MD_RECOVERY_FROZEN, /* User request to abort, and not restart, any action */
+    MD_RECOVERY_ERROR,  /* sync-action interrupted because io-error */
+};
+
 static inline int __must_check mddev_lock(struct mddev *mddev)
 {
     return mutex_lock_interruptible(&mddev->reconfig_mutex);

@@ -623,7 +649,7 @@ extern int mddev_congested(struct mddev *mddev, int bits);
 extern void md_flush_request(struct mddev *mddev, struct bio *bio);
 extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
                sector_t sector, int size, struct page *page);
-extern void md_super_wait(struct mddev *mddev);
+extern int md_super_wait(struct mddev *mddev);
 extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
             struct page *page, int op, int op_flags,
             bool metadata_op);
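The comment kept at the top of enum recovery_flags ("If neither SYNC or RESHAPE are set, then it is a recovery") implies a classification like the sketch below. md.c's real sync_action handling is collapsed in this diff, so this is an illustration of the rule, not the kernel function; test_bit() is a simplified stand-in.

    #include <stdio.h>

    enum { MD_RECOVERY_RUNNING, MD_RECOVERY_SYNC, MD_RECOVERY_RECOVER,
           MD_RECOVERY_INTR, MD_RECOVERY_DONE, MD_RECOVERY_NEEDED,
           MD_RECOVERY_REQUESTED, MD_RECOVERY_CHECK, MD_RECOVERY_RESHAPE };

    static int test_bit(int bit, unsigned long word) { return (word >> bit) & 1; }

    static const char *action(unsigned long recovery)
    {
        if (!test_bit(MD_RECOVERY_RUNNING, recovery))
            return "idle";
        if (test_bit(MD_RECOVERY_RESHAPE, recovery))
            return "reshape";
        if (test_bit(MD_RECOVERY_SYNC, recovery))
            return test_bit(MD_RECOVERY_CHECK, recovery) ? "check" : "resync";
        return "recovery";  /* neither SYNC nor RESHAPE set */
    }

    int main(void)
    {
        unsigned long recovery = 1UL << MD_RECOVERY_RUNNING;

        printf("%s\n", action(recovery));
        return 0;
    }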