Commit a782483c authored by Christoph Hellwig, committed by Jens Axboe
Browse files

block: remove the nr_sects field in struct hd_struct



Now that the hd_struct always has a block device attached to it, there is
no need for having two size fields that just get out of sync.

Additionally the field in hd_struct did not use proper serialization,
possibly allowing for torn writes.  By only using the block_device field
this problem also gets fixed.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Acked-by: Coly Li <colyli@suse.de>			[bcache]
Acked-by: Chao Yu <yuchao0@huawei.com>			[f2fs]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent e6cb5382
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -613,7 +613,7 @@ void guard_bio_eod(struct bio *bio)
	rcu_read_lock();
	part = __disk_get_part(bio->bi_disk, bio->bi_partno);
	if (part)
		maxsector = part_nr_sects_read(part);
		maxsector = bdev_nr_sectors(part->bdev);
	else	
		maxsector = get_capacity(bio->bi_disk);
	rcu_read_unlock();
+1 −1
Original line number Diff line number Diff line
@@ -755,7 +755,7 @@ static inline int blk_partition_remap(struct bio *bio)
		goto out;

	if (bio_sectors(bio)) {
		if (bio_check_eod(bio, part_nr_sects_read(p)))
		if (bio_check_eod(bio, bdev_nr_sectors(p->bdev)))
			goto out;
		bio->bi_iter.bi_sector += p->start_sect;
		trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p),
+0 −53
Original line number Diff line number Diff line
@@ -387,59 +387,6 @@ static inline void hd_free_part(struct hd_struct *part)
	percpu_ref_exit(&part->ref);
}

/*
 * Read part->nr_sects.
 *
 * Every access to part->nr_sects that is not already serialized by the
 * partition bd_mutex or the gendisk bdev bd_mutex has to go through this
 * accessor.
 *
 * Modelled on i_size_read() / i_size_write().  The CONFIG_PREEMPTION
 * branch is the optimized variant for a UP kernel with preemption enabled.
 */
static inline sector_t part_nr_sects_read(struct hd_struct *part)
{
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
	unsigned int start;
	sector_t snapshot;

	/* Retry until a read completes without the seqcount changing. */
	for (;;) {
		start = read_seqcount_begin(&part->nr_sects_seq);
		snapshot = part->nr_sects;
		if (!read_seqcount_retry(&part->nr_sects_seq, start))
			return snapshot;
	}
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
	sector_t snapshot;

	/* Keep the load from being preempted half way through. */
	preempt_disable();
	snapshot = part->nr_sects;
	preempt_enable();

	return snapshot;
#else
	/* No extra protection is applied in this configuration. */
	return part->nr_sects;
#endif
}

/*
 * Store @size as the new part->nr_sects value.
 *
 * Should be called with mutex lock held (typically bd_mutex) of partition
 * to provide mutual exclusion among writers otherwise seqcount might be
 * left in wrong state leaving the readers spinning infinitely.
 *
 * The three branches mirror the ones in part_nr_sects_read().
 */
static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
{
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
	/*
	 * 32-bit SMP: do the store inside a seqcount write section, with
	 * preemption disabled for the duration of that section.
	 */
	preempt_disable();
	write_seqcount_begin(&part->nr_sects_seq);
	part->nr_sects = size;
	write_seqcount_end(&part->nr_sects_seq);
	preempt_enable();
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
	/* 32-bit, non-SMP, preemptible: only preemption is disabled. */
	preempt_disable();
	part->nr_sects = size;
	preempt_enable();
#else
	/* Every other configuration: plain store. */
	part->nr_sects = size;
#endif
}

int bio_add_hw_page(struct request_queue *q, struct bio *bio,
		struct page *page, unsigned int len, unsigned int offset,
		unsigned int max_sectors, bool *same_page);
+35 −24
Original line number Diff line number Diff line
@@ -40,6 +40,16 @@ static void disk_add_events(struct gendisk *disk);
static void disk_del_events(struct gendisk *disk);
static void disk_release_events(struct gendisk *disk);

/*
 * Set the capacity of @disk to @sectors.
 *
 * The value is stored as a byte count in the inode of the block device
 * attached to disk->part0; the update is done under that device's
 * bd_size_lock.
 */
void set_capacity(struct gendisk *disk, sector_t sectors)
{
	struct block_device *part0_bdev = disk->part0.bdev;
	loff_t nr_bytes = (loff_t)sectors << SECTOR_SHIFT;

	spin_lock(&part0_bdev->bd_size_lock);
	i_size_write(part0_bdev->bd_inode, nr_bytes);
	spin_unlock(&part0_bdev->bd_size_lock);
}
EXPORT_SYMBOL(set_capacity);

/*
 * Set disk capacity and notify if the size is not currently zero and will not
 * be set to zero.  Returns true if a uevent was sent, otherwise false.
@@ -47,18 +57,30 @@ static void disk_release_events(struct gendisk *disk);
bool set_capacity_and_notify(struct gendisk *disk, sector_t size)
{
	/* Sample the old capacity before it is overwritten below. */
	sector_t capacity = get_capacity(disk);
	char *envp[] = { "RESIZE=1", NULL };

	set_capacity(disk, size);

	/*
	 * Only print a message and send a uevent if the gendisk is user visible
	 * and alive.  This avoids spamming the log and udev when setting the
	 * initial capacity during probing.
	 */
	if (size == capacity ||
	    (disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP)
		return false;

	/* "from" is the previous capacity, "to" is the value just set. */
	pr_info("%s: detected capacity change from %lld to %lld\n",
		disk->disk_name, capacity, size);

	/*
	 * Historically we did not send a uevent for changes to/from an empty
	 * device.
	 */
	if (!capacity || !size)
		return false;
	kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
	return true;
}
EXPORT_SYMBOL_GPL(set_capacity_and_notify);

@@ -247,7 +269,7 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
		part = rcu_dereference(ptbl->part[piter->idx]);
		if (!part)
			continue;
		if (!part_nr_sects_read(part) &&
		if (!bdev_nr_sectors(part->bdev) &&
		    !(piter->flags & DISK_PITER_INCL_EMPTY) &&
		    !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
		      piter->idx == 0))
@@ -284,7 +306,7 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit);
/*
 * Return non-zero when @sector falls inside @part, i.e. in the range
 * [part->start_sect, part->start_sect + bdev_nr_sectors(part->bdev)).
 */
static inline int sector_in_part(struct hd_struct *part, sector_t sector)
{
	return part->start_sect <= sector &&
		sector < part->start_sect + bdev_nr_sectors(part->bdev);
}

/**
@@ -986,8 +1008,8 @@ void __init printk_all_partitions(void)

			printk("%s%s %10llu %s %s", is_part0 ? "" : "  ",
			       bdevt_str(part_devt(part), devt_buf),
			       (unsigned long long)part_nr_sects_read(part) >> 1
			       , disk_name(disk, part->partno, name_buf),
			       bdev_nr_sectors(part->bdev) >> 1,
			       disk_name(disk, part->partno, name_buf),
			       part->info ? part->info->uuid : "");
			if (is_part0) {
				if (dev->parent && dev->parent->driver)
@@ -1079,7 +1101,7 @@ static int show_partition(struct seq_file *seqf, void *v)
	while ((part = disk_part_iter_next(&piter)))
		seq_printf(seqf, "%4d  %7d %10llu %s\n",
			   MAJOR(part_devt(part)), MINOR(part_devt(part)),
			   (unsigned long long)part_nr_sects_read(part) >> 1,
			   bdev_nr_sectors(part->bdev) >> 1,
			   disk_name(sgp, part->partno, buf));
	disk_part_iter_exit(&piter);

@@ -1161,8 +1183,7 @@ ssize_t part_size_show(struct device *dev,
{
	struct hd_struct *p = dev_to_part(dev);

	return sprintf(buf, "%llu\n",
		(unsigned long long)part_nr_sects_read(p));
	return sprintf(buf, "%llu\n", bdev_nr_sectors(p->bdev));
}

ssize_t part_stat_show(struct device *dev,
@@ -1618,16 +1639,6 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
	ptbl = rcu_dereference_protected(disk->part_tbl, 1);
	rcu_assign_pointer(ptbl->part[0], &disk->part0);

	/*
	 * set_capacity() and get_capacity() currently don't use
	 * seqcounter to read/update the part0->nr_sects. Still init
	 * the counter as we can read the sectors in IO submission
	 * path using sequence counters.
	 *
	 * TODO: Ideally set_capacity() and get_capacity() should be
	 * converted to make use of bd_mutex and sequence counters.
	 */
	hd_sects_seq_init(&disk->part0);
	if (hd_ref_init(&disk->part0))
		goto out_free_bdstats;

+11 −6
Original line number Diff line number Diff line
@@ -85,6 +85,13 @@ static int (*check_part[])(struct parsed_partitions *) = {
	NULL
};

/*
 * Record @sectors as the size of @bdev.
 *
 * The size is kept as a byte count in bdev->bd_inode and is written under
 * bdev->bd_size_lock.
 */
static void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors)
{
	loff_t nr_bytes = (loff_t)sectors << SECTOR_SHIFT;

	spin_lock(&bdev->bd_size_lock);
	i_size_write(bdev->bd_inode, nr_bytes);
	spin_unlock(&bdev->bd_size_lock);
}

static struct parsed_partitions *allocate_partitions(struct gendisk *hd)
{
	struct parsed_partitions *state;
@@ -295,7 +302,7 @@ static void hd_struct_free_work(struct work_struct *work)
	put_device(disk_to_dev(disk));

	part->start_sect = 0;
	part->nr_sects = 0;
	bdev_set_nr_sectors(part->bdev, 0);
	part_stat_set_all(part, 0);
	put_device(part_to_dev(part));
}
@@ -412,11 +419,10 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno,
		goto out_free_stats;
	p->bdev = bdev;

	hd_sects_seq_init(p);
	pdev = part_to_dev(p);

	p->start_sect = start;
	p->nr_sects = len;
	bdev_set_nr_sectors(bdev, len);
	p->partno = partno;
	p->policy = get_disk_ro(disk);

@@ -509,7 +515,7 @@ static bool partition_overlaps(struct gendisk *disk, sector_t start,
	disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
	while ((part = disk_part_iter_next(&piter))) {
		if (part->partno == skip_partno ||
		    start >= part->start_sect + part->nr_sects ||
		    start >= part->start_sect + bdev_nr_sectors(part->bdev) ||
		    start + length <= part->start_sect)
			continue;
		overlap = true;
@@ -600,8 +606,7 @@ int bdev_resize_partition(struct block_device *bdev, int partno,
	if (partition_overlaps(bdev->bd_disk, start, length, partno))
		goto out_unlock;

	part_nr_sects_write(part, length);
	bd_set_nr_sectors(bdevp, length);
	bdev_set_nr_sectors(bdevp, length);

	ret = 0;
out_unlock:
Loading