Commit ceff49d9 authored by Yufen Yu's avatar Yufen Yu Committed by Zheng Zengkai
Browse files

md/raid1: fix a race between removing rdev and access conf->mirrors[i].rdev

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4JYYO?from=project-issue
CVE: NA

---------------------------

We get a NULL pointer dereference oops when test raid1 as follow:

mdadm -CR /dev/md1 -l 1 -n 2 /dev/sd[ab]

mdadm /dev/md1 -f /dev/sda
mdadm /dev/md1 -r /dev/sda
mdadm /dev/md1 -a /dev/sda
sleep 5
mdadm /dev/md1 -f /dev/sdb
mdadm /dev/md1 -r /dev/sdb
mdadm /dev/md1 -a /dev/sdb

After a disk(/dev/sda) has been removed, we add the disk to
raid array again, which would trigger recovery action.
Since the rdev current state is 'spare', read/write bio can
be issued to the disk.

Then we set the other disk (/dev/sdb) faulty. Since the raid
array is now in degraded state and /dev/sdb is the only
'In_sync' disk, raid1_error() will return but without set
faulty success.

However, that can interrupt the recovery action and
md_check_recovery will try to call remove_and_add_spares()
to remove the spare disk. And the race condition between
remove_and_add_spares() and raid1_write_request() in follow
can cause NULL pointer dereference for conf->mirrors[i].rdev:

raid1_write_request()   md_check_recovery    raid1_error()
rcu_read_lock()
rdev != NULL
!test_bit(Faulty, &rdev->flags)

                                           conf->recovery_disabled=
                                             mddev->recovery_disabled;
                                            return busy

                        remove_and_add_spares
                        raid1_remove_disk
                        rdev->nr_pending == 0

atomic_inc(&rdev->nr_pending);
rcu_read_unlock()

                        p->rdev=NULL

conf->mirrors[i].rdev->data_offset
NULL pointer deref!!!

                        if (!test_bit(RemoveSynchronized,
                          &rdev->flags))
                         synchronize_rcu();
                         p->rdev=rdev

To fix the race condition, we add a new flag 'WantRemove' for rdev.
Before access conf->mirrors[i].rdev, we need to ensure the rdev
without 'WantRemove' bit.

Link: https://marc.info/?l=linux-raid&m=156412052717709&w=2



Reported-by: default avatarZou Wei <zou_wei@huawei.com>
Signed-off-by: default avatarYufen Yu <yuyufen@huawei.com>
Confilct:
        drivers/md/md.h
Signed-off-by: default avatarLaibin Qiu <qiulaibin@huawei.com>
Reviewed-by: default avataryuyufen <yuyufen@huawei.com>
Reviewed-by: default avatarJason Yan <yanaijie@huawei.com>
Signed-off-by: default avatarZheng Zengkai <zhengzengkai@huawei.com>
parent 78987190
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -213,6 +213,10 @@ enum flag_bits {
				 * check if there is collision between raid1
				 * serial bios.
				 */
	WantRemove,		/* Before set conf->mirrors[i] as NULL,
				 * we set the bit first, avoiding access the
				 * conf->mirrors[i] after it set NULL.
				 */
};

static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
+22 −6
Original line number Diff line number Diff line
@@ -641,7 +641,8 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
		rdev = rcu_dereference(conf->mirrors[disk].rdev);
		if (r1_bio->bios[disk] == IO_BLOCKED
		    || rdev == NULL
		    || test_bit(Faulty, &rdev->flags))
		    || test_bit(Faulty, &rdev->flags)
			|| test_bit(WantRemove, &rdev->flags))
			continue;
		if (!test_bit(In_sync, &rdev->flags) &&
		    rdev->recovery_offset < this_sector + sectors)
@@ -770,7 +771,8 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect

	if (best_disk >= 0) {
		rdev = rcu_dereference(conf->mirrors[best_disk].rdev);
		if (!rdev)
		if (!rdev || test_bit(Faulty, &rdev->flags)
				|| test_bit(WantRemove, &rdev->flags))
			goto retry;
		atomic_inc(&rdev->nr_pending);
		sectors = best_good_sectors;
@@ -1382,7 +1384,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
			break;
		}
		r1_bio->bios[i] = NULL;
		if (!rdev || test_bit(Faulty, &rdev->flags)) {
		if (!rdev || test_bit(Faulty, &rdev->flags)
				|| test_bit(WantRemove, &rdev->flags)) {
			if (i < conf->raid_disks)
				set_bit(R1BIO_Degraded, &r1_bio->state);
			continue;
@@ -1759,6 +1762,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)

			p->head_position = 0;
			rdev->raid_disk = mirror;
			clear_bit(WantRemove, &rdev->flags);
			err = 0;
			/* As all devices are equivalent, we don't need a full recovery
			 * if this was recently any drive of the array
@@ -1773,6 +1777,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
			/* Add this device as a replacement */
			clear_bit(In_sync, &rdev->flags);
			set_bit(Replacement, &rdev->flags);
			clear_bit(WantRemove, &rdev->flags);
			rdev->raid_disk = mirror;
			err = 0;
			conf->fullsync = 1;
@@ -1812,16 +1817,26 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
			err = -EBUSY;
			goto abort;
		}
		p->rdev = NULL;

		/*
		 * Before set p->rdev = NULL, we set WantRemove bit avoiding
		 * race between rdev remove and issue bio, which can cause
		 * NULL pointer deference of rdev by conf->mirrors[i].rdev.
		 */
		set_bit(WantRemove, &rdev->flags);

		if (!test_bit(RemoveSynchronized, &rdev->flags)) {
			synchronize_rcu();
			if (atomic_read(&rdev->nr_pending)) {
				/* lost the race, try later */
				err = -EBUSY;
				p->rdev = rdev;
				clear_bit(WantRemove, &rdev->flags);
				goto abort;
			}
		}

		p->rdev = NULL;

		if (conf->mirrors[conf->raid_disks + number].rdev) {
			/* We just removed a device that is being replaced.
			 * Move down the replacement.  We drain all IO before
@@ -2716,7 +2731,8 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,

		rdev = rcu_dereference(conf->mirrors[i].rdev);
		if (rdev == NULL ||
		    test_bit(Faulty, &rdev->flags)) {
		    test_bit(Faulty, &rdev->flags) ||
			test_bit(WantRemove, &rdev->flags)) {
			if (i < conf->raid_disks)
				still_degraded = 1;
		} else if (!test_bit(In_sync, &rdev->flags)) {