writeback, cgroup: support switching multiple inodes at once (f5fbe6b7) · Commits · EulixOS / Software / Kernel

fs/fs-writeback.c

+64 −42

Original line number	Diff line number	Diff line
		@@ -335,10 +335,18 @@ static struct bdi_writeback *inode_to_wb_and_lock_list(struct inode *inode)
		}

		struct inode_switch_wbs_context {
		struct inode *inode;
		struct bdi_writeback *new_wb;

		struct rcu_work work;

		/*
		* Multiple inodes can be switched at once. The switching procedure
		* consists of two parts, separated by a RCU grace period. To make
		* sure that the second part is executed for each inode gone through
		* the first part, all inode pointers are placed into a NULL-terminated
		* array embedded into struct inode_switch_wbs_context. Otherwise
		* an inode could be left in a non-consistent state.
		*/
		struct bdi_writeback *new_wb;
		struct inode *inodes[];
		};

		static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi)
		@@ -351,39 +359,15 @@ static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi)
		up_write(&bdi->wb_switch_rwsem);
		}

		static void inode_do_switch_wbs(struct inode *inode,
		static bool inode_do_switch_wbs(struct inode *inode,
		struct bdi_writeback *old_wb,
		struct bdi_writeback *new_wb)
		{
		struct backing_dev_info *bdi = inode_to_bdi(inode);
		struct address_space *mapping = inode->i_mapping;
		struct bdi_writeback *old_wb = inode->i_wb;
		XA_STATE(xas, &mapping->i_pages, 0);
		struct page *page;
		bool switched = false;

		/*
		* If @inode switches cgwb membership while sync_inodes_sb() is
		* being issued, sync_inodes_sb() might miss it. Synchronize.
		*/
		down_read(&bdi->wb_switch_rwsem);

		/*
		* By the time control reaches here, RCU grace period has passed
		* since I_WB_SWITCH assertion and all wb stat update transactions
		* between unlocked_inode_to_wb_begin/end() are guaranteed to be
		* synchronizing against the i_pages lock.
		*
		* Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock
		* gives us exclusion against all wb related operations on @inode
		* including IO list manipulations and stat updates.
		*/
		if (old_wb < new_wb) {
		spin_lock(&old_wb->list_lock);
		spin_lock_nested(&new_wb->list_lock, SINGLE_DEPTH_NESTING);
		} else {
		spin_lock(&new_wb->list_lock);
		spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING);
		}
		spin_lock(&inode->i_lock);
		xa_lock_irq(&mapping->i_pages);

		@@ -458,25 +442,63 @@ static void inode_do_switch_wbs(struct inode *inode,

		xa_unlock_irq(&mapping->i_pages);
		spin_unlock(&inode->i_lock);
		spin_unlock(&new_wb->list_lock);
		spin_unlock(&old_wb->list_lock);

		up_read(&bdi->wb_switch_rwsem);

		if (switched) {
		wb_wakeup(new_wb);
		wb_put(old_wb);
		}
		return switched;
		}

		static void inode_switch_wbs_work_fn(struct work_struct *work)
		{
		struct inode_switch_wbs_context *isw =
		container_of(to_rcu_work(work), struct inode_switch_wbs_context, work);
		struct backing_dev_info *bdi = inode_to_bdi(isw->inodes[0]);
		struct bdi_writeback *old_wb = isw->inodes[0]->i_wb;
		struct bdi_writeback *new_wb = isw->new_wb;
		unsigned long nr_switched = 0;
		struct inode **inodep;

		inode_do_switch_wbs(isw->inode, isw->new_wb);
		wb_put(isw->new_wb);
		iput(isw->inode);
		/*
		* If @inode switches cgwb membership while sync_inodes_sb() is
		* being issued, sync_inodes_sb() might miss it. Synchronize.
		*/
		down_read(&bdi->wb_switch_rwsem);

		/*
		* By the time control reaches here, RCU grace period has passed
		* since I_WB_SWITCH assertion and all wb stat update transactions
		* between unlocked_inode_to_wb_begin/end() are guaranteed to be
		* synchronizing against the i_pages lock.
		*
		* Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock
		* gives us exclusion against all wb related operations on @inode
		* including IO list manipulations and stat updates.
		*/
		if (old_wb < new_wb) {
		spin_lock(&old_wb->list_lock);
		spin_lock_nested(&new_wb->list_lock, SINGLE_DEPTH_NESTING);
		} else {
		spin_lock(&new_wb->list_lock);
		spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING);
		}

		for (inodep = isw->inodes; *inodep; inodep++) {
		WARN_ON_ONCE((*inodep)->i_wb != old_wb);
		if (inode_do_switch_wbs(*inodep, old_wb, new_wb))
		nr_switched++;
		}

		spin_unlock(&new_wb->list_lock);
		spin_unlock(&old_wb->list_lock);

		up_read(&bdi->wb_switch_rwsem);

		if (nr_switched) {
		wb_wakeup(new_wb);
		wb_put_many(old_wb, nr_switched);
		}

		for (inodep = isw->inodes; *inodep; inodep++)
		iput(*inodep);
		wb_put(new_wb);
		kfree(isw);
		atomic_dec(&isw_nr_in_flight);
		}
		@@ -503,7 +525,7 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
		if (atomic_read(&isw_nr_in_flight) > WB_FRN_MAX_IN_FLIGHT)
		return;

		isw = kzalloc(sizeof(*isw), GFP_ATOMIC);
		isw = kzalloc(sizeof(*isw) + 2 * sizeof(struct inode *), GFP_ATOMIC);
		if (!isw)
		return;

		@@ -530,7 +552,7 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
		__iget(inode);
		spin_unlock(&inode->i_lock);

		isw->inode = inode;
		isw->inodes[0] = inode;

		/*
		* In addition to synchronizing among switchers, I_WB_SWITCH tells

include/linux/backing-dev-defs.h

+16 −2

Original line number	Diff line number	Diff line
		@@ -240,8 +240,9 @@ static inline void wb_get(struct bdi_writeback *wb)
		/**
		* wb_put - decrement a wb's refcount
		* @wb: bdi_writeback to put
		* @nr: number of references to put
		*/
		static inline void wb_put(struct bdi_writeback *wb)
		static inline void wb_put_many(struct bdi_writeback *wb, unsigned long nr)
		{
		if (WARN_ON_ONCE(!wb->bdi)) {
		/*
		@@ -252,7 +253,16 @@ static inline void wb_put(struct bdi_writeback *wb)
		}

		if (wb != &wb->bdi->wb)
		percpu_ref_put(&wb->refcnt);
		percpu_ref_put_many(&wb->refcnt, nr);
		}

		/**
		* wb_put - decrement a wb's refcount
		* @wb: bdi_writeback to put
		*
		* Single-reference convenience wrapper: forwards to wb_put_many()
		* with a count of 1.
		*/
		static inline void wb_put(struct bdi_writeback *wb)
		{
		wb_put_many(wb, 1);
		}

		/**
		@@ -281,6 +291,10 @@ static inline void wb_put(struct bdi_writeback *wb)
		{
		}

		/*
		* Stub variant of wb_put_many(): the body is empty, so @wb and @nr are
		* ignored and no reference count is touched.
		* NOTE(review): presumably the !CONFIG_CGROUP_WRITEBACK counterpart of
		* the real helper — the enclosing #ifdef is not visible here; confirm.
		*/
		static inline void wb_put_many(struct bdi_writeback *wb, unsigned long nr)
		{
		}

		static inline bool wb_dying(struct bdi_writeback *wb)
		{
		return false;