!11094 v4 Backport bugfix of folio from v6.11 (e80cd374) · Commits · EulixOS / Software / Kernel

Documentation/admin-guide/mm/transhuge.rst

+7 −8

Original line number	Diff line number	Diff line
		@@ -213,8 +213,8 @@ possible to enable/disable it by configurate the corresponding bit::

		The kernel could try to enable mappings for different sizes, eg, 64K on
		arm64, BIT0 for file mapping, BIT1 for anonymous mapping, and THP size
		page, BIT3 for anonymous mapping, where 64K anonymous mapping for arm64
		is dependent on BIT3 being turned on, the above feature are disabled by
		page, BIT2 for anonymous mapping, where 2M anonymous mapping for arm64
		is dependent on BIT2 being turned on, the above feature are disabled by
		default, and could enable the above feature by writing the corresponding
		bit to 1::

		@@ -232,12 +232,11 @@ it back by writing 0::
		echo 0 >/sys/kernel/mm/transparent_hugepage/pcp_allow_high_order
		echo 4 >/sys/kernel/mm/transparent_hugepage/pcp_allow_high_order

		khugepaged will be automatically started when one or more hugepage
		sizes are enabled (either by directly setting "always" or "madvise",
		or by setting "inherit" while the top-level enabled is set to "always"
		or "madvise"), and it'll be automatically shutdown when the last
		hugepage size is disabled (either by directly setting "never", or by
		setting "inherit" while the top-level enabled is set to "never").
		khugepaged will be automatically started when PMD-sized THP is enabled
		(either of the per-size anon control or the top-level control are set
		to "always" or "madvise"), and it'll be automatically shutdown when
		PMD-sized THP is disabled (when both the per-size anon control and the
		top-level control are "never")

		Khugepaged controls
		-------------------

fs/ext4/inode.c

+5 −4

Original line number	Diff line number	Diff line
		@@ -3976,18 +3976,19 @@ static int ext4_iomap_write_begin(struct file *file,
		*fsdata = delalloc ? (void *)0 : (void *)FALL_BACK_TO_NONDELALLOC;

		retry:
		iter.pos = pos;
		iter.len = len;

		folio = iomap_get_folio(&iter, pos, len);
		if (IS_ERR(folio))
		return PTR_ERR(folio);

		WARN_ON_ONCE(pos + len > folio_pos(folio) + folio_size(folio));
		if (pos + len > folio_pos(folio) + folio_size(folio))
		len = folio_pos(folio) + folio_size(folio) - pos;

		if (iomap_is_fully_dirty(folio, offset_in_folio(folio, pos), len))
		goto out;

		iter.pos = pos;
		iter.len = len;

		do {
		int length;

include/linux/huge_mm.h

+9 −15

Original line number	Diff line number	Diff line
		@@ -80,14 +80,20 @@ extern struct kobj_attribute shmem_enabled_attr;
		#define THP_ORDERS_ALL_ANON ((BIT(PMD_ORDER + 1) - 1) & ~(BIT(0) | BIT(1)))

		/*
		* Mask of all large folio orders supported for file THP.
		* Mask of all large folio orders supported for file THP. Folios in a DAX
		* file is never split and the MAX_PAGECACHE_ORDER limit does not apply to
		* it.
		*/
		#define THP_ORDERS_ALL_FILE (BIT(PMD_ORDER) | BIT(PUD_ORDER))
		#define THP_ORDERS_ALL_FILE_DAX \
		(BIT(PMD_ORDER) | BIT(PUD_ORDER))
		#define THP_ORDERS_ALL_FILE_DEFAULT \
		((BIT(MAX_PAGECACHE_ORDER + 1) - 1) & ~BIT(0))

		/*
		* Mask of all large folio orders supported for THP.
		*/
		#define THP_ORDERS_ALL (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE)
		#define THP_ORDERS_ALL \
		(THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE_DAX | THP_ORDERS_ALL_FILE_DEFAULT)

		#define TVA_SMAPS (1 << 0) /* Will be used for procfs */
		#define TVA_IN_PF (1 << 1) /* Page fault handler */
		@@ -124,18 +130,6 @@ static inline bool hugepage_global_always(void)
		(1<<TRANSPARENT_HUGEPAGE_FLAG);
		}

		static inline bool hugepage_flags_enabled(void)
		{
		/*
		* We cover both the anon and the file-backed case here; we must return
		* true if globally enabled, even when all anon sizes are set to never.
		* So we don't need to look at huge_anon_orders_inherit.
		*/
		return hugepage_global_enabled() ||
		huge_anon_orders_always ||
		huge_anon_orders_madvise;
		}

		static inline int highest_order(unsigned long orders)
		{
		return fls_long(orders) - 1;

mm/filemap.c

+25 −15

Original line number	Diff line number	Diff line
		@@ -4038,21 +4038,24 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
		loff_t pos = iocb->ki_pos;
		struct address_space *mapping = file->f_mapping;
		const struct address_space_operations *a_ops = mapping->a_ops;
		size_t chunk = mapping_max_folio_size(mapping);
		long status = 0;
		ssize_t written = 0;

		do {
		struct page *page;
		unsigned long offset; /* Offset into pagecache page */
		unsigned long bytes; /* Bytes to write to page */
		struct folio *folio;
		size_t offset; /* Offset into folio */
		size_t bytes; /* Bytes to write to folio */
		size_t copied; /* Bytes copied from user */
		void *fsdata = NULL;

		offset = (pos & (PAGE_SIZE - 1));
		bytes = min_t(unsigned long, PAGE_SIZE - offset,
		iov_iter_count(i));
		bytes = iov_iter_count(i);
		retry:
		offset = pos & (chunk - 1);
		bytes = min(chunk - offset, bytes);
		balance_dirty_pages_ratelimited(mapping);

		again:
		/*
		* Bring in the user page that we will copy from _first_.
		* Otherwise there's a nasty deadlock on copying from the
		@@ -4074,11 +4077,16 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
		if (unlikely(status < 0))
		break;

		folio = page_folio(page);
		offset = offset_in_folio(folio, pos);
		if (bytes > folio_size(folio) - offset)
		bytes = folio_size(folio) - offset;

		if (mapping_writably_mapped(mapping))
		flush_dcache_page(page);
		flush_dcache_folio(folio);

		copied = copy_page_from_iter_atomic(page, offset, bytes, i);
		flush_dcache_page(page);
		copied = copy_folio_from_iter_atomic(folio, offset, bytes, i);
		flush_dcache_folio(folio);

		status = a_ops->write_end(file, mapping, pos, bytes, copied,
		page, fsdata);
		@@ -4096,14 +4104,16 @@ ssize_t generic_perform_write(struct kiocb iocb, struct iov_iter i)
		* halfway through, might be a race with munmap,
		* might be severe memory pressure.
		*/
		if (copied)
		if (chunk > PAGE_SIZE)
		chunk /= 2;
		if (copied) {
		bytes = copied;
		goto again;
		goto retry;
		}
		} else {
		pos += status;
		written += status;

		balance_dirty_pages_ratelimited(mapping);
		}
		} while (iov_iter_count(i));

		if (!written)

mm/huge_memory.c

+18 −14

Original line number	Diff line number	Diff line
		@@ -84,9 +84,17 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
		bool smaps = tva_flags & TVA_SMAPS;
		bool in_pf = tva_flags & TVA_IN_PF;
		bool enforce_sysfs = tva_flags & TVA_ENFORCE_SYSFS;
		unsigned long supported_orders;

		/* Check the intersection of requested and supported orders. */
		orders &= vma_is_anonymous(vma) ?
		THP_ORDERS_ALL_ANON : THP_ORDERS_ALL_FILE;
		if (vma_is_anonymous(vma))
		supported_orders = THP_ORDERS_ALL_ANON;
		else if (vma_is_dax(vma))
		supported_orders = THP_ORDERS_ALL_FILE_DAX;
		else
		supported_orders = THP_ORDERS_ALL_FILE_DEFAULT;

		orders &= supported_orders;
		if (!orders)
		return 0;

		@@ -652,6 +660,13 @@ static ssize_t thpsize_enabled_store(struct kobject *kobj,
		} else
		ret = -EINVAL;

		if (ret > 0) {
		int err;

		err = start_stop_khugepaged();
		if (err)
		ret = err;
		}
		return ret;
		}

		@@ -3348,22 +3363,11 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
		return ret;
		}

		void folio_undo_large_rmappable(struct folio *folio)
		void __folio_undo_large_rmappable(struct folio *folio)
		{
		struct deferred_split *ds_queue;
		unsigned long flags;

		if (folio_order(folio) <= 1)
		return;

		/*
		* At this point, there is no one trying to add the folio to
		* deferred_list. If folio is not in deferred_list, it's safe
		* to check without acquiring the split_queue_lock.
		*/
		if (data_race(list_empty(&folio->_deferred_list)))
		return;

		ds_queue = get_deferred_split_queue(folio);
		spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
		if (!list_empty(&folio->_deferred_list)) {