Commit e700898f authored by Mike Kravetz's avatar Mike Kravetz Committed by Andrew Morton
Browse files

hugetlb: really allocate vma lock for all sharable vmas

Commit bbff39cc ("hugetlb: allocate vma lock for all sharable vmas")
removed the pmd sharable checks in the vma lock helper routines.  However,
it left the functional version of helper routines behind #ifdef
CONFIG_ARCH_WANT_HUGE_PMD_SHARE.  Therefore, the vma lock is not being
used for sharable vmas on architectures that do not support pmd sharing. 
On these architectures, a potential fault/truncation race is exposed that
could leave pages in a hugetlb file past i_size until the file is removed.

Move the functional vma lock helpers outside the ifdef, and remove the
non-functional stubs.  Since the vma lock is not just for pmd sharing,
rename the routine __vma_shareable_flags_pmd.

Link: https://lkml.kernel.org/r/20221212235042.178355-1-mike.kravetz@oracle.com


Fixes: bbff39cc ("hugetlb: allocate vma lock for all sharable vmas")
Signed-off-by: default avatarMike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: default avatarMiaohe Lin <linmiaohe@huawei.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: James Houghton <jthoughton@google.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Cc: Peter Xu <peterx@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent 7ba594d7
Loading
Loading
Loading
Loading
+148 −185
Original line number Diff line number Diff line
@@ -255,6 +255,152 @@ static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma)
	return subpool_inode(file_inode(vma->vm_file));
}

/*
 * hugetlb vma_lock helper routines
 */
static bool __vma_shareable_lock(struct vm_area_struct *vma)
{
	return vma->vm_flags & (VM_MAYSHARE | VM_SHARED) &&
		vma->vm_private_data;
}

void hugetlb_vma_lock_read(struct vm_area_struct *vma)
{
	if (__vma_shareable_lock(vma)) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		down_read(&vma_lock->rw_sema);
	}
}

void hugetlb_vma_unlock_read(struct vm_area_struct *vma)
{
	if (__vma_shareable_lock(vma)) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		up_read(&vma_lock->rw_sema);
	}
}

void hugetlb_vma_lock_write(struct vm_area_struct *vma)
{
	if (__vma_shareable_lock(vma)) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		down_write(&vma_lock->rw_sema);
	}
}

void hugetlb_vma_unlock_write(struct vm_area_struct *vma)
{
	if (__vma_shareable_lock(vma)) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		up_write(&vma_lock->rw_sema);
	}
}

int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
{
	struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

	if (!__vma_shareable_lock(vma))
		return 1;

	return down_write_trylock(&vma_lock->rw_sema);
}

void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
{
	if (__vma_shareable_lock(vma)) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		lockdep_assert_held(&vma_lock->rw_sema);
	}
}

void hugetlb_vma_lock_release(struct kref *kref)
{
	struct hugetlb_vma_lock *vma_lock = container_of(kref,
			struct hugetlb_vma_lock, refs);

	kfree(vma_lock);
}

static void __hugetlb_vma_unlock_write_put(struct hugetlb_vma_lock *vma_lock)
{
	struct vm_area_struct *vma = vma_lock->vma;

	/*
	 * vma_lock structure may or not be released as a result of put,
	 * it certainly will no longer be attached to vma so clear pointer.
	 * Semaphore synchronizes access to vma_lock->vma field.
	 */
	vma_lock->vma = NULL;
	vma->vm_private_data = NULL;
	up_write(&vma_lock->rw_sema);
	kref_put(&vma_lock->refs, hugetlb_vma_lock_release);
}

static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma)
{
	if (__vma_shareable_lock(vma)) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		__hugetlb_vma_unlock_write_put(vma_lock);
	}
}

static void hugetlb_vma_lock_free(struct vm_area_struct *vma)
{
	/*
	 * Only present in sharable vmas.
	 */
	if (!vma || !__vma_shareable_lock(vma))
		return;

	if (vma->vm_private_data) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		down_write(&vma_lock->rw_sema);
		__hugetlb_vma_unlock_write_put(vma_lock);
	}
}

static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma)
{
	struct hugetlb_vma_lock *vma_lock;

	/* Only establish in (flags) sharable vmas */
	if (!vma || !(vma->vm_flags & VM_MAYSHARE))
		return;

	/* Should never get here with non-NULL vm_private_data */
	if (vma->vm_private_data)
		return;

	vma_lock = kmalloc(sizeof(*vma_lock), GFP_KERNEL);
	if (!vma_lock) {
		/*
		 * If we can not allocate structure, then vma can not
		 * participate in pmd sharing.  This is only a possible
		 * performance enhancement and memory saving issue.
		 * However, the lock is also used to synchronize page
		 * faults with truncation.  If the lock is not present,
		 * unlikely races could leave pages in a file past i_size
		 * until the file is removed.  Warn in the unlikely case of
		 * allocation failure.
		 */
		pr_warn_once("HugeTLB: unable to allocate vma specific lock\n");
		return;
	}

	kref_init(&vma_lock->refs);
	init_rwsem(&vma_lock->rw_sema);
	vma_lock->vma = vma;
	vma->vm_private_data = vma_lock;
}

/* Helper that removes a struct file_region from the resv_map cache and returns
 * it for use.
 */
@@ -6613,7 +6759,8 @@ bool hugetlb_reserve_pages(struct inode *inode,
	}

	/*
	 * vma specific semaphore used for pmd sharing synchronization
	 * vma specific semaphore used for pmd sharing and fault/truncation
	 * synchronization
	 */
	hugetlb_vma_lock_alloc(vma);

@@ -6869,149 +7016,6 @@ void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
		*end = ALIGN(*end, PUD_SIZE);
}

static bool __vma_shareable_flags_pmd(struct vm_area_struct *vma)
{
	return vma->vm_flags & (VM_MAYSHARE | VM_SHARED) &&
		vma->vm_private_data;
}

void hugetlb_vma_lock_read(struct vm_area_struct *vma)
{
	if (__vma_shareable_flags_pmd(vma)) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		down_read(&vma_lock->rw_sema);
	}
}

void hugetlb_vma_unlock_read(struct vm_area_struct *vma)
{
	if (__vma_shareable_flags_pmd(vma)) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		up_read(&vma_lock->rw_sema);
	}
}

void hugetlb_vma_lock_write(struct vm_area_struct *vma)
{
	if (__vma_shareable_flags_pmd(vma)) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		down_write(&vma_lock->rw_sema);
	}
}

void hugetlb_vma_unlock_write(struct vm_area_struct *vma)
{
	if (__vma_shareable_flags_pmd(vma)) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		up_write(&vma_lock->rw_sema);
	}
}

int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
{
	struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

	if (!__vma_shareable_flags_pmd(vma))
		return 1;

	return down_write_trylock(&vma_lock->rw_sema);
}

void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
{
	if (__vma_shareable_flags_pmd(vma)) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		lockdep_assert_held(&vma_lock->rw_sema);
	}
}

void hugetlb_vma_lock_release(struct kref *kref)
{
	struct hugetlb_vma_lock *vma_lock = container_of(kref,
			struct hugetlb_vma_lock, refs);

	kfree(vma_lock);
}

static void __hugetlb_vma_unlock_write_put(struct hugetlb_vma_lock *vma_lock)
{
	struct vm_area_struct *vma = vma_lock->vma;

	/*
	 * vma_lock structure may or not be released as a result of put,
	 * it certainly will no longer be attached to vma so clear pointer.
	 * Semaphore synchronizes access to vma_lock->vma field.
	 */
	vma_lock->vma = NULL;
	vma->vm_private_data = NULL;
	up_write(&vma_lock->rw_sema);
	kref_put(&vma_lock->refs, hugetlb_vma_lock_release);
}

static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma)
{
	if (__vma_shareable_flags_pmd(vma)) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		__hugetlb_vma_unlock_write_put(vma_lock);
	}
}

static void hugetlb_vma_lock_free(struct vm_area_struct *vma)
{
	/*
	 * Only present in sharable vmas.
	 */
	if (!vma || !__vma_shareable_flags_pmd(vma))
		return;

	if (vma->vm_private_data) {
		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

		down_write(&vma_lock->rw_sema);
		__hugetlb_vma_unlock_write_put(vma_lock);
	}
}

static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma)
{
	struct hugetlb_vma_lock *vma_lock;

	/* Only establish in (flags) sharable vmas */
	if (!vma || !(vma->vm_flags & VM_MAYSHARE))
		return;

	/* Should never get here with non-NULL vm_private_data */
	if (vma->vm_private_data)
		return;

	vma_lock = kmalloc(sizeof(*vma_lock), GFP_KERNEL);
	if (!vma_lock) {
		/*
		 * If we can not allocate structure, then vma can not
		 * participate in pmd sharing.  This is only a possible
		 * performance enhancement and memory saving issue.
		 * However, the lock is also used to synchronize page
		 * faults with truncation.  If the lock is not present,
		 * unlikely races could leave pages in a file past i_size
		 * until the file is removed.  Warn in the unlikely case of
		 * allocation failure.
		 */
		pr_warn_once("HugeTLB: unable to allocate vma specific lock\n");
		return;
	}

	kref_init(&vma_lock->refs);
	init_rwsem(&vma_lock->rw_sema);
	vma_lock->vma = vma;
	vma->vm_private_data = vma_lock;
}

/*
 * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
 * and returns the corresponding pte. While this is not necessary for the
@@ -7100,47 +7104,6 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,

#else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */

void hugetlb_vma_lock_read(struct vm_area_struct *vma)
{
}

void hugetlb_vma_unlock_read(struct vm_area_struct *vma)
{
}

void hugetlb_vma_lock_write(struct vm_area_struct *vma)
{
}

void hugetlb_vma_unlock_write(struct vm_area_struct *vma)
{
}

int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
{
	return 1;
}

void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
{
}

void hugetlb_vma_lock_release(struct kref *kref)
{
}

static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma)
{
}

static void hugetlb_vma_lock_free(struct vm_area_struct *vma)
{
}

static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma)
{
}

pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
		      unsigned long addr, pud_t *pud)
{