Commit a0d198f7 authored by Patrick Kelsey, committed by Jason Gunthorpe
Browse files

IB/hfi1: Fix math bugs in hfi1_can_pin_pages()

Fix arithmetic and logic errors in hfi1_can_pin_pages() that would allow
hfi1 to attempt pinning pages in cases where it should not because of
resource limits or lack of required capability.

Fixes: 2c97ce4f ("IB/hfi1: Add pin query function")
Link: https://lore.kernel.org/r/167656658362.2223096.10954762619837718026.stgit@awfm-02.cornelisnetworks.com


Signed-off-by: Brendan Cunningham <bcunningham@cornelisnetworks.com>
Signed-off-by: Patrick Kelsey <pat.kelsey@cornelisnetworks.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent d2225b83
Loading
Loading
Loading
Loading
+40 −21
Original line number Diff line number Diff line
@@ -29,33 +29,52 @@ MODULE_PARM_DESC(cache_size, "Send and receive side cache size limit (in MB)");
/**
 * hfi1_can_pin_pages - check whether more pages may be pinned
 * @dd: device data; num_rcv_contexts and first_dyn_alloc_ctxt are read to
 *      derive the number of user contexts
 * @mm: mm whose pinned_vm counter is compared against RLIMIT_MEMLOCK
 * @nlocked: page count added to @npages for the per-cache limit checks
 *           (presumably the pages this cache already holds - confirm
 *           against callers)
 * @npages: number of additional pages the caller wants to pin
 *
 * The RLIMIT_MEMLOCK based checks are skipped when the caller has
 * CAP_IPC_LOCK.  The cache_size module parameter limit is always applied.
 *
 * Return: true if none of the limits would be exceeded, false otherwise.
 */
bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm,
			u32 nlocked, u32 npages)
{
	unsigned long ulimit_pages;
	unsigned long cache_limit_pages;
	unsigned int usr_ctxts;

	/*
	 * Perform RLIMIT_MEMLOCK based checks unless CAP_IPC_LOCK is present.
	 */
	if (!capable(CAP_IPC_LOCK)) {
		ulimit_pages =
			DIV_ROUND_DOWN_ULL(rlimit(RLIMIT_MEMLOCK), PAGE_SIZE);

		/*
		 * Pinning these pages would exceed this process's locked memory
		 * limit.
		 */
		if (atomic64_read(&mm->pinned_vm) + npages > ulimit_pages)
			return false;

		/*
		 * Only allow 1/4 of the user's RLIMIT_MEMLOCK to be used for HFI
		 * caches.  This fraction is then equally distributed among all
		 * existing user contexts.  Note that if RLIMIT_MEMLOCK is
		 * 'unlimited' (-1), the value of this limit will be > 2^42 pages
		 * (2^64 / 2^12 / 2^8 / 2^2).
		 *
		 * The effectiveness of this check may be reduced if I/O occurs on
		 * some user contexts before all user contexts are created.  This
		 * check assumes that this process is the only one using this
		 * context (e.g., the corresponding fd was not passed to another
		 * process for concurrent access) as there is no per-context,
		 * per-process tracking of pinned pages.  It also assumes that each
		 * user context has only one cache to limit.
		 *
		 * NOTE(review): nlocked + npages is evaluated in u32 and would
		 * wrap for very large values; usr_ctxts is assumed non-zero -
		 * confirm callers guarantee both.
		 */
		usr_ctxts = dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt;
		if (nlocked + npages > (ulimit_pages / usr_ctxts / 4))
			return false;
	}

	/*
	 * Pinning these pages would exceed the size limit for this cache.
	 * cache_size is given in MB, so convert it to a page count first.
	 */
	cache_limit_pages = cache_size * (1024 * 1024) / PAGE_SIZE;
	if (nlocked + npages > cache_limit_pages)
		return false;

	return true;
}

int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t npages,