Commit ed29c269 authored by Maarten Lankhorst, committed by Daniel Vetter
Browse files

drm/i915: Fix userptr so we do not have to worry about obj->mm.lock, v7.



Instead of doing what we do currently, which will never work with
PROVE_LOCKING, do the same as AMD does, and something similar to
relocation slowpath. When all locks are dropped, we acquire the
pages for pinning. When the locks are taken, we transfer those
pages in .get_pages() to the bo. As a final check before installing
the fences, we ensure that the mmu notifier was not called; if it was,
we return -EAGAIN to userspace to signal it has to start over.

Changes since v1:
- Unbinding is done in submit_init only. submit_begin() removed.
- MMU_NOTFIER -> MMU_NOTIFIER
Changes since v2:
- Make i915->mm.notifier a spinlock.
Changes since v3:
- Add WARN_ON if there are any page references left, should have been 0.
- Return 0 on success in submit_init(), bug from spinlock conversion.
- Release pvec outside of notifier_lock (Thomas).
Changes since v4:
- Mention why we're clearing eb->[i + 1].vma in the code. (Thomas)
- Actually check all invalidations in eb_move_to_gpu. (Thomas)
- Do not wait when process is exiting to fix gem_ctx_persistence.userptr.
Changes since v5:
- Clarify why check on PF_EXITING is (temporarily) required.
Changes since v6:
- Ensure userptr validity is checked in set_domain through a special path.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Acked-by: Dave Airlie <airlied@redhat.com>
[danvet: s/kfree/kvfree/ in i915_gem_object_userptr_drop_ref was requested
in the previous review round, but the change got lost. The other open
questions around page refcount are imo better discussed in a separate
series, with amdgpu folks involved.]
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20210323155059.628690-17-maarten.lankhorst@linux.intel.com
parent 20ee27bd
Loading
Loading
Loading
Loading
+16 −2
Original line number Diff line number Diff line
@@ -533,14 +533,28 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
	if (err)
		goto out;

	if (i915_gem_object_is_userptr(obj)) {
		/*
		 * Try to grab userptr pages, iris uses set_domain to check
		 * userptr validity
		 */
		err = i915_gem_object_userptr_validate(obj);
		if (!err)
			err = i915_gem_object_wait(obj,
						   I915_WAIT_INTERRUPTIBLE |
						   I915_WAIT_PRIORITY |
						   (write_domain ? I915_WAIT_ALL : 0),
						   MAX_SCHEDULE_TIMEOUT);
		goto out;
	}

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj) &&
	    !i915_gem_object_is_userptr(obj)) {
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}
+88 −13
Original line number Diff line number Diff line
@@ -53,14 +53,16 @@ enum {
/* __EXEC_OBJECT_NO_RESERVE is BIT(31), defined in i915_vma.h */
#define __EXEC_OBJECT_HAS_PIN		BIT(30)
#define __EXEC_OBJECT_HAS_FENCE		BIT(29)
#define __EXEC_OBJECT_NEEDS_MAP		BIT(28)
#define __EXEC_OBJECT_NEEDS_BIAS	BIT(27)
#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 27) /* all of the above + */
#define __EXEC_OBJECT_USERPTR_INIT	BIT(28)
#define __EXEC_OBJECT_NEEDS_MAP		BIT(27)
#define __EXEC_OBJECT_NEEDS_BIAS	BIT(26)
#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 26) /* all of the above + */
#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)

#define __EXEC_HAS_RELOC	BIT(31)
#define __EXEC_ENGINE_PINNED	BIT(30)
#define __EXEC_INTERNAL_FLAGS	(~0u << 30)
#define __EXEC_USERPTR_USED	BIT(29)
#define __EXEC_INTERNAL_FLAGS	(~0u << 29)
#define UPDATE			PIN_OFFSET_FIXED

#define BATCH_OFFSET_BIAS (256*1024)
@@ -871,6 +873,26 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
		}

		eb_add_vma(eb, i, batch, vma);

		if (i915_gem_object_is_userptr(vma->obj)) {
			err = i915_gem_object_userptr_submit_init(vma->obj);
			if (err) {
				if (i + 1 < eb->buffer_count) {
					/*
					 * Execbuffer code expects last vma entry to be NULL,
					 * since we already initialized this entry,
					 * set the next value to NULL or we mess up
					 * cleanup handling.
					 */
					eb->vma[i + 1].vma = NULL;
				}

				return err;
			}

			eb->vma[i].flags |= __EXEC_OBJECT_USERPTR_INIT;
			eb->args->flags |= __EXEC_USERPTR_USED;
		}
	}

	if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)) {
@@ -972,7 +994,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
	}
}

static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
static void eb_release_vmas(struct i915_execbuffer *eb, bool final, bool release_userptr)
{
	const unsigned int count = eb->buffer_count;
	unsigned int i;
@@ -986,6 +1008,11 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final)

		eb_unreserve_vma(ev);

		if (release_userptr && ev->flags & __EXEC_OBJECT_USERPTR_INIT) {
			ev->flags &= ~__EXEC_OBJECT_USERPTR_INIT;
			i915_gem_object_userptr_submit_fini(vma->obj);
		}

		if (final)
			i915_vma_put(vma);
	}
@@ -1923,6 +1950,31 @@ static int eb_prefault_relocations(const struct i915_execbuffer *eb)
	return 0;
}

static int eb_reinit_userptr(struct i915_execbuffer *eb)
{
	const unsigned int count = eb->buffer_count;
	unsigned int i;
	int ret;

	if (likely(!(eb->args->flags & __EXEC_USERPTR_USED)))
		return 0;

	for (i = 0; i < count; i++) {
		struct eb_vma *ev = &eb->vma[i];

		if (!i915_gem_object_is_userptr(ev->vma->obj))
			continue;

		ret = i915_gem_object_userptr_submit_init(ev->vma->obj);
		if (ret)
			return ret;

		ev->flags |= __EXEC_OBJECT_USERPTR_INIT;
	}

	return 0;
}

static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
					   struct i915_request *rq)
{
@@ -1937,7 +1989,7 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
	}

	/* We may process another execbuffer during the unlock... */
	eb_release_vmas(eb, false);
	eb_release_vmas(eb, false, true);
	i915_gem_ww_ctx_fini(&eb->ww);

	if (rq) {
@@ -1978,10 +2030,8 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
		err = 0;
	}

#ifdef CONFIG_MMU_NOTIFIER
	if (!err)
		flush_workqueue(eb->i915->mm.userptr_wq);
#endif
		err = eb_reinit_userptr(eb);

err_relock:
	i915_gem_ww_ctx_init(&eb->ww, true);
@@ -2043,7 +2093,7 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,

err:
	if (err == -EDEADLK) {
		eb_release_vmas(eb, false);
		eb_release_vmas(eb, false, false);
		err = i915_gem_ww_ctx_backoff(&eb->ww);
		if (!err)
			goto repeat_validate;
@@ -2140,7 +2190,7 @@ static int eb_relocate_parse(struct i915_execbuffer *eb)

err:
	if (err == -EDEADLK) {
		eb_release_vmas(eb, false);
		eb_release_vmas(eb, false, false);
		err = i915_gem_ww_ctx_backoff(&eb->ww);
		if (!err)
			goto retry;
@@ -2215,6 +2265,30 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
						      flags | __EXEC_OBJECT_NO_RESERVE);
	}

#ifdef CONFIG_MMU_NOTIFIER
	if (!err && (eb->args->flags & __EXEC_USERPTR_USED)) {
		spin_lock(&eb->i915->mm.notifier_lock);

		/*
		 * count is always at least 1, otherwise __EXEC_USERPTR_USED
		 * could not have been set
		 */
		for (i = 0; i < count; i++) {
			struct eb_vma *ev = &eb->vma[i];
			struct drm_i915_gem_object *obj = ev->vma->obj;

			if (!i915_gem_object_is_userptr(obj))
				continue;

			err = i915_gem_object_userptr_submit_done(obj);
			if (err)
				break;
		}

		spin_unlock(&eb->i915->mm.notifier_lock);
	}
#endif

	if (unlikely(err))
		goto err_skip;

@@ -3359,7 +3433,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,

	err = eb_lookup_vmas(&eb);
	if (err) {
		eb_release_vmas(&eb, true);
		eb_release_vmas(&eb, true, true);
		goto err_engine;
	}

@@ -3431,6 +3505,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,

	trace_i915_request_queue(eb.request, eb.batch_flags);
	err = eb_submit(&eb, batch);

err_request:
	i915_request_get(eb.request);
	err = eb_request_add(&eb, err);
@@ -3451,7 +3526,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
	i915_request_put(eb.request);

err_vma:
	eb_release_vmas(&eb, true);
	eb_release_vmas(&eb, true, true);
	if (eb.trampoline)
		i915_vma_unpin(eb.trampoline);
	WARN_ON(err == -EDEADLK);
+22 −16
Original line number Diff line number Diff line
@@ -33,6 +33,7 @@ i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915,
				       const void *data, resource_size_t size);

extern const struct drm_i915_gem_object_ops i915_gem_shmem_ops;

void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
				     struct sg_table *pages,
				     bool needs_clflush);
@@ -252,12 +253,6 @@ i915_gem_object_never_mmap(const struct drm_i915_gem_object *obj)
	return i915_gem_object_type_has(obj, I915_GEM_OBJECT_NO_MMAP);
}

static inline bool
i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj)
{
	return i915_gem_object_type_has(obj, I915_GEM_OBJECT_ASYNC_CANCEL);
}

static inline bool
i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj)
{
@@ -548,16 +543,6 @@ void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
void __i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj,
					      enum fb_op_origin origin);

static inline bool
i915_gem_object_is_userptr(struct drm_i915_gem_object *obj)
{
#ifdef CONFIG_MMU_NOTIFIER
	return obj->userptr.mm;
#else
	return false;
#endif
}

static inline void
i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
				  enum fb_op_origin origin)
@@ -578,4 +563,25 @@ int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset,

bool i915_gem_object_is_shmem(const struct drm_i915_gem_object *obj);

#ifdef CONFIG_MMU_NOTIFIER
static inline bool
i915_gem_object_is_userptr(struct drm_i915_gem_object *obj)
{
	return obj->userptr.notifier.mm;
}

int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj);
int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj);
void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj);
int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj);
#else
static inline bool i915_gem_object_is_userptr(struct drm_i915_gem_object *obj) { return false; }

static inline int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; }
static inline int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; }
static inline void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); }
static inline int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; }

#endif

#endif
+6 −4
Original line number Diff line number Diff line
@@ -7,6 +7,8 @@
#ifndef __I915_GEM_OBJECT_TYPES_H__
#define __I915_GEM_OBJECT_TYPES_H__

#include <linux/mmu_notifier.h>

#include <drm/drm_gem.h>
#include <uapi/drm/i915_drm.h>

@@ -34,7 +36,6 @@ struct drm_i915_gem_object_ops {
#define I915_GEM_OBJECT_IS_SHRINKABLE	BIT(2)
#define I915_GEM_OBJECT_IS_PROXY	BIT(3)
#define I915_GEM_OBJECT_NO_MMAP		BIT(4)
#define I915_GEM_OBJECT_ASYNC_CANCEL	BIT(5)

	/* Interface between the GEM object and its backing storage.
	 * get_pages() is called once prior to the use of the associated set
@@ -293,10 +294,11 @@ struct drm_i915_gem_object {
#ifdef CONFIG_MMU_NOTIFIER
		struct i915_gem_userptr {
			uintptr_t ptr;
			unsigned long notifier_seq;

			struct i915_mm_struct *mm;
			struct i915_mmu_object *mmu_object;
			struct work_struct *work;
			struct mmu_interval_notifier notifier;
			struct page **pvec;
			int page_ref;
		} userptr;
#endif

+1 −1
Original line number Diff line number Diff line
@@ -226,7 +226,7 @@ int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
	 * get_pages backends we should be better able to handle the
	 * cancellation of the async task in a more uniform manner.
	 */
	if (!pages && !i915_gem_object_needs_async_cancel(obj))
	if (!pages)
		pages = ERR_PTR(-EINVAL);

	if (!IS_ERR(pages))
Loading