Commit 5273e82c authored by Felix Kuehling's avatar Felix Kuehling Committed by Alex Deucher
Browse files

drm/amdkfd: Improve concurrency of event handling



Use rcu_read_lock to read p->event_idr concurrently with other readers
and writers. Use p->event_mutex only for creating and destroying events
and in kfd_wait_on_events.

Protect the contents of the kfd_event structure with a per-event
spinlock that can be taken inside the rcu_read_lock critical section.

This eliminates contention of p->event_mutex in set_event, which tends
to be on the critical path for dispatch latency even when busy waiting
is used. It also eliminates lock contention in event interrupt handlers.
Since the p->event_mutex is now used much less, the impact of requiring
it in kfd_wait_on_events should also be much smaller.

This should improve event handling latency for processes using multiple
GPUs concurrently.

v2: Reschedule the worker periodically to avoid soft lockup warnings

Signed-off-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Sean Keely <Sean.Keely@amd.com> # v1
Tested-by: default avatarSanjay Tripathi <sanjay.tripathi@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 8d2aad98
Loading
Loading
Loading
Loading
+77 −42
Original line number Diff line number Diff line
@@ -128,8 +128,8 @@ static int allocate_event_notification_slot(struct kfd_process *p,
}

/*
 * Assumes that p->event_mutex is held and of course that p is not going
 * away (current or locked).
 * Assumes that p->event_mutex or rcu_readlock is held and of course that p is
 * not going away.
 */
static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
{
@@ -251,15 +251,18 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
	struct kfd_event_waiter *waiter;

	/* Wake up pending waiters. They will return failure */
	spin_lock(&ev->lock);
	list_for_each_entry(waiter, &ev->wq.head, wait.entry)
		waiter->event = NULL;
		WRITE_ONCE(waiter->event, NULL);
	wake_up_all(&ev->wq);
	spin_unlock(&ev->lock);

	if (ev->type == KFD_EVENT_TYPE_SIGNAL ||
	    ev->type == KFD_EVENT_TYPE_DEBUG)
		p->signal_event_count--;

	idr_remove(&p->event_idr, ev->event_id);
	synchronize_rcu();
	kfree(ev);
}

@@ -392,6 +395,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
	ev->auto_reset = auto_reset;
	ev->signaled = false;

	spin_lock_init(&ev->lock);
	init_waitqueue_head(&ev->wq);

	*event_page_offset = 0;
@@ -466,6 +470,7 @@ int kfd_criu_restore_event(struct file *devkfd,
	ev->auto_reset = ev_priv->auto_reset;
	ev->signaled = ev_priv->signaled;

	spin_lock_init(&ev->lock);
	init_waitqueue_head(&ev->wq);

	mutex_lock(&p->event_mutex);
@@ -609,13 +614,13 @@ static void set_event(struct kfd_event *ev)

	/* Auto reset if the list is non-empty and we're waking
	 * someone. waitqueue_active is safe here because we're
	 * protected by the p->event_mutex, which is also held when
	 * protected by the ev->lock, which is also held when
	 * updating the wait queues in kfd_wait_on_events.
	 */
	ev->signaled = !ev->auto_reset || !waitqueue_active(&ev->wq);

	list_for_each_entry(waiter, &ev->wq.head, wait.entry)
		waiter->activated = true;
		WRITE_ONCE(waiter->activated, true);

	wake_up_all(&ev->wq);
}
@@ -626,16 +631,18 @@ int kfd_set_event(struct kfd_process *p, uint32_t event_id)
	int ret = 0;
	struct kfd_event *ev;

	mutex_lock(&p->event_mutex);
	rcu_read_lock();

	ev = lookup_event_by_id(p, event_id);
	spin_lock(&ev->lock);

	if (ev && event_can_be_cpu_signaled(ev))
		set_event(ev);
	else
		ret = -EINVAL;

	mutex_unlock(&p->event_mutex);
	spin_unlock(&ev->lock);
	rcu_read_unlock();
	return ret;
}

@@ -650,23 +657,25 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id)
	int ret = 0;
	struct kfd_event *ev;

	mutex_lock(&p->event_mutex);
	rcu_read_lock();

	ev = lookup_event_by_id(p, event_id);
	spin_lock(&ev->lock);

	if (ev && event_can_be_cpu_signaled(ev))
		reset_event(ev);
	else
		ret = -EINVAL;

	mutex_unlock(&p->event_mutex);
	spin_unlock(&ev->lock);
	rcu_read_unlock();
	return ret;

}

static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev)
{
	page_slots(p->signal_page)[ev->event_id] = UNSIGNALED_EVENT_SLOT;
	WRITE_ONCE(page_slots(p->signal_page)[ev->event_id], UNSIGNALED_EVENT_SLOT);
}

static void set_event_from_interrupt(struct kfd_process *p,
@@ -674,7 +683,9 @@ static void set_event_from_interrupt(struct kfd_process *p,
{
	if (ev && event_can_be_gpu_signaled(ev)) {
		acknowledge_signal(p, ev);
		spin_lock(&ev->lock);
		set_event(ev);
		spin_unlock(&ev->lock);
	}
}

@@ -693,7 +704,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
	if (!p)
		return; /* Presumably process exited. */

	mutex_lock(&p->event_mutex);
	rcu_read_lock();

	if (valid_id_bits)
		ev = lookup_signaled_event_by_partial_id(p, partial_id,
@@ -721,7 +732,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
				if (id >= KFD_SIGNAL_EVENT_LIMIT)
					break;

				if (slots[id] != UNSIGNALED_EVENT_SLOT)
				if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT)
					set_event_from_interrupt(p, ev);
			}
		} else {
@@ -730,14 +741,14 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
			 * only signaled events from the IDR.
			 */
			for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++)
				if (slots[id] != UNSIGNALED_EVENT_SLOT) {
				if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT) {
					ev = lookup_event_by_id(p, id);
					set_event_from_interrupt(p, ev);
				}
		}
	}

	mutex_unlock(&p->event_mutex);
	rcu_read_unlock();
	kfd_unref_process(p);
}

@@ -769,9 +780,11 @@ static int init_event_waiter_get_status(struct kfd_process *p,
	if (!ev)
		return -EINVAL;

	spin_lock(&ev->lock);
	waiter->event = ev;
	waiter->activated = ev->signaled;
	ev->signaled = ev->signaled && !ev->auto_reset;
	spin_unlock(&ev->lock);

	return 0;
}
@@ -783,8 +796,11 @@ static void init_event_waiter_add_to_waitlist(struct kfd_event_waiter *waiter)
	/* Only add to the wait list if we actually need to
	 * wait on this event.
	 */
	if (!waiter->activated)
	if (!waiter->activated) {
		spin_lock(&ev->lock);
		add_wait_queue(&ev->wq, &waiter->wait);
		spin_unlock(&ev->lock);
	}
}

/* test_event_condition - Test condition of events being waited for
@@ -804,10 +820,10 @@ static uint32_t test_event_condition(bool all, uint32_t num_events,
	uint32_t activated_count = 0;

	for (i = 0; i < num_events; i++) {
		if (!event_waiters[i].event)
		if (!READ_ONCE(event_waiters[i].event))
			return KFD_IOC_WAIT_RESULT_FAIL;

		if (event_waiters[i].activated) {
		if (READ_ONCE(event_waiters[i].activated)) {
			if (!all)
				return KFD_IOC_WAIT_RESULT_COMPLETE;

@@ -836,6 +852,8 @@ static int copy_signaled_event_data(uint32_t num_events,
	for (i = 0; i < num_events; i++) {
		waiter = &event_waiters[i];
		event = waiter->event;
		if (!event)
			return -EINVAL; /* event was destroyed */
		if (waiter->activated && event->type == KFD_EVENT_TYPE_MEMORY) {
			dst = &data[i].memory_exception_data;
			src = &event->memory_exception_data;
@@ -846,11 +864,8 @@ static int copy_signaled_event_data(uint32_t num_events,
	}

	return 0;

}



static long user_timeout_to_jiffies(uint32_t user_timeout_ms)
{
	if (user_timeout_ms == KFD_EVENT_TIMEOUT_IMMEDIATE)
@@ -874,9 +889,12 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters)
	uint32_t i;

	for (i = 0; i < num_events; i++)
		if (waiters[i].event)
		if (waiters[i].event) {
			spin_lock(&waiters[i].event->lock);
			remove_wait_queue(&waiters[i].event->wq,
					  &waiters[i].wait);
			spin_unlock(&waiters[i].event->lock);
		}

	kfree(waiters);
}
@@ -900,6 +918,9 @@ int kfd_wait_on_events(struct kfd_process *p,
		goto out;
	}

	/* Use p->event_mutex here to protect against concurrent creation and
	 * destruction of events while we initialize event_waiters.
	 */
	mutex_lock(&p->event_mutex);

	for (i = 0; i < num_events; i++) {
@@ -978,14 +999,19 @@ int kfd_wait_on_events(struct kfd_process *p,
	}
	__set_current_state(TASK_RUNNING);

	mutex_lock(&p->event_mutex);
	/* copy_signaled_event_data may sleep. So this has to happen
	 * after the task state is set back to RUNNING.
	 *
	 * The event may also have been destroyed after signaling. So
	 * copy_signaled_event_data also must confirm that the event
	 * still exists. Therefore this must be under the p->event_mutex
	 * which is also held when events are destroyed.
	 */
	if (!ret && *wait_result == KFD_IOC_WAIT_RESULT_COMPLETE)
		ret = copy_signaled_event_data(num_events,
					       event_waiters, events);

	mutex_lock(&p->event_mutex);
out_unlock:
	free_waiters(num_events, event_waiters);
	mutex_unlock(&p->event_mutex);
@@ -1044,8 +1070,7 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
}

/*
 * Assumes that p->event_mutex is held and of course
 * that p is not going away (current or locked).
 * Assumes that p is not going away.
 */
static void lookup_events_by_type_and_signal(struct kfd_process *p,
		int type, void *event_data)
@@ -1057,6 +1082,8 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,

	ev_data = (struct kfd_hsa_memory_exception_data *) event_data;

	rcu_read_lock();

	id = KFD_FIRST_NONSIGNAL_EVENT_ID;
	idr_for_each_entry_continue(&p->event_idr, ev, id)
		if (ev->type == type) {
@@ -1064,9 +1091,11 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
			dev_dbg(kfd_device,
					"Event found: id %X type %d",
					ev->event_id, ev->type);
			spin_lock(&ev->lock);
			set_event(ev);
			if (ev->type == KFD_EVENT_TYPE_MEMORY && ev_data)
				ev->memory_exception_data = *ev_data;
			spin_unlock(&ev->lock);
		}

	if (type == KFD_EVENT_TYPE_MEMORY) {
@@ -1089,6 +1118,8 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
				p->lead_thread->pid, p->pasid);
		}
	}

	rcu_read_unlock();
}

#ifdef KFD_SUPPORT_IOMMU_V2
@@ -1164,16 +1195,10 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, u32 pasid,

	if (KFD_GC_VERSION(dev) != IP_VERSION(9, 1, 0) &&
	    KFD_GC_VERSION(dev) != IP_VERSION(9, 2, 2) &&
	    KFD_GC_VERSION(dev) != IP_VERSION(9, 3, 0)) {
		mutex_lock(&p->event_mutex);

		/* Lookup events by type and signal them */
	    KFD_GC_VERSION(dev) != IP_VERSION(9, 3, 0))
		lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_MEMORY,
				&memory_exception_data);

		mutex_unlock(&p->event_mutex);
	}

	kfd_unref_process(p);
}
#endif /* KFD_SUPPORT_IOMMU_V2 */
@@ -1190,12 +1215,7 @@ void kfd_signal_hw_exception_event(u32 pasid)
	if (!p)
		return; /* Presumably process exited. */

	mutex_lock(&p->event_mutex);

	/* Lookup events by type and signal them */
	lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL);

	mutex_unlock(&p->event_mutex);
	kfd_unref_process(p);
}

@@ -1231,16 +1251,19 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid,
			info->prot_write ? 1 : 0;
		memory_exception_data.failure.imprecise = 0;
	}
	mutex_lock(&p->event_mutex);

	rcu_read_lock();

	id = KFD_FIRST_NONSIGNAL_EVENT_ID;
	idr_for_each_entry_continue(&p->event_idr, ev, id)
		if (ev->type == KFD_EVENT_TYPE_MEMORY) {
			spin_lock(&ev->lock);
			ev->memory_exception_data = memory_exception_data;
			set_event(ev);
			spin_unlock(&ev->lock);
		}

	mutex_unlock(&p->event_mutex);
	rcu_read_unlock();
	kfd_unref_process(p);
}

@@ -1274,22 +1297,28 @@ void kfd_signal_reset_event(struct kfd_dev *dev)
			continue;
		}

		mutex_lock(&p->event_mutex);
		rcu_read_lock();

		id = KFD_FIRST_NONSIGNAL_EVENT_ID;
		idr_for_each_entry_continue(&p->event_idr, ev, id) {
			if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
				spin_lock(&ev->lock);
				ev->hw_exception_data = hw_exception_data;
				ev->hw_exception_data.gpu_id = user_gpu_id;
				set_event(ev);
				spin_unlock(&ev->lock);
			}
			if (ev->type == KFD_EVENT_TYPE_MEMORY &&
			    reset_cause == KFD_HW_EXCEPTION_ECC) {
				spin_lock(&ev->lock);
				ev->memory_exception_data = memory_exception_data;
				ev->memory_exception_data.gpu_id = user_gpu_id;
				set_event(ev);
				spin_unlock(&ev->lock);
			}
		}
		mutex_unlock(&p->event_mutex);

		rcu_read_unlock();
	}
	srcu_read_unlock(&kfd_processes_srcu, idx);
}
@@ -1322,19 +1351,25 @@ void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid)
	memory_exception_data.gpu_id = user_gpu_id;
	memory_exception_data.failure.imprecise = true;

	mutex_lock(&p->event_mutex);
	rcu_read_lock();

	idr_for_each_entry_continue(&p->event_idr, ev, id) {
		if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
			spin_lock(&ev->lock);
			ev->hw_exception_data = hw_exception_data;
			set_event(ev);
			spin_unlock(&ev->lock);
		}

		if (ev->type == KFD_EVENT_TYPE_MEMORY) {
			spin_lock(&ev->lock);
			ev->memory_exception_data = memory_exception_data;
			set_event(ev);
			spin_unlock(&ev->lock);
		}
	}
	mutex_unlock(&p->event_mutex);

	rcu_read_unlock();

	/* user application will handle SIGBUS signal */
	send_sig(SIGBUS, p->lead_thread, 0);
+1 −0
Original line number Diff line number Diff line
@@ -59,6 +59,7 @@ struct kfd_event {

	int type;

	spinlock_t lock;
	wait_queue_head_t wq; /* List of event waiters. */

	/* Only for signal events. */
+10 −1
Original line number Diff line number Diff line
@@ -146,15 +146,24 @@ static void interrupt_wq(struct work_struct *work)
	struct kfd_dev *dev = container_of(work, struct kfd_dev,
						interrupt_work);
	uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE];
	long start_jiffies = jiffies;

	if (dev->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) {
		dev_err_once(dev->adev->dev, "Ring entry too small\n");
		return;
	}

	while (dequeue_ih_ring_entry(dev, ih_ring_entry))
	while (dequeue_ih_ring_entry(dev, ih_ring_entry)) {
		dev->device_info.event_interrupt_class->interrupt_wq(dev,
								ih_ring_entry);
		if (jiffies - start_jiffies > HZ) {
			/* If we spent more than a second processing signals,
			 * reschedule the worker to avoid soft-lockup warnings
			 */
			queue_work(dev->ih_wq, &dev->interrupt_work);
			break;
		}
	}
}

bool interrupt_is_wanted(struct kfd_dev *dev,