Commit 81eef890 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'for-linus-6.5a-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull xen fixes from Juergen Gross:

 - A fix for a performance problem in QubesOS, adding a way to drain the
   queue of grants experiencing delayed unmaps faster

 - A patch enabling the use of static event channels from user mode,
   which was omitted when support for static event channels was introduced

 - A fix for a problem where Xen related code didn't check properly for
   running in a Xen environment, resulting in a WARN splat

* tag 'for-linus-6.5a-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  xen: speed up grant-table reclaim
  xen/evtchn: Introduce new IOCTL to bind static evtchn
  xenbus: check xen_domain in xenbus_probe_initcall
parents e62e26d3 c04e9894
Loading
Loading
Loading
Loading
+11 −0
Original line number Diff line number Diff line
@@ -60,3 +60,14 @@ Description: Module taint flags:
			C   staging driver module
			E   unsigned module
			==  =====================

What:		/sys/module/grant_table/parameters/free_per_iteration
Date:		July 2023
KernelVersion:	6.5 but backported to all supported stable branches
Contact:	Xen developer discussion <xen-devel@lists.xenproject.org>
Description:	Read and write the number of grant entries to attempt to free per iteration.

		Note: Future versions of Xen and Linux may provide a better
		interface for controlling the rate of deferred grant reclaim
		or may not need it at all.
Users:		Qubes OS (https://www.qubes-os.org)
+5 −11
Original line number Diff line number Diff line
@@ -112,6 +112,7 @@ struct irq_info {
	unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
	u64 eoi_time;           /* Time in jiffies when to EOI. */
	raw_spinlock_t lock;
	bool is_static;           /* Is event channel static */

	union {
		unsigned short virq;
@@ -815,15 +816,6 @@ static void xen_free_irq(unsigned irq)
	irq_free_desc(irq);
}

/*
 * Close the event channel identified by @port by issuing the
 * EVTCHNOP_close operation to the hypervisor.
 *
 * A non-zero result from the hypercall is treated as fatal: BUG() is
 * invoked, so no error is ever reported back to the caller.
 */
static void xen_evtchn_close(evtchn_port_t port)
{
	struct evtchn_close op;
	int rc;

	op.port = port;
	rc = HYPERVISOR_event_channel_op(EVTCHNOP_close, &op);
	if (rc)
		BUG();
}

/* Not called for lateeoi events. */
static void event_handler_exit(struct irq_info *info)
{
@@ -982,6 +974,7 @@ static void __unbind_from_irq(unsigned int irq)
		unsigned int cpu = cpu_from_irq(irq);
		struct xenbus_device *dev;

		if (!info->is_static)
			xen_evtchn_close(evtchn);

		switch (type_from_irq(irq)) {
@@ -1574,7 +1567,7 @@ int xen_set_irq_priority(unsigned irq, unsigned priority)
}
EXPORT_SYMBOL_GPL(xen_set_irq_priority);

int evtchn_make_refcounted(evtchn_port_t evtchn)
int evtchn_make_refcounted(evtchn_port_t evtchn, bool is_static)
{
	int irq = get_evtchn_to_irq(evtchn);
	struct irq_info *info;
@@ -1590,6 +1583,7 @@ int evtchn_make_refcounted(evtchn_port_t evtchn)
	WARN_ON(info->refcnt != -1);

	info->refcnt = 1;
	info->is_static = is_static;

	return 0;
}
+26 −9
Original line number Diff line number Diff line
@@ -366,10 +366,10 @@ static int evtchn_resize_ring(struct per_user_data *u)
	return 0;
}

static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port)
static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port,
			       bool is_static)
{
	struct user_evtchn *evtchn;
	struct evtchn_close close;
	int rc = 0;

	/*
@@ -402,14 +402,14 @@ static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port)
	if (rc < 0)
		goto err;

	rc = evtchn_make_refcounted(port);
	rc = evtchn_make_refcounted(port, is_static);
	return rc;

err:
	/* bind failed, should close the port now */
	close.port = port;
	if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
		BUG();
	if (!is_static)
		xen_evtchn_close(port);

	del_evtchn(u, evtchn);
	return rc;
}
@@ -456,7 +456,7 @@ static long evtchn_ioctl(struct file *file,
		if (rc != 0)
			break;

		rc = evtchn_bind_to_user(u, bind_virq.port);
		rc = evtchn_bind_to_user(u, bind_virq.port, false);
		if (rc == 0)
			rc = bind_virq.port;
		break;
@@ -482,7 +482,7 @@ static long evtchn_ioctl(struct file *file,
		if (rc != 0)
			break;

		rc = evtchn_bind_to_user(u, bind_interdomain.local_port);
		rc = evtchn_bind_to_user(u, bind_interdomain.local_port, false);
		if (rc == 0)
			rc = bind_interdomain.local_port;
		break;
@@ -507,7 +507,7 @@ static long evtchn_ioctl(struct file *file,
		if (rc != 0)
			break;

		rc = evtchn_bind_to_user(u, alloc_unbound.port);
		rc = evtchn_bind_to_user(u, alloc_unbound.port, false);
		if (rc == 0)
			rc = alloc_unbound.port;
		break;
@@ -536,6 +536,23 @@ static long evtchn_ioctl(struct file *file,
		break;
	}

	case IOCTL_EVTCHN_BIND_STATIC: {
		struct ioctl_evtchn_bind bind;
		struct user_evtchn *evtchn;

		rc = -EFAULT;
		if (copy_from_user(&bind, uarg, sizeof(bind)))
			break;

		rc = -EISCONN;
		evtchn = find_evtchn(u, bind.port);
		if (evtchn)
			break;

		rc = evtchn_bind_to_user(u, bind.port, true);
		break;
	}

	case IOCTL_EVTCHN_NOTIFY: {
		struct ioctl_evtchn_notify notify;
		struct user_evtchn *evtchn;
+29 −11
Original line number Diff line number Diff line
@@ -498,14 +498,21 @@ static LIST_HEAD(deferred_list);
static void gnttab_handle_deferred(struct timer_list *);
static DEFINE_TIMER(deferred_timer, gnttab_handle_deferred);

static atomic64_t deferred_count;
static atomic64_t leaked_count;
static unsigned int free_per_iteration = 10;
module_param(free_per_iteration, uint, 0600);

static void gnttab_handle_deferred(struct timer_list *unused)
{
	unsigned int nr = 10;
	unsigned int nr = READ_ONCE(free_per_iteration);
	const bool ignore_limit = nr == 0;
	struct deferred_entry *first = NULL;
	unsigned long flags;
	size_t freed = 0;

	spin_lock_irqsave(&gnttab_list_lock, flags);
	while (nr--) {
	while ((ignore_limit || nr--) && !list_empty(&deferred_list)) {
		struct deferred_entry *entry
			= list_first_entry(&deferred_list,
					   struct deferred_entry, list);
@@ -515,10 +522,14 @@ static void gnttab_handle_deferred(struct timer_list *unused)
		list_del(&entry->list);
		spin_unlock_irqrestore(&gnttab_list_lock, flags);
		if (_gnttab_end_foreign_access_ref(entry->ref)) {
			uint64_t ret = atomic64_dec_return(&deferred_count);

			put_free_entry(entry->ref);
			pr_debug("freeing g.e. %#x (pfn %#lx)\n",
				 entry->ref, page_to_pfn(entry->page));
			pr_debug("freeing g.e. %#x (pfn %#lx), %llu remaining\n",
				 entry->ref, page_to_pfn(entry->page),
				 (unsigned long long)ret);
			put_page(entry->page);
			freed++;
			kfree(entry);
			entry = NULL;
		} else {
@@ -530,21 +541,22 @@ static void gnttab_handle_deferred(struct timer_list *unused)
		spin_lock_irqsave(&gnttab_list_lock, flags);
		if (entry)
			list_add_tail(&entry->list, &deferred_list);
		else if (list_empty(&deferred_list))
			break;
	}
	if (!list_empty(&deferred_list) && !timer_pending(&deferred_timer)) {
	if (list_empty(&deferred_list))
		WARN_ON(atomic64_read(&deferred_count));
	else if (!timer_pending(&deferred_timer)) {
		deferred_timer.expires = jiffies + HZ;
		add_timer(&deferred_timer);
	}
	spin_unlock_irqrestore(&gnttab_list_lock, flags);
	pr_debug("Freed %zu references", freed);
}

static void gnttab_add_deferred(grant_ref_t ref, struct page *page)
{
	struct deferred_entry *entry;
	gfp_t gfp = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL;
	const char *what = KERN_WARNING "leaking";
	uint64_t leaked, deferred;

	entry = kmalloc(sizeof(*entry), gfp);
	if (!page) {
@@ -567,10 +579,16 @@ static void gnttab_add_deferred(grant_ref_t ref, struct page *page)
			add_timer(&deferred_timer);
		}
		spin_unlock_irqrestore(&gnttab_list_lock, flags);
		what = KERN_DEBUG "deferring";
		deferred = atomic64_inc_return(&deferred_count);
		leaked = atomic64_read(&leaked_count);
		pr_debug("deferring g.e. %#x (pfn %#lx) (total deferred %llu, total leaked %llu)\n",
			 ref, page ? page_to_pfn(page) : -1, deferred, leaked);
	} else {
		deferred = atomic64_read(&deferred_count);
		leaked = atomic64_inc_return(&leaked_count);
		pr_warn("leaking g.e. %#x (pfn %#lx) (total deferred %llu, total leaked %llu)\n",
			ref, page ? page_to_pfn(page) : -1, deferred, leaked);
	}
	printk("%s g.e. %#x (pfn %#lx)\n",
	       what, ref, page ? page_to_pfn(page) : -1);
}

int gnttab_try_end_foreign_access(grant_ref_t ref)
+3 −0
Original line number Diff line number Diff line
@@ -811,6 +811,9 @@ static int xenbus_probe_thread(void *unused)

static int __init xenbus_probe_initcall(void)
{
	if (!xen_domain())
		return -ENODEV;

	/*
	 * Probe XenBus here in the XS_PV case, and also XS_HVM unless we
	 * need to wait for the platform PCI device to come up or
Loading