Commit 55e6be65 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull cgroup changes from Tejun Heo:
 "The only notable change is Vipin's new misc cgroup controller.

  This implements generic support for resources which can be controlled
  by simply counting and limiting the number of resource instances - ie
  there's X number of these on the system and this cgroup subtree can
  have up to Y of those.

  The first user is the address space IDs used for virtual machine
  memory encryption and expected future usages are similar - niche
  hardware features with concrete resource limits and simple usage
  models"

* 'for-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: use tsk->in_iowait instead of delayacct_is_task_waiting_on_io()
  cgroup/cpuset: fix typos in comments
  cgroup: misc: mark dummy misc_cg_res_total_usage() static inline
  svm/sev: Register SEV and SEV-ES ASIDs to the misc controller
  cgroup: Miscellaneous cgroup documentation.
  cgroup: Add misc cgroup controller
parents eb6bbacc ffeee417
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@ Control Groups version 1
    hugetlb
    memcg_test
    memory
    misc
    net_cls
    net_prio
    pids
+4 −0
Original line number Diff line number Diff line
===============
Misc controller
===============
Please refer to the "Misc" documentation in Documentation/admin-guide/cgroup-v2.rst
+71 −2
Original line number Diff line number Diff line
@@ -65,8 +65,11 @@ v1 is available under :ref:`Documentation/admin-guide/cgroup-v1/index.rst <cgrou
       5-7-1. RDMA Interface Files
     5-8. HugeTLB
       5.8-1. HugeTLB Interface Files
     5-8. Misc
       5-8-1. perf_event
     5-9. Misc
       5.9-1. Misc Interface Files
       5.9-2. Migration and Ownership
     5-10. Others
       5-10-1. perf_event
     5-N. Non-normative information
       5-N-1. CPU controller root cgroup process behaviour
       5-N-2. IO controller root cgroup process behaviour
@@ -2171,6 +2174,72 @@ HugeTLB Interface Files
Misc
----

The Miscellaneous cgroup provides a resource limiting and tracking
mechanism for scalar resources which cannot be abstracted like the other
cgroup resources. The controller is enabled by the CONFIG_CGROUP_MISC config
option.

A resource can be added to the controller via enum misc_res_type{} in the
include/linux/misc_cgroup.h file and the corresponding name via misc_res_name[]
in the kernel/cgroup/misc.c file. The provider of a resource must set its
capacity prior to using the resource by calling misc_cg_set_capacity().

Once a capacity is set, the resource usage can be updated using the charge and
uncharge APIs. All of the APIs to interact with the misc controller are in
include/linux/misc_cgroup.h.

Misc Interface Files
~~~~~~~~~~~~~~~~~~~~

The miscellaneous controller provides three interface files. If two misc resources (res_a and res_b) are registered, then:

  misc.capacity
        A read-only flat-keyed file shown only in the root cgroup.  It shows
        miscellaneous scalar resources available on the platform along with
        their quantities::

	  $ cat misc.capacity
	  res_a 50
	  res_b 10

  misc.current
        A read-only flat-keyed file shown in the non-root cgroups.  It shows
        the current usage of the resources in the cgroup and its children::

	  $ cat misc.current
	  res_a 3
	  res_b 0

  misc.max
        A read-write flat-keyed file shown in the non-root cgroups.  It shows
        the allowed maximum usage of the resources in the cgroup and its
        children::

	  $ cat misc.max
	  res_a max
	  res_b 4

	Limit can be set by::

	  # echo res_a 1 > misc.max

	Limit can be set to max by::

	  # echo res_a max > misc.max

        Limits can be set higher than the capacity value in the misc.capacity
        file.

Migration and Ownership
~~~~~~~~~~~~~~~~~~~~~~~

A miscellaneous scalar resource is charged to the cgroup in which it is first
used, and stays charged to that cgroup until the resource is freed. Migrating
a process to a different cgroup does not move the charge to the destination
cgroup.

Others
------

perf_event
~~~~~~~~~~

+60 −10
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@
#include <linux/psp-sev.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/misc_cgroup.h>
#include <linux/processor.h>
#include <linux/trace_events.h>
#include <asm/fpu/internal.h>
@@ -28,6 +29,21 @@

#define __ex(x) __kvm_handle_fault_on_reboot(x)

#ifndef CONFIG_KVM_AMD_SEV
/*
 * When this config is not defined, SEV feature is not supported and APIs in
 * this file are not used but this file still gets compiled into the KVM AMD
 * module.
 *
 * We will not have MISC_CG_RES_SEV and MISC_CG_RES_SEV_ES entries in the enum
 * misc_res_type {} defined in linux/misc_cgroup.h.
 *
 * The macros below allow compilation to succeed.
 */
#define MISC_CG_RES_SEV MISC_CG_RES_TYPES
#define MISC_CG_RES_SEV_ES MISC_CG_RES_TYPES
#endif

static u8 sev_enc_bit;
static int sev_flush_asids(void);
static DECLARE_RWSEM(sev_deactivate_lock);
@@ -89,8 +105,19 @@ static bool __sev_recycle_asids(int min_asid, int max_asid)

static int sev_asid_new(struct kvm_sev_info *sev)
{
	int pos, min_asid, max_asid;
	int pos, min_asid, max_asid, ret;
	bool retry = true;
	enum misc_res_type type;

	type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
	WARN_ON(sev->misc_cg);
	sev->misc_cg = get_current_misc_cg();
	ret = misc_cg_try_charge(type, sev->misc_cg, 1);
	if (ret) {
		put_misc_cg(sev->misc_cg);
		sev->misc_cg = NULL;
		return ret;
	}

	mutex_lock(&sev_bitmap_lock);

@@ -108,7 +135,8 @@ static int sev_asid_new(struct kvm_sev_info *sev)
			goto again;
		}
		mutex_unlock(&sev_bitmap_lock);
		return -EBUSY;
		ret = -EBUSY;
		goto e_uncharge;
	}

	__set_bit(pos, sev_asid_bitmap);
@@ -116,6 +144,11 @@ static int sev_asid_new(struct kvm_sev_info *sev)
	mutex_unlock(&sev_bitmap_lock);

	return pos + 1;
e_uncharge:
	misc_cg_uncharge(type, sev->misc_cg, 1);
	put_misc_cg(sev->misc_cg);
	sev->misc_cg = NULL;
	return ret;
}

static int sev_get_asid(struct kvm *kvm)
@@ -125,14 +158,15 @@ static int sev_get_asid(struct kvm *kvm)
	return sev->asid;
}

static void sev_asid_free(int asid)
static void sev_asid_free(struct kvm_sev_info *sev)
{
	struct svm_cpu_data *sd;
	int cpu, pos;
	enum misc_res_type type;

	mutex_lock(&sev_bitmap_lock);

	pos = asid - 1;
	pos = sev->asid - 1;
	__set_bit(pos, sev_reclaim_asid_bitmap);

	for_each_possible_cpu(cpu) {
@@ -141,6 +175,11 @@ static void sev_asid_free(int asid)
	}

	mutex_unlock(&sev_bitmap_lock);

	type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
	misc_cg_uncharge(type, sev->misc_cg, 1);
	put_misc_cg(sev->misc_cg);
	sev->misc_cg = NULL;
}

static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
@@ -188,19 +227,20 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
	asid = sev_asid_new(sev);
	if (asid < 0)
		return ret;
	sev->asid = asid;

	ret = sev_platform_init(&argp->error);
	if (ret)
		goto e_free;

	sev->active = true;
	sev->asid = asid;
	INIT_LIST_HEAD(&sev->regions_list);

	return 0;

e_free:
	sev_asid_free(asid);
	sev_asid_free(sev);
	sev->asid = 0;
	return ret;
}

@@ -1315,12 +1355,12 @@ void sev_vm_destroy(struct kvm *kvm)
	mutex_unlock(&kvm->lock);

	sev_unbind_asid(kvm, sev->handle);
	sev_asid_free(sev->asid);
	sev_asid_free(sev);
}

void __init sev_hardware_setup(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int eax, ebx, ecx, edx, sev_asid_count, sev_es_asid_count;
	bool sev_es_supported = false;
	bool sev_supported = false;

@@ -1352,7 +1392,11 @@ void __init sev_hardware_setup(void)
	if (!sev_reclaim_asid_bitmap)
		goto out;

	pr_info("SEV supported: %u ASIDs\n", max_sev_asid - min_sev_asid + 1);
	sev_asid_count = max_sev_asid - min_sev_asid + 1;
	if (misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count))
		goto out;

	pr_info("SEV supported: %u ASIDs\n", sev_asid_count);
	sev_supported = true;

	/* SEV-ES support requested? */
@@ -1367,7 +1411,11 @@ void __init sev_hardware_setup(void)
	if (min_sev_asid == 1)
		goto out;

	pr_info("SEV-ES supported: %u ASIDs\n", min_sev_asid - 1);
	sev_es_asid_count = min_sev_asid - 1;
	if (misc_cg_set_capacity(MISC_CG_RES_SEV_ES, sev_es_asid_count))
		goto out;

	pr_info("SEV-ES supported: %u ASIDs\n", sev_es_asid_count);
	sev_es_supported = true;

out:
@@ -1382,6 +1430,8 @@ void sev_hardware_teardown(void)

	bitmap_free(sev_asid_bitmap);
	bitmap_free(sev_reclaim_asid_bitmap);
	misc_cg_set_capacity(MISC_CG_RES_SEV, 0);
	misc_cg_set_capacity(MISC_CG_RES_SEV_ES, 0);

	sev_flush_asids();
}
+1 −0
Original line number Diff line number Diff line
@@ -65,6 +65,7 @@ struct kvm_sev_info {
	unsigned long pages_locked; /* Number of pages locked */
	struct list_head regions_list;  /* List of registered regions */
	u64 ap_jump_table;	/* SEV-ES AP Jump Table address */
	struct misc_cg *misc_cg; /* For misc cgroup accounting */
};

struct kvm_svm {
Loading