arch/x86/kvm/mmu/tdp_mmu.c  (+56 −65)

@@ -40,7 +40,17 @@ static __always_inline bool kvm_lockdep_assert_mmu_lock_held(struct kvm *kvm,
 void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
 {
-	/* Also waits for any queued work items. */
+	/*
+	 * Invalidate all roots, which besides the obvious, schedules all roots
+	 * for zapping and thus puts the TDP MMU's reference to each root, i.e.
+	 * ultimately frees all roots.
+	 */
+	kvm_tdp_mmu_invalidate_all_roots(kvm);
+
+	/*
+	 * Destroying a workqueue also first flushes the workqueue, i.e. no
+	 * need to invoke kvm_tdp_mmu_zap_invalidated_roots().
+	 */
 	destroy_workqueue(kvm->arch.tdp_mmu_zap_wq);
 
 	WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages));
 	WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));

@@ -116,16 +126,6 @@ static void tdp_mmu_schedule_zap_root(struct kvm *kvm, struct kvm_mmu_page *root
 	queue_work(kvm->arch.tdp_mmu_zap_wq, &root->tdp_mmu_async_work);
 }
 
-static inline bool kvm_tdp_root_mark_invalid(struct kvm_mmu_page *page)
-{
-	union kvm_mmu_page_role role = page->role;
-
-	role.invalid = true;
-
-	/* No need to use cmpxchg, only the invalid bit can change. */
-	role.word = xchg(&page->role.word, role.word);
-	return role.invalid;
-}
-
 void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
 			  bool shared)
 {

@@ -134,45 +134,12 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
 	if (!refcount_dec_and_test(&root->tdp_mmu_root_count))
 		return;
 
-	WARN_ON(!is_tdp_mmu_page(root));
-
 	/*
-	 * The root now has refcount=0.  It is valid, but readers already
-	 * cannot acquire a reference to it because kvm_tdp_mmu_get_root()
-	 * rejects it.  This remains true for the rest of the execution
-	 * of this function, because readers visit valid roots only
-	 * (except for tdp_mmu_zap_root_work(), which however
-	 * does not acquire any reference itself).
-	 *
-	 * Even though there are flows that need to visit all roots for
-	 * correctness, they all take mmu_lock for write, so they cannot yet
-	 * run concurrently.  The same is true after kvm_tdp_root_mark_invalid,
-	 * since the root still has refcount=0.
-	 *
-	 * However, tdp_mmu_zap_root can yield, and writers do not expect to
-	 * see refcount=0 (see for example kvm_tdp_mmu_invalidate_all_roots()).
-	 * So the root temporarily gets an extra reference, going to refcount=1
-	 * while staying invalid.  Readers still cannot acquire any reference;
-	 * but writers are now allowed to run if tdp_mmu_zap_root yields and
-	 * they might take an extra reference if they themselves yield.
-	 * Therefore, when the reference is given back by the worker,
-	 * there is no guarantee that the refcount is still 1.  If not, whoever
-	 * puts the last reference will free the page, but they will not have to
-	 * zap the root because a root cannot go from invalid to valid.
+	 * The TDP MMU itself holds a reference to each root until the root is
+	 * explicitly invalidated, i.e. the final reference should never be
+	 * put for a valid root.
 	 */
-	if (!kvm_tdp_root_mark_invalid(root)) {
-		refcount_set(&root->tdp_mmu_root_count, 1);
-
-		/*
-		 * Zapping the root in a worker is not just "nice to have";
-		 * it is required because kvm_tdp_mmu_invalidate_all_roots()
-		 * skips already-invalid roots.  If kvm_tdp_mmu_put_root() did
-		 * not add the root to the workqueue, kvm_tdp_mmu_zap_all_fast()
-		 * might return with some roots not zapped yet.
-		 */
-		tdp_mmu_schedule_zap_root(kvm, root);
-		return;
-	}
+	KVM_BUG_ON(!is_tdp_mmu_page(root) || !root->role.invalid, kvm);
 
 	spin_lock(&kvm->arch.tdp_mmu_pages_lock);
 	list_del_rcu(&root->link);

@@ -320,7 +287,14 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
 	root = tdp_mmu_alloc_sp(vcpu);
 	tdp_mmu_init_sp(root, NULL, 0, role);
 
-	refcount_set(&root->tdp_mmu_root_count, 1);
+	/*
+	 * TDP MMU roots are kept until they are explicitly invalidated, either
+	 * by a memslot update or by the destruction of the VM.  Initialize the
+	 * refcount to two; one reference for the vCPU, and one reference for
+	 * the TDP MMU itself, which is held until the root is invalidated and
+	 * is ultimately put by tdp_mmu_zap_root_work().
+	 */
+	refcount_set(&root->tdp_mmu_root_count, 2);
 
 	spin_lock(&kvm->arch.tdp_mmu_pages_lock);
 	list_add_rcu(&root->link, &kvm->arch.tdp_mmu_roots);

@@ -946,32 +920,49 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
 /*
  * Mark each TDP MMU root as invalid to prevent vCPUs from reusing a root that
  * is about to be zapped, e.g. in response to a memslots update.  The actual
- * zapping is performed asynchronously, so a reference is taken on all roots.
- * Using a separate workqueue makes it easy to ensure that the destruction is
- * performed before the "fast zap" completes, without keeping a separate list
- * of invalidated roots; the list is effectively the list of work items in
- * the workqueue.
- *
- * Get a reference even if the root is already invalid, the asynchronous worker
- * assumes it was gifted a reference to the root it processes.  Because mmu_lock
- * is held for write, it should be impossible to observe a root with zero refcount,
- * i.e. the list of roots cannot be stale.
+ * zapping is performed asynchronously.  Using a separate workqueue makes it
+ * easy to ensure that the destruction is performed before the "fast zap"
+ * completes, without keeping a separate list of invalidated roots; the list is
+ * effectively the list of work items in the workqueue.
  *
- * This has essentially the same effect for the TDP MMU
- * as updating mmu_valid_gen does for the shadow MMU.
+ * Note, the asynchronous worker is gifted the TDP MMU's reference.
+ * See kvm_tdp_mmu_get_vcpu_root_hpa().
  */
 void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm)
 {
 	struct kvm_mmu_page *root;
 
-	lockdep_assert_held_write(&kvm->mmu_lock);
-	list_for_each_entry(root, &kvm->arch.tdp_mmu_roots, link) {
-		if (!root->role.invalid &&
-		    !WARN_ON_ONCE(!kvm_tdp_mmu_get_root(root))) {
+	/*
+	 * mmu_lock must be held for write to ensure that a root doesn't become
+	 * invalid while there are active readers (invalidating a root while
+	 * there are active readers may or may not be problematic in practice,
+	 * but it's uncharted territory and not supported).
+	 *
+	 * Waive the assertion if there are no users of @kvm, i.e. the VM is
+	 * being destroyed after all references have been put, or if no vCPUs
+	 * have been created (which means there are no roots), i.e. the VM is
+	 * being destroyed in an error path of KVM_CREATE_VM.
+	 */
+	if (IS_ENABLED(CONFIG_PROVE_LOCKING) &&
+	    refcount_read(&kvm->users_count) && kvm->created_vcpus)
+		lockdep_assert_held_write(&kvm->mmu_lock);
+
+	/*
+	 * As above, mmu_lock isn't held when destroying the VM!  There can't
+	 * be other references to @kvm, i.e. nothing else can invalidate roots
+	 * or be consuming roots, but walking the list of roots does need to be
+	 * guarded against roots being deleted by the asynchronous zap worker.
+	 */
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(root, &kvm->arch.tdp_mmu_roots, link) {
+		if (!root->role.invalid) {
 			root->role.invalid = true;
 			tdp_mmu_schedule_zap_root(kvm, root);
 		}
 	}
+
+	rcu_read_unlock();
 }
 
 /*
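Taken together, the hunks above replace the old "temporarily resurrect the refcount" dance in kvm_tdp_mmu_put_root() with a fixed lifecycle: a root is born with two references, the vCPU's reference comes and goes, and the TDP MMU's own reference is put exactly once, by the zap worker, after the root has been invalidated. Below is a minimal single-threaded userspace model of that lifecycle, not kernel code: vm_root, root_alloc(), root_put(), root_invalidate() and zap_worker() are hypothetical stand-ins for kvm_mmu_page, kvm_tdp_mmu_get_vcpu_root_hpa(), kvm_tdp_mmu_put_root(), kvm_tdp_mmu_invalidate_all_roots() and tdp_mmu_zap_root_work(), and the workqueue is collapsed into a direct call.

/*
 * Minimal userspace model of the refcount scheme above -- NOT kernel code.
 * All names are hypothetical stand-ins; the kernel uses refcount_t,
 * mmu_lock and a workqueue instead of plain atomics and direct calls.
 */
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct vm_root {
	atomic_int refcount;
	bool invalid;
};

static struct vm_root *root_alloc(void)
{
	struct vm_root *root = calloc(1, sizeof(*root));

	/* One reference for the vCPU, one for the TDP MMU itself. */
	atomic_init(&root->refcount, 2);
	return root;
}

static void root_put(struct vm_root *root)
{
	if (atomic_fetch_sub(&root->refcount, 1) > 1)
		return;

	/* Mirrors the new KVM_BUG_ON(): only invalid roots reach zero. */
	assert(root->invalid);
	free(root);
	puts("root freed");
}

/* Stand-in for tdp_mmu_zap_root_work(): zap, then put the MMU's ref. */
static void zap_worker(struct vm_root *root)
{
	root_put(root);
}

static void root_invalidate(struct vm_root *root)
{
	if (!root->invalid) {
		root->invalid = true;
		zap_worker(root);	/* really queued on a workqueue */
	}
}

int main(void)
{
	struct vm_root *root = root_alloc();

	root_put(root);		/* vCPU drops its reference; root lives on */
	root_invalidate(root);	/* TDP MMU's reference is put; root is freed */
	return 0;
}

The assert() is the analogue of the KVM_BUG_ON() added to kvm_tdp_mmu_put_root(): once the TDP MMU holds a reference until invalidation, the only way the refcount can hit zero is through an already-invalid root.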
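One piece of background the deleted comments relied on: readers acquire roots via kvm_tdp_mmu_get_root(), which is refcount_inc_not_zero() underneath, i.e. "increment unless zero". A reader that races with the final put observes zero and fails rather than resurrecting the root, which is what lets the patched kvm_tdp_mmu_invalidate_all_roots() take no references at all and use RCU only to guard the list walk against the zap worker deleting entries. A compare-and-swap sketch of that acquire primitive, again as illustrative userspace C with a hypothetical tryget() name:

#include <stdatomic.h>
#include <stdbool.h>

/* "Increment unless zero", a la the kernel's refcount_inc_not_zero(). */
bool tryget(atomic_int *refcount)
{
	int old = atomic_load(refcount);

	do {
		if (old == 0)
			return false;	/* final reference already put */
	} while (!atomic_compare_exchange_weak(refcount, &old, old + 1));

	return true;
}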