diff options
author | Paul Mackerras <paulus@samba.org> | 2014-05-08 13:31:23 +1000 |
---|---|---|
committer | Eli Qiao <taget@linux.vnet.ibm.com> | 2014-05-08 12:31:17 +0800 |
commit | afe34f689bff47714a18fd92c544f4318a8fc586 (patch) | |
tree | 39188943862162629f9bdde2aa557501d7eb78c5 | |
parent | 1beb8571b373cf5c6019029071a52028807287f1 (diff) | |
download | powerpc-afe34f689bff47714a18fd92c544f4318a8fc586.tar.gz |
KVM: PPC: Book3S HV: Fix two bugs in dirty-page tracking
The first bug is that we are testing the C (changed) bit in the hashed
page table without first doing a tlbie. The architecture allows the
update of the C bit to happen at any time up until we do a tlbie for
the page. However, we don't want to do a tlbie for every page on every
pass of a migration operation. Thus we do the tlbie if there are no
vcpus currently running, which would indicate the final phase of
migration. If any vcpus are running then reading the dirty log is
already racy because pages could get dirtied immediately after we
check them. Also, we don't need to do the tlbie if the HPT entry
doesn't allow writing, since in that case the C bit can not get set.
The second bug is that in the case where we see a dirty 16MB page
followed by a dirty 4kB page (both mapping to the same guest real
address), we return 1 rather than 16MB / PAGE_SIZE. The return value,
indicating the number of dirty pages, needs to reflect the largest
dirty page we come across, not the last dirty page we see.
Fixes: 109551 (this time for sure)
Signed-off-by: Paul Mackerras <paulus@samba.org>
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_hv.c | 51 |
1 files changed, 40 insertions, 11 deletions
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index bccc8c39ce5065..4464e485de342a 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1037,10 +1037,16 @@ void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte) kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); } +static int vcpus_running(struct kvm *kvm) +{ + return atomic_read(&kvm->arch.vcpus_running) != 0; +} + static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp) { struct revmap_entry *rev = kvm->arch.revmap; unsigned long head, i, j; + unsigned long v, r, n; unsigned long *hptep; int ret = 0; @@ -1060,7 +1066,22 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp) hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); j = rev[i].forw; - if (!(hptep[1] & HPTE_R_C)) + /* + * Checking the C (changed) bit here is racy since there + * is no guarantee about when the hardware writes it back. + * If the HPTE is not writable then it is stable since the + * page can't be written to, and we would have done a tlbie + * (which forces the hardware to complete any writeback) + * when making the HPTE read-only. + * If vcpus are running then this call is racy anyway + * since the page could get dirtied subsequently, so we + * expect there to be a further call which would pick up + * any delayed C bit writeback. + * Otherwise we need to do the tlbie even if C==0 in + * order to pick up any delayed writeback of C. + */ + if (!(hptep[1] & HPTE_R_C) && + (!hpte_is_writable(hptep[1]) || vcpus_running(kvm))) continue; if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { @@ -1072,21 +1093,29 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp) } /* Now check and modify the HPTE */ - if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_C)) { - /* need to make it temporarily absent to clear C */ - hptep[0] |= HPTE_V_ABSENT; - kvmppc_invalidate_hpte(kvm, hptep, i); - hptep[1] &= ~HPTE_R_C; - eieio(); - hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; + if (!(hptep[0] & HPTE_V_VALID)) + continue; + + /* need to make it temporarily absent so C is stable */ + hptep[0] |= HPTE_V_ABSENT; + kvmppc_invalidate_hpte(kvm, hptep, i); + v = hptep[0]; + r = hptep[1]; + if (r & HPTE_R_C) { + hptep[1] = r & ~HPTE_R_C; if (!(rev[i].guest_rpte & HPTE_R_C)) { rev[i].guest_rpte |= HPTE_R_C; note_hpte_modification(kvm, &rev[i]); } - ret = max(1UL, hpte_page_size(hptep[0], hptep[1]) >> - PAGE_SHIFT); + n = hpte_page_size(v, r); + n = (n + PAGE_SIZE - 1) >> PAGE_SHIFT; + if (n > ret) + ret = n; + eieio(); } - hptep[0] &= ~HPTE_V_HVLOCK; + v &= ~(HPTE_V_ABSENT | HPTE_V_HVLOCK); + v |= HPTE_V_VALID; + hptep[0] = v; } while ((i = j) != head); unlock_rmap(rmapp); |