From 9c783a11214553a54f0915a7260a3ce624d36bf2 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Sun, 7 Apr 2024 12:36:00 +0800 Subject: drm/amdkfd: make sure VM is ready for updating operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When page table BOs were evicted but not validated before updating page tables, VM is still in evicting state, amdgpu_vm_update_range returns -EBUSY and restore_process_worker runs into a dead loop. v2: Split the BO validation and page table update into two separate loops in amdgpu_amdkfd_restore_process_bos. (Felix) 1.Validate BOs 2.Validate VM (and DMABuf attachments) 3.Update page tables for the BOs validated above Fixes: 50661eb1a2c8 ("drm/amdgpu: Auto-validate DMABuf imports in compute VMs") Signed-off-by: Lang Yu Acked-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 34 ++++++++++++++---------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 7c23ba19af33a..2131de36e3dac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2901,13 +2901,12 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * amdgpu_sync_create(&sync_obj); - /* Validate BOs and map them to GPUVM (update VM page tables). */ + /* Validate BOs managed by KFD */ list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list) { struct amdgpu_bo *bo = mem->bo; uint32_t domain = mem->domain; - struct kfd_mem_attachment *attachment; struct dma_resv_iter cursor; struct dma_fence *fence; @@ -2932,6 +2931,25 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * goto validate_map_fail; } } + } + + if (failed_size) + pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size); + + /* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO + * validations above would invalidate DMABuf imports again. + */ + ret = process_validate_vms(process_info, &exec.ticket); + if (ret) { + pr_debug("Validating VMs failed, ret: %d\n", ret); + goto validate_map_fail; + } + + /* Update mappings managed by KFD. */ + list_for_each_entry(mem, &process_info->kfd_bo_list, + validate_list) { + struct kfd_mem_attachment *attachment; + list_for_each_entry(attachment, &mem->attachments, list) { if (!attachment->is_mapped) continue; @@ -2948,18 +2966,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * } } - if (failed_size) - pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size); - - /* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO - * validations above would invalidate DMABuf imports again. - */ - ret = process_validate_vms(process_info, &exec.ticket); - if (ret) { - pr_debug("Validating VMs failed, ret: %d\n", ret); - goto validate_map_fail; - } - /* Update mappings not managed by KFD */ list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { -- cgit 1.2.3-korg