aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAhmad Rehman <Ahmad.Rehman@amd.com>2024-03-04 15:56:00 -0600
committerAlex Deucher <alexander.deucher@amd.com>2024-03-20 13:12:57 -0400
commitf679fd6057fbf5ab34aaee28d58b7f81af0cbf48 (patch)
tree116d0127d32d288956eff97a75e2ecaa12f29c35
parent6c6064cbe58b43533e3451ad6a8ba9736c109ac3 (diff)
downloadlinux-f679fd6057fbf5ab34aaee28d58b7f81af0cbf48.tar.gz
drm/amdgpu: Init zone device and drm client after mode-1 reset on reload
In passthrough environment, when amdgpu is reloaded after unload, mode-1 is triggered after initializing the necessary IPs, That init does not include KFD, and KFD init waits until the reset is completed. KFD init is called in the reset handler, but in this case, the zone device and drm client is not initialized, causing app to create kernel panic. v2: Removing the init KFD condition from amdgpu_amdkfd_drm_client_create. As the previous version has the potential of creating DRM client twice. v3: v2 patch results in SDMA engine hung as DRM open causes VM clear to SDMA before SDMA init. Adding the condition to in drm client creation, on top of v1, to guard against drm client creation call multiple times. Signed-off-by: Ahmad Rehman <Ahmad.Rehman@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c5
2 files changed, 5 insertions, 2 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index f5f2945711be0..35dd6effa9a34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -146,7 +146,7 @@ int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev)
{
int ret;
- if (!adev->kfd.init_complete)
+ if (!adev->kfd.init_complete || adev->kfd.client.dev)
return 0;
ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 15b188aaf6818..80b9642f2bc4f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2479,8 +2479,11 @@ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work)
}
for (i = 0; i < mgpu_info.num_dgpu; i++) {
adev = mgpu_info.gpu_ins[i].adev;
- if (!adev->kfd.init_complete)
+ if (!adev->kfd.init_complete) {
+ kgd2kfd_init_zone_device(adev);
amdgpu_amdkfd_device_init(adev);
+ amdgpu_amdkfd_drm_client_create(adev);
+ }
amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
}