From fff1386cc889d8fb4089d285f883f8cba62d82ce Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 11 Apr 2024 11:15:09 +1000 Subject: nouveau: fix instmem race condition around ptr stores Running a lot of VK CTS in parallel against nouveau, once every few hours you might see something like this crash. BUG: kernel NULL pointer dereference, address: 0000000000000008 PGD 8000000114e6e067 P4D 8000000114e6e067 PUD 109046067 PMD 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 7 PID: 53891 Comm: deqp-vk Not tainted 6.8.0-rc6+ #27 Hardware name: Gigabyte Technology Co., Ltd. Z390 I AORUS PRO WIFI/Z390 I AORUS PRO WIFI-CF, BIOS F8 11/05/2021 RIP: 0010:gp100_vmm_pgt_mem+0xe3/0x180 [nouveau] Code: c7 48 01 c8 49 89 45 58 85 d2 0f 84 95 00 00 00 41 0f b7 46 12 49 8b 7e 08 89 da 42 8d 2c f8 48 8b 47 08 41 83 c7 01 48 89 ee <48> 8b 40 08 ff d0 0f 1f 00 49 8b 7e 08 48 89 d9 48 8d 75 04 48 c1 RSP: 0000:ffffac20c5857838 EFLAGS: 00010202 RAX: 0000000000000000 RBX: 00000000004d8001 RCX: 0000000000000001 RDX: 00000000004d8001 RSI: 00000000000006d8 RDI: ffffa07afe332180 RBP: 00000000000006d8 R08: ffffac20c5857ad0 R09: 0000000000ffff10 R10: 0000000000000001 R11: ffffa07af27e2de0 R12: 000000000000001c R13: ffffac20c5857ad0 R14: ffffa07a96fe9040 R15: 000000000000001c FS: 00007fe395eed7c0(0000) GS:ffffa07e2c980000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 000000011febe001 CR4: 00000000003706f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ... ? gp100_vmm_pgt_mem+0xe3/0x180 [nouveau] ? gp100_vmm_pgt_mem+0x37/0x180 [nouveau] nvkm_vmm_iter+0x351/0xa20 [nouveau] ? __pfx_nvkm_vmm_ref_ptes+0x10/0x10 [nouveau] ? __pfx_gp100_vmm_pgt_mem+0x10/0x10 [nouveau] ? __pfx_gp100_vmm_pgt_mem+0x10/0x10 [nouveau] ? __lock_acquire+0x3ed/0x2170 ? __pfx_gp100_vmm_pgt_mem+0x10/0x10 [nouveau] nvkm_vmm_ptes_get_map+0xc2/0x100 [nouveau] ? __pfx_nvkm_vmm_ref_ptes+0x10/0x10 [nouveau] ? __pfx_gp100_vmm_pgt_mem+0x10/0x10 [nouveau] nvkm_vmm_map_locked+0x224/0x3a0 [nouveau] Adding any sort of useful debug usually makes it go away, so I hand wrote the function in a line, and debugged the asm. Every so often pt->memory->ptrs is NULL. This ptrs ptr is set in the nv50_instobj_acquire called from nvkm_kmap. If Thread A and Thread B both get to nv50_instobj_acquire around the same time, and Thread A hits the refcount_set line, and in lockstep thread B succeeds at refcount_inc_not_zero, there is a chance the ptrs value won't have been stored since refcount_set is unordered. Force a memory barrier here, I picked smp_mb, since we want it on all CPUs and it's write followed by a read. v2: use paired smp_rmb/smp_wmb. Cc: Fixes: be55287aa5ba ("drm/nouveau/imem/nv50: embed nvkm_instobj directly into nv04_instobj") Signed-off-by: Dave Airlie Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240411011510.2546857-1-airlied@gmail.com --- drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c index a7f3fc342d87e..dd5b5a17ece0b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c @@ -222,8 +222,11 @@ nv50_instobj_acquire(struct nvkm_memory *memory) void __iomem *map = NULL; /* Already mapped? */ - if (refcount_inc_not_zero(&iobj->maps)) + if (refcount_inc_not_zero(&iobj->maps)) { + /* read barrier match the wmb on refcount set */ + smp_rmb(); return iobj->map; + } /* Take the lock, and re-check that another thread hasn't * already mapped the object in the meantime. @@ -250,6 +253,8 @@ nv50_instobj_acquire(struct nvkm_memory *memory) iobj->base.memory.ptrs = &nv50_instobj_fast; else iobj->base.memory.ptrs = &nv50_instobj_slow; + /* barrier to ensure the ptrs are written before refcount is set */ + smp_wmb(); refcount_set(&iobj->maps, 1); } -- cgit 1.2.3-korg From cf92bb778eda7830e79452c6917efa8474a30c1e Mon Sep 17 00:00:00 2001 From: Mikhail Kobuk Date: Thu, 11 Apr 2024 14:08:52 +0300 Subject: drm: nv04: Fix out of bounds access When Output Resource (dcb->or) value is assigned in fabricate_dcb_output(), there may be out of bounds access to dac_users array in case dcb->or is zero because ffs(dcb->or) is used as index there. The 'or' argument of fabricate_dcb_output() must be interpreted as a number of bit to set, not value. Utilize macros from 'enum nouveau_or' in calls instead of hardcoding. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 2e5702aff395 ("drm/nouveau: fabricate DCB encoder table for iMac G4") Fixes: 670820c0e6a9 ("drm/nouveau: Workaround incorrect DCB entry on a GeForce3 Ti 200.") Signed-off-by: Mikhail Kobuk Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240411110854.16701-1-m.kobuk@ispras.ru --- drivers/gpu/drm/nouveau/nouveau_bios.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c index 479effcf607e2..79cfab53f80e2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bios.c +++ b/drivers/gpu/drm/nouveau/nouveau_bios.c @@ -23,6 +23,7 @@ */ #include "nouveau_drv.h" +#include "nouveau_bios.h" #include "nouveau_reg.h" #include "dispnv04/hw.h" #include "nouveau_encoder.h" @@ -1677,7 +1678,7 @@ apply_dcb_encoder_quirks(struct drm_device *dev, int idx, u32 *conn, u32 *conf) */ if (nv_match_device(dev, 0x0201, 0x1462, 0x8851)) { if (*conn == 0xf2005014 && *conf == 0xffffffff) { - fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 1, 1, 1); + fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 1, 1, DCB_OUTPUT_B); return false; } } @@ -1763,26 +1764,26 @@ fabricate_dcb_encoder_table(struct drm_device *dev, struct nvbios *bios) #ifdef __powerpc__ /* Apple iMac G4 NV17 */ if (of_machine_is_compatible("PowerMac4,5")) { - fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 0, all_heads, 1); - fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, 1, all_heads, 2); + fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 0, all_heads, DCB_OUTPUT_B); + fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, 1, all_heads, DCB_OUTPUT_C); return; } #endif /* Make up some sane defaults */ fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, - bios->legacy.i2c_indices.crt, 1, 1); + bios->legacy.i2c_indices.crt, 1, DCB_OUTPUT_B); if (nv04_tv_identify(dev, bios->legacy.i2c_indices.tv) >= 0) fabricate_dcb_output(dcb, DCB_OUTPUT_TV, bios->legacy.i2c_indices.tv, - all_heads, 0); + all_heads, DCB_OUTPUT_A); else if (bios->tmds.output0_script_ptr || bios->tmds.output1_script_ptr) fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, bios->legacy.i2c_indices.panel, - all_heads, 1); + all_heads, DCB_OUTPUT_B); } static int -- cgit 1.2.3-korg From b6976f323a8687cc0d55bc92c2086fd934324ed5 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 15 Apr 2024 15:48:21 +0200 Subject: drm/ttm: stop pooling cached NUMA pages v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We only pool write combined and uncached allocations because they require extra overhead on allocation and release. If we also pool cached NUMA it not only means some extra unnecessary overhead, but also that under memory pressure it can happen that pages from the wrong NUMA node enters the pool and are re-used over and over again. This can lead to performance reduction after running into memory pressure. v2: restructure and cleanup the code a bit from the internal hack to test this. Signed-off-by: Christian König Fixes: 4482d3c94d7f ("drm/ttm: add NUMA node id to the pool") CC: stable@vger.kernel.org Reviewed-by: Felix Kuehling Link: https://patchwork.freedesktop.org/patch/msgid/20240415134821.1919-1-christian.koenig@amd.com --- drivers/gpu/drm/ttm/ttm_pool.c | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 112438d965ffb..6e1fd6985ffcb 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -288,17 +288,23 @@ static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool, enum ttm_caching caching, unsigned int order) { - if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE) + if (pool->use_dma_alloc) return &pool->caching[caching].orders[order]; #ifdef CONFIG_X86 switch (caching) { case ttm_write_combined: + if (pool->nid != NUMA_NO_NODE) + return &pool->caching[caching].orders[order]; + if (pool->use_dma32) return &global_dma32_write_combined[order]; return &global_write_combined[order]; case ttm_uncached: + if (pool->nid != NUMA_NO_NODE) + return &pool->caching[caching].orders[order]; + if (pool->use_dma32) return &global_dma32_uncached[order]; @@ -566,11 +572,17 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, pool->use_dma_alloc = use_dma_alloc; pool->use_dma32 = use_dma32; - if (use_dma_alloc || nid != NUMA_NO_NODE) { - for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) - for (j = 0; j < NR_PAGE_ORDERS; ++j) - ttm_pool_type_init(&pool->caching[i].orders[j], - pool, i, j); + for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) { + for (j = 0; j < NR_PAGE_ORDERS; ++j) { + struct ttm_pool_type *pt; + + /* Initialize only pool types which are actually used */ + pt = ttm_pool_select_type(pool, i, j); + if (pt != &pool->caching[i].orders[j]) + continue; + + ttm_pool_type_init(pt, pool, i, j); + } } } EXPORT_SYMBOL(ttm_pool_init); @@ -599,10 +611,16 @@ void ttm_pool_fini(struct ttm_pool *pool) { unsigned int i, j; - if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE) { - for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) - for (j = 0; j < NR_PAGE_ORDERS; ++j) - ttm_pool_type_fini(&pool->caching[i].orders[j]); + for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) { + for (j = 0; j < NR_PAGE_ORDERS; ++j) { + struct ttm_pool_type *pt; + + pt = ttm_pool_select_type(pool, i, j); + if (pt != &pool->caching[i].orders[j]) + continue; + + ttm_pool_type_fini(pt); + } } /* We removed the pool types from the LRU, but we need to also make sure -- cgit 1.2.3-korg From b32233accefff1338806f064fb9b62cf5bc0609f Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 11 Apr 2024 22:55:09 -0400 Subject: drm/vmwgfx: Fix prime import/export vmwgfx never supported prime import of external buffers. Furthermore the driver exposes two different objects to userspace: vmw_surface's and gem buffers but prime import/export only worked with vmw_surfaces. Because gem buffers are used through the dumb_buffer interface this meant that the driver created buffers couldn't have been prime exported or imported. Fix prime import/export. Makes IGT's kms_prime pass. Signed-off-by: Zack Rusin Fixes: 8afa13a0583f ("drm/vmwgfx: Implement DRIVER_GEM") Cc: # v6.6+ Reviewed-by: Martin Krastev Link: https://patchwork.freedesktop.org/patch/msgid/20240412025511.78553-4-zack.rusin@broadcom.com --- drivers/gpu/drm/vmwgfx/vmwgfx_blit.c | 35 ++++++++++++++++++++++-- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 7 +++-- drivers/gpu/drm/vmwgfx/vmwgfx_bo.h | 2 ++ drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 1 + drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 3 ++ drivers/gpu/drm/vmwgfx/vmwgfx_gem.c | 32 ++++++++++++++++++++++ drivers/gpu/drm/vmwgfx/vmwgfx_prime.c | 15 ++++++++-- drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c | 44 ++++++++++++++++++++---------- 8 files changed, 117 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c b/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c index c52c7bf1485b1..717d624e9a052 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c @@ -456,8 +456,10 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst, .no_wait_gpu = false }; u32 j, initial_line = dst_offset / dst_stride; - struct vmw_bo_blit_line_data d; + struct vmw_bo_blit_line_data d = {0}; int ret = 0; + struct page **dst_pages = NULL; + struct page **src_pages = NULL; /* Buffer objects need to be either pinned or reserved: */ if (!(dst->pin_count)) @@ -477,12 +479,35 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst, return ret; } + if (!src->ttm->pages && src->ttm->sg) { + src_pages = kvmalloc_array(src->ttm->num_pages, + sizeof(struct page *), GFP_KERNEL); + if (!src_pages) + return -ENOMEM; + ret = drm_prime_sg_to_page_array(src->ttm->sg, src_pages, + src->ttm->num_pages); + if (ret) + goto out; + } + if (!dst->ttm->pages && dst->ttm->sg) { + dst_pages = kvmalloc_array(dst->ttm->num_pages, + sizeof(struct page *), GFP_KERNEL); + if (!dst_pages) { + ret = -ENOMEM; + goto out; + } + ret = drm_prime_sg_to_page_array(dst->ttm->sg, dst_pages, + dst->ttm->num_pages); + if (ret) + goto out; + } + d.mapped_dst = 0; d.mapped_src = 0; d.dst_addr = NULL; d.src_addr = NULL; - d.dst_pages = dst->ttm->pages; - d.src_pages = src->ttm->pages; + d.dst_pages = dst->ttm->pages ? dst->ttm->pages : dst_pages; + d.src_pages = src->ttm->pages ? src->ttm->pages : src_pages; d.dst_num_pages = PFN_UP(dst->resource->size); d.src_num_pages = PFN_UP(src->resource->size); d.dst_prot = ttm_io_prot(dst, dst->resource, PAGE_KERNEL); @@ -504,6 +529,10 @@ out: kunmap_atomic(d.src_addr); if (d.dst_addr) kunmap_atomic(d.dst_addr); + if (src_pages) + kvfree(src_pages); + if (dst_pages) + kvfree(dst_pages); return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index bfd41ce3c8f4f..e5eb21a471a60 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -377,7 +377,8 @@ static int vmw_bo_init(struct vmw_private *dev_priv, { struct ttm_operation_ctx ctx = { .interruptible = params->bo_type != ttm_bo_type_kernel, - .no_wait_gpu = false + .no_wait_gpu = false, + .resv = params->resv, }; struct ttm_device *bdev = &dev_priv->bdev; struct drm_device *vdev = &dev_priv->drm; @@ -394,8 +395,8 @@ static int vmw_bo_init(struct vmw_private *dev_priv, vmw_bo_placement_set(vmw_bo, params->domain, params->busy_domain); ret = ttm_bo_init_reserved(bdev, &vmw_bo->tbo, params->bo_type, - &vmw_bo->placement, 0, &ctx, NULL, - NULL, destroy); + &vmw_bo->placement, 0, &ctx, + params->sg, params->resv, destroy); if (unlikely(ret)) return ret; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h index 0d496dc9c6af7..f349642e6190d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h @@ -55,6 +55,8 @@ struct vmw_bo_params { enum ttm_bo_type bo_type; size_t size; bool pin; + struct dma_resv *resv; + struct sg_table *sg; }; /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 0a304706e0132..58fb40c93100a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1628,6 +1628,7 @@ static const struct drm_driver driver = { .prime_fd_to_handle = vmw_prime_fd_to_handle, .prime_handle_to_fd = vmw_prime_handle_to_fd, + .gem_prime_import_sg_table = vmw_prime_import_sg_table, .fops = &vmwgfx_driver_fops, .name = VMWGFX_DRIVER_NAME, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 12efecc17df66..b019a1a1787af 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1130,6 +1130,9 @@ extern int vmw_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv, uint32_t handle, uint32_t flags, int *prime_fd); +struct drm_gem_object *vmw_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *table); /* * MemoryOBject management - vmwgfx_mob.c diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c index 12787bb9c111d..d6bcaf078b1f4 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c @@ -149,6 +149,38 @@ out_no_bo: return ret; } +struct drm_gem_object *vmw_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *table) +{ + int ret; + struct vmw_private *dev_priv = vmw_priv(dev); + struct drm_gem_object *gem = NULL; + struct vmw_bo *vbo; + struct vmw_bo_params params = { + .domain = (dev_priv->has_mob) ? VMW_BO_DOMAIN_SYS : VMW_BO_DOMAIN_VRAM, + .busy_domain = VMW_BO_DOMAIN_SYS, + .bo_type = ttm_bo_type_sg, + .size = attach->dmabuf->size, + .pin = false, + .resv = attach->dmabuf->resv, + .sg = table, + + }; + + dma_resv_lock(params.resv, NULL); + + ret = vmw_bo_create(dev_priv, ¶ms, &vbo); + if (ret != 0) + goto out_no_bo; + + vbo->tbo.base.funcs = &vmw_gem_object_funcs; + + gem = &vbo->tbo.base; +out_no_bo: + dma_resv_unlock(params.resv); + return gem; +} int vmw_gem_object_create_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c b/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c index 2d72a5ee7c0c7..c99cad4449915 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c @@ -75,8 +75,12 @@ int vmw_prime_fd_to_handle(struct drm_device *dev, int fd, u32 *handle) { struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + int ret = ttm_prime_fd_to_handle(tfile, fd, handle); - return ttm_prime_fd_to_handle(tfile, fd, handle); + if (ret) + ret = drm_gem_prime_fd_to_handle(dev, file_priv, fd, handle); + + return ret; } int vmw_prime_handle_to_fd(struct drm_device *dev, @@ -85,5 +89,12 @@ int vmw_prime_handle_to_fd(struct drm_device *dev, int *prime_fd) { struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; - return ttm_prime_handle_to_fd(tfile, handle, flags, prime_fd); + int ret; + + if (handle > VMWGFX_NUM_MOB) + ret = ttm_prime_handle_to_fd(tfile, handle, flags, prime_fd); + else + ret = drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags, prime_fd); + + return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c index 4d23d0a70bcb7..621d98b376bbb 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c @@ -188,13 +188,18 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt) switch (dev_priv->map_mode) { case vmw_dma_map_bind: case vmw_dma_map_populate: - vsgt->sgt = &vmw_tt->sgt; - ret = sg_alloc_table_from_pages_segment( - &vmw_tt->sgt, vsgt->pages, vsgt->num_pages, 0, - (unsigned long)vsgt->num_pages << PAGE_SHIFT, - dma_get_max_seg_size(dev_priv->drm.dev), GFP_KERNEL); - if (ret) - goto out_sg_alloc_fail; + if (vmw_tt->dma_ttm.page_flags & TTM_TT_FLAG_EXTERNAL) { + vsgt->sgt = vmw_tt->dma_ttm.sg; + } else { + vsgt->sgt = &vmw_tt->sgt; + ret = sg_alloc_table_from_pages_segment(&vmw_tt->sgt, + vsgt->pages, vsgt->num_pages, 0, + (unsigned long)vsgt->num_pages << PAGE_SHIFT, + dma_get_max_seg_size(dev_priv->drm.dev), + GFP_KERNEL); + if (ret) + goto out_sg_alloc_fail; + } ret = vmw_ttm_map_for_dma(vmw_tt); if (unlikely(ret != 0)) @@ -209,8 +214,9 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt) return 0; out_map_fail: - sg_free_table(vmw_tt->vsgt.sgt); - vmw_tt->vsgt.sgt = NULL; + drm_warn(&dev_priv->drm, "VSG table map failed!"); + sg_free_table(vsgt->sgt); + vsgt->sgt = NULL; out_sg_alloc_fail: return ret; } @@ -356,15 +362,17 @@ static void vmw_ttm_destroy(struct ttm_device *bdev, struct ttm_tt *ttm) static int vmw_ttm_populate(struct ttm_device *bdev, struct ttm_tt *ttm, struct ttm_operation_ctx *ctx) { - int ret; + bool external = (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) != 0; - /* TODO: maybe completely drop this ? */ if (ttm_tt_is_populated(ttm)) return 0; - ret = ttm_pool_alloc(&bdev->pool, ttm, ctx); + if (external && ttm->sg) + return drm_prime_sg_to_dma_addr_array(ttm->sg, + ttm->dma_address, + ttm->num_pages); - return ret; + return ttm_pool_alloc(&bdev->pool, ttm, ctx); } static void vmw_ttm_unpopulate(struct ttm_device *bdev, @@ -372,6 +380,10 @@ static void vmw_ttm_unpopulate(struct ttm_device *bdev, { struct vmw_ttm_tt *vmw_tt = container_of(ttm, struct vmw_ttm_tt, dma_ttm); + bool external = (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) != 0; + + if (external) + return; vmw_ttm_unbind(bdev, ttm); @@ -390,6 +402,7 @@ static struct ttm_tt *vmw_ttm_tt_create(struct ttm_buffer_object *bo, { struct vmw_ttm_tt *vmw_be; int ret; + bool external = bo->type == ttm_bo_type_sg; vmw_be = kzalloc(sizeof(*vmw_be), GFP_KERNEL); if (!vmw_be) @@ -398,7 +411,10 @@ static struct ttm_tt *vmw_ttm_tt_create(struct ttm_buffer_object *bo, vmw_be->dev_priv = vmw_priv_from_ttm(bo->bdev); vmw_be->mob = NULL; - if (vmw_be->dev_priv->map_mode == vmw_dma_alloc_coherent) + if (external) + page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE; + + if (vmw_be->dev_priv->map_mode == vmw_dma_alloc_coherent || external) ret = ttm_sg_tt_init(&vmw_be->dma_ttm, bo, page_flags, ttm_cached); else -- cgit 1.2.3-korg From a60ccade88f926e871a57176e86a34bbf0db0098 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 11 Apr 2024 22:55:10 -0400 Subject: drm/vmwgfx: Fix crtc's atomic check conditional The conditional was supposed to prevent enabling of a crtc state without a set primary plane. Accidently it also prevented disabling crtc state with a set primary plane. Neither is correct. Fix the conditional and just driver-warn when a crtc state has been enabled without a primary plane which will help debug broken userspace. Fixes IGT's kms_atomic_interruptible and kms_atomic_transition tests. Signed-off-by: Zack Rusin Fixes: 06ec41909e31 ("drm/vmwgfx: Add and connect CRTC helper functions") Cc: Broadcom internal kernel review list Cc: dri-devel@lists.freedesktop.org Cc: # v4.12+ Reviewed-by: Ian Forbes Reviewed-by: Martin Krastev Link: https://patchwork.freedesktop.org/patch/msgid/20240412025511.78553-5-zack.rusin@broadcom.com --- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index cd4925346ed45..84ae4e10a2ebe 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -933,6 +933,7 @@ int vmw_du_cursor_plane_atomic_check(struct drm_plane *plane, int vmw_du_crtc_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state) { + struct vmw_private *vmw = vmw_priv(crtc->dev); struct drm_crtc_state *new_state = drm_atomic_get_new_crtc_state(state, crtc); struct vmw_display_unit *du = vmw_crtc_to_du(new_state->crtc); @@ -940,9 +941,13 @@ int vmw_du_crtc_atomic_check(struct drm_crtc *crtc, bool has_primary = new_state->plane_mask & drm_plane_mask(crtc->primary); - /* We always want to have an active plane with an active CRTC */ - if (has_primary != new_state->enable) - return -EINVAL; + /* + * This is fine in general, but broken userspace might expect + * some actual rendering so give a clue as why it's blank. + */ + if (new_state->enable && !has_primary) + drm_dbg_driver(&vmw->drm, + "CRTC without a primary plane will be blank.\n"); if (new_state->connector_mask != connector_mask && -- cgit 1.2.3-korg From d4c972bff3129a9dd4c22a3999fd8eba1a81531a Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 11 Apr 2024 22:55:11 -0400 Subject: drm/vmwgfx: Sort primary plane formats by order of preference The table of primary plane formats wasn't sorted at all, leading to applications picking our least desirable formats by defaults. Sort the primary plane formats according to our order of preference. Nice side-effect of this change is that it makes IGT's kms_atomic plane-invalid-params pass because the test picks the first format which for vmwgfx was DRM_FORMAT_XRGB1555 and uses fb's with odd sizes which make Pixman, which IGT depends on assert due to the fact that our 16bpp formats aren't 32 bit aligned like Pixman requires all formats to be. Signed-off-by: Zack Rusin Fixes: 36cc79bc9077 ("drm/vmwgfx: Add universal plane support") Cc: Broadcom internal kernel review list Cc: dri-devel@lists.freedesktop.org Cc: # v4.12+ Acked-by: Pekka Paalanen Link: https://patchwork.freedesktop.org/patch/msgid/20240412025511.78553-6-zack.rusin@broadcom.com --- drivers/gpu/drm/vmwgfx/vmwgfx_kms.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h index a94947b588e85..19a843da87b78 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h @@ -243,10 +243,10 @@ struct vmw_framebuffer_bo { static const uint32_t __maybe_unused vmw_primary_plane_formats[] = { - DRM_FORMAT_XRGB1555, - DRM_FORMAT_RGB565, DRM_FORMAT_XRGB8888, DRM_FORMAT_ARGB8888, + DRM_FORMAT_RGB565, + DRM_FORMAT_XRGB1555, }; static const uint32_t __maybe_unused vmw_cursor_plane_formats[] = { -- cgit 1.2.3-korg From 35f4f8c9fc972248055096d63b782060e473311b Mon Sep 17 00:00:00 2001 From: Maíra Canal Date: Wed, 3 Apr 2024 17:24:50 -0300 Subject: drm/v3d: Don't increment `enabled_ns` twice MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit 509433d8146c ("drm/v3d: Expose the total GPU usage stats on sysfs") introduced the calculation of global GPU stats. For the regards, it used the already existing infrastructure provided by commit 09a93cc4f7d1 ("drm/v3d: Implement show_fdinfo() callback for GPU usage stats"). While adding global GPU stats calculation ability, the author forgot to delete the existing one. Currently, the value of `enabled_ns` is incremented twice by the end of the job, when it should be added just once. Therefore, delete the leftovers from commit 509433d8146c ("drm/v3d: Expose the total GPU usage stats on sysfs"). Fixes: 509433d8146c ("drm/v3d: Expose the total GPU usage stats on sysfs") Reported-by: Tvrtko Ursulin Signed-off-by: Maíra Canal Reviewed-by: Tvrtko Ursulin Reviewed-by: Jose Maria Casanova Crespo Link: https://patchwork.freedesktop.org/patch/msgid/20240403203517.731876-2-mcanal@igalia.com --- drivers/gpu/drm/v3d/v3d_irq.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c index 2e04f6cb661e4..ce6b2fb341d1f 100644 --- a/drivers/gpu/drm/v3d/v3d_irq.c +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -105,7 +105,6 @@ v3d_irq(int irq, void *arg) struct v3d_file_priv *file = v3d->bin_job->base.file->driver_priv; u64 runtime = local_clock() - file->start_ns[V3D_BIN]; - file->enabled_ns[V3D_BIN] += local_clock() - file->start_ns[V3D_BIN]; file->jobs_sent[V3D_BIN]++; v3d->queue[V3D_BIN].jobs_sent++; @@ -126,7 +125,6 @@ v3d_irq(int irq, void *arg) struct v3d_file_priv *file = v3d->render_job->base.file->driver_priv; u64 runtime = local_clock() - file->start_ns[V3D_RENDER]; - file->enabled_ns[V3D_RENDER] += local_clock() - file->start_ns[V3D_RENDER]; file->jobs_sent[V3D_RENDER]++; v3d->queue[V3D_RENDER].jobs_sent++; @@ -147,7 +145,6 @@ v3d_irq(int irq, void *arg) struct v3d_file_priv *file = v3d->csd_job->base.file->driver_priv; u64 runtime = local_clock() - file->start_ns[V3D_CSD]; - file->enabled_ns[V3D_CSD] += local_clock() - file->start_ns[V3D_CSD]; file->jobs_sent[V3D_CSD]++; v3d->queue[V3D_CSD].jobs_sent++; @@ -195,7 +192,6 @@ v3d_hub_irq(int irq, void *arg) struct v3d_file_priv *file = v3d->tfu_job->base.file->driver_priv; u64 runtime = local_clock() - file->start_ns[V3D_TFU]; - file->enabled_ns[V3D_TFU] += local_clock() - file->start_ns[V3D_TFU]; file->jobs_sent[V3D_TFU]++; v3d->queue[V3D_TFU].jobs_sent++; -- cgit 1.2.3-korg From ee7e980dc7c9f22c142807c5f582a6524138f57a Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 4 Apr 2024 19:35:53 -0400 Subject: drm/nouveau/kms/nv50-: Disable AUX bus for disconnected DP ports GSP has its own state for keeping track of whether or not a given display connector is plugged in or not, and enforces this state on the driver. In particular, AUX transactions on a DisplayPort connector which GSP says is disconnected can never succeed - and can in some cases even cause unexpected timeouts, which can trickle up to cause other problems. A good example of this is runtime power management: where we can actually get stuck trying to resume the GPU if a userspace application like fwupd tries accessing a drm_aux_dev for a disconnected port. This was an issue I hit a few times with my Slimbook Executive 16 - where trying to offload something to the discrete GPU would wake it up, and then potentially cause it to timeout as fwupd tried to immediately access the dp_aux_dev nodes for nouveau. Likewise: we don't really have any cases I know of where we'd want to ignore this state and try an aux transaction anyway - and failing pointless aux transactions immediately can even speed things up. So - let's start enabling/disabling the aux bus in nouveau_dp_detect() to fix this. We enable the aux bus during connector probing, and leave it enabled if we discover something is actually on the connector. Otherwise, we just shut it off. This should fix some people's runtime PM issues (like myself), and also get rid of quite of a lot of GSP error spam in dmesg. Signed-off-by: Lyude Paul Reviewed-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20240404233736.7946-2-lyude@redhat.com (cherry picked from commit 9c8a10bf1f3467b2c16f6848249bdc7692ace825) --- drivers/gpu/drm/nouveau/nouveau_dp.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c index 7de7707ec6a89..3f6ccce20ea02 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dp.c +++ b/drivers/gpu/drm/nouveau/nouveau_dp.c @@ -232,6 +232,9 @@ nouveau_dp_detect(struct nouveau_connector *nv_connector, dpcd[DP_DPCD_REV] != 0) return NOUVEAU_DP_SST; + // Ensure that the aux bus is enabled for probing + drm_dp_dpcd_set_powered(&nv_connector->aux, true); + mutex_lock(&nv_encoder->dp.hpd_irq_lock); if (mstm) { /* If we're not ready to handle MST state changes yet, just @@ -293,6 +296,13 @@ out: if (mstm && !mstm->suspended && ret != NOUVEAU_DP_MST) nv50_mstm_remove(mstm); + /* GSP doesn't like when we try to do aux transactions on a port it considers disconnected, + * and since we don't really have a usecase for that anyway - just disable the aux bus here + * if we've decided the connector is disconnected + */ + if (ret == NOUVEAU_DP_NONE) + drm_dp_dpcd_set_powered(&nv_connector->aux, false); + mutex_unlock(&nv_encoder->dp.hpd_irq_lock); return ret; } -- cgit 1.2.3-korg From bf52d7f9b2067f02efe7e32697479097aba4a055 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 4 Apr 2024 19:35:54 -0400 Subject: drm/nouveau/dp: Don't probe eDP ports twice harder I didn't pay close enough attention the last time I tried to fix this problem - while we currently do correctly take care to make sure we don't probe a connected eDP port more then once, we don't do the same thing for eDP ports we found to be disconnected. So, fix this and make sure we only ever probe eDP ports once and then leave them at that connector state forever (since without HPD, it's not going to change on its own anyway). This should get rid of the last few GSP errors getting spit out during runtime suspend and resume on some machines, as we tried to reprobe eDP ports in response to ACPI hotplug probe events. Signed-off-by: Lyude Paul Reviewed-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20240404233736.7946-3-lyude@redhat.com (cherry picked from commit fe6660b661c3397af0867d5d098f5b26581f1290) --- drivers/gpu/drm/nouveau/nouveau_dp.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c index 3f6ccce20ea02..a72c45809484a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dp.c +++ b/drivers/gpu/drm/nouveau/nouveau_dp.c @@ -225,12 +225,15 @@ nouveau_dp_detect(struct nouveau_connector *nv_connector, u8 *dpcd = nv_encoder->dp.dpcd; int ret = NOUVEAU_DP_NONE, hpd; - /* If we've already read the DPCD on an eDP device, we don't need to - * reread it as it won't change + /* eDP ports don't support hotplugging - so there's no point in probing eDP ports unless we + * haven't probed them once before. */ - if (connector->connector_type == DRM_MODE_CONNECTOR_eDP && - dpcd[DP_DPCD_REV] != 0) - return NOUVEAU_DP_SST; + if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) { + if (connector->status == connector_status_connected) + return NOUVEAU_DP_SST; + else if (connector->status == connector_status_disconnected) + return NOUVEAU_DP_NONE; + } // Ensure that the aux bus is enabled for probing drm_dp_dpcd_set_powered(&nv_connector->aux, true); -- cgit 1.2.3-korg From 9e4d3f4f34455abbaa9930bf6b7575a5cd081496 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 4 Apr 2024 13:07:59 +0300 Subject: drm/panel: visionox-rm69299: don't unregister DSI device The DSI device for the panel was registered by the DSI host, so it is an error to unregister it from the panel driver. Drop the call to mipi_dsi_device_unregister(). Fixes: c7f66d32dd43 ("drm/panel: add support for rm69299 visionox panel") Reviewed-by: Jessica Zhang Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240404-drop-panel-unregister-v1-1-9f56953c5fb9@linaro.org --- drivers/gpu/drm/panel/panel-visionox-rm69299.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-visionox-rm69299.c b/drivers/gpu/drm/panel/panel-visionox-rm69299.c index 775144695283f..b15ca56a09a74 100644 --- a/drivers/gpu/drm/panel/panel-visionox-rm69299.c +++ b/drivers/gpu/drm/panel/panel-visionox-rm69299.c @@ -253,8 +253,6 @@ static void visionox_rm69299_remove(struct mipi_dsi_device *dsi) struct visionox_rm69299 *ctx = mipi_dsi_get_drvdata(dsi); mipi_dsi_detach(ctx->dsi); - mipi_dsi_device_unregister(ctx->dsi); - drm_panel_remove(&ctx->panel); } -- cgit 1.2.3-korg From 941c0bdbc176df825adf77052263b2d63db6fef7 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 4 Apr 2024 13:08:00 +0300 Subject: drm/panel: novatek-nt36682e: don't unregister DSI device The DSI device for the panel was registered by the DSI host, so it is an error to unregister it from the panel driver. Drop the call to mipi_dsi_device_unregister(). Fixes: ea4f9975625a ("drm/panel: Add support for Novatek NT36672E panel driver") Reviewed-by: Jessica Zhang Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240404-drop-panel-unregister-v1-2-9f56953c5fb9@linaro.org --- drivers/gpu/drm/panel/panel-novatek-nt36672e.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-novatek-nt36672e.c b/drivers/gpu/drm/panel/panel-novatek-nt36672e.c index cb7406d744669..c39fe0fc5d69c 100644 --- a/drivers/gpu/drm/panel/panel-novatek-nt36672e.c +++ b/drivers/gpu/drm/panel/panel-novatek-nt36672e.c @@ -614,8 +614,6 @@ static void nt36672e_panel_remove(struct mipi_dsi_device *dsi) struct nt36672e_panel *ctx = mipi_dsi_get_drvdata(dsi); mipi_dsi_detach(ctx->dsi); - mipi_dsi_device_unregister(ctx->dsi); - drm_panel_remove(&ctx->panel); } -- cgit 1.2.3-korg