Diffstat (limited to 'drivers/gpu/arm/mali/csf/mali_kbase_csf_scheduler.c')
-rw-r--r-- | drivers/gpu/arm/mali/csf/mali_kbase_csf_scheduler.c | 1043
1 file changed, 726 insertions(+), 317 deletions(-)
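The headline change in this patch moves the scheduler's tick and tock handling off workqueue items and onto a dedicated kernel thread (kbase_csf_scheduler_kthread() below), which is woken through a completion and drains atomic pending-work flags; the tick hrtimer is reduced to a pure wake-up source. Below is a minimal, self-contained sketch of that pattern, not the driver's exact code: sched_worker, run_tick() and run_tock() are illustrative names, and thread setup/teardown is elided.

#include <linux/kthread.h>
#include <linux/hrtimer.h>
#include <linux/completion.h>
#include <linux/atomic.h>

struct sched_worker {
	struct task_struct *thread;
	struct completion signal;
	struct hrtimer tick_timer;
	atomic_t pending_tick;
	atomic_t pending_tock;
	bool running;
};

static void run_tick(struct sched_worker *w) { /* tick body elided */ }
static void run_tock(struct sched_worker *w) { /* tock body elided */ }

/* Callers (timer callback, interrupt handlers, ioctl paths) only flag and wake. */
static void sched_worker_invoke_tick(struct sched_worker *w)
{
	atomic_set(&w->pending_tick, 1);
	complete(&w->signal);
}

/* The timer never self-restarts; the worker re-arms it from process context. */
static enum hrtimer_restart tick_timer_cb(struct hrtimer *t)
{
	struct sched_worker *w = container_of(t, struct sched_worker, tick_timer);

	sched_worker_invoke_tick(w);
	return HRTIMER_NORESTART;
}

static int sched_worker_fn(void *data)
{
	struct sched_worker *w = data;

	while (w->running) {
		if (wait_for_completion_interruptible(&w->signal))
			continue;
		reinit_completion(&w->signal);

		/* A tick overrides a pending tock, but not vice-versa. */
		if (atomic_cmpxchg(&w->pending_tick, 1, 0) == 1) {
			atomic_set(&w->pending_tock, 0);
			run_tick(w);
		} else if (atomic_cmpxchg(&w->pending_tock, 1, 0) == 1) {
			run_tock(w);
		}
	}
	return 0;
}

Note that complete() is safe to call from hard-IRQ context, which is what lets the hrtimer callback stay a one-liner in the patch below.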
diff --git a/drivers/gpu/arm/mali/csf/mali_kbase_csf_scheduler.c b/drivers/gpu/arm/mali/csf/mali_kbase_csf_scheduler.c index 91c5a47e6ef91a..8d4ac71e10954a 100644 --- a/drivers/gpu/arm/mali/csf/mali_kbase_csf_scheduler.c +++ b/drivers/gpu/arm/mali/csf/mali_kbase_csf_scheduler.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,6 +19,8 @@ * */ +#include <linux/kthread.h> + #include <mali_kbase.h> #include "mali_kbase_config_defaults.h" #include <mali_kbase_ctx_sched.h> @@ -32,6 +34,12 @@ #include "uapi/mali_base_kernel.h" #include <mali_kbase_hwaccess_time.h> #include "mali_kbase_csf_tiler_heap_reclaim.h" +#include "mali_kbase_csf_mcu_shared_reg.h" +#include "version_compat_defs.h" +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include <mali_kbase_gpu_metrics.h> +#include <csf/mali_kbase_csf_trace_buffer.h> +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ /* Value to indicate that a queue group is not groups_to_schedule list */ #define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX) @@ -57,6 +65,9 @@ /* Time to wait for completion of PING req before considering MCU as hung */ #define FW_PING_AFTER_ERROR_TIMEOUT_MS (10) +/* Explicitly defining this blocked_reason code as SB_WAIT for clarity */ +#define CS_STATUS_BLOCKED_ON_SB_WAIT CS_STATUS_BLOCKED_REASON_REASON_WAIT + static int scheduler_group_schedule(struct kbase_queue_group *group); static void remove_group_from_idle_wait(struct kbase_queue_group *const group); static @@ -78,6 +89,222 @@ static bool queue_group_scheduled_locked(struct kbase_queue_group *group); #define kctx_as_enabled(kctx) (!kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +/** + * gpu_metrics_ctx_init() - Take a reference on GPU metrics context if it exists, + * otherwise allocate and initialise one. + * + * @kctx: Pointer to the Kbase context. + * + * The GPU metrics context represents an "Application" for the purposes of GPU metrics + * reporting. There may be multiple kbase_contexts contributing data to a single GPU + * metrics context. + * This function takes a reference on GPU metrics context if it already exists + * corresponding to the Application that is creating the Kbase context, otherwise + * memory is allocated for it and initialised. + * + * Return: 0 on success, or negative on failure. + */ +static inline int gpu_metrics_ctx_init(struct kbase_context *kctx) +{ + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx; + struct kbase_device *kbdev = kctx->kbdev; + int ret = 0; + + const struct cred *cred = get_current_cred(); + const unsigned int aid = cred->euid.val; + + put_cred(cred); + + /* Return early if this is not a Userspace created context */ + if (unlikely(!kctx->kfile)) + return 0; + + /* Serialize against the other threads trying to create/destroy Kbase contexts. 
*/
+	mutex_lock(&kbdev->kctx_list_lock);
+	mutex_lock(&kbdev->csf.scheduler.lock);
+	gpu_metrics_ctx = kbase_gpu_metrics_ctx_get(kbdev, aid);
+	mutex_unlock(&kbdev->csf.scheduler.lock);
+
+	if (!gpu_metrics_ctx) {
+		gpu_metrics_ctx = kmalloc(sizeof(*gpu_metrics_ctx), GFP_KERNEL);
+
+		if (gpu_metrics_ctx) {
+			mutex_lock(&kbdev->csf.scheduler.lock);
+			kbase_gpu_metrics_ctx_init(kbdev, gpu_metrics_ctx, aid);
+			mutex_unlock(&kbdev->csf.scheduler.lock);
+		} else {
+			dev_err(kbdev->dev, "Allocation for gpu_metrics_ctx failed");
+			ret = -ENOMEM;
+		}
+	}
+
+	kctx->gpu_metrics_ctx = gpu_metrics_ctx;
+	mutex_unlock(&kbdev->kctx_list_lock);
+
+	return ret;
+}
+
+/**
+ * gpu_metrics_ctx_term() - Drop a reference on a GPU metrics context and free it
+ * if the refcount becomes 0.
+ *
+ * @kctx: Pointer to the Kbase context.
+ */
+static inline void gpu_metrics_ctx_term(struct kbase_context *kctx)
+{
+	/* Return early if this is not a Userspace created context */
+	if (unlikely(!kctx->kfile))
+		return;
+
+	/* Serialize against the other threads trying to create/destroy Kbase contexts. */
+	mutex_lock(&kctx->kbdev->kctx_list_lock);
+	mutex_lock(&kctx->kbdev->csf.scheduler.lock);
+	kbase_gpu_metrics_ctx_put(kctx->kbdev, kctx->gpu_metrics_ctx);
+	mutex_unlock(&kctx->kbdev->csf.scheduler.lock);
+	mutex_unlock(&kctx->kbdev->kctx_list_lock);
+}
+
+/**
+ * struct gpu_metrics_event - A GPU metrics event recorded in the trace buffer.
+ *
+ * @csg_slot_act: The 32bit data comprising the GPU metrics event.
+ *                5 bits [4:0] represent the CSG slot number.
+ *                1 bit [5] represents the transition of the CSG group on the slot.
+ *                '1' means idle->active whilst '0' means active->idle.
+ * @timestamp:    64bit timestamp of the GPU metrics event.
+ *
+ * Note: It's packed and word-aligned, as per the layout agreed with the firmware.
+ */
+struct gpu_metrics_event {
+	u32 csg_slot_act;
+	u64 timestamp;
+} __packed __aligned(4);
+#define GPU_METRICS_EVENT_SIZE sizeof(struct gpu_metrics_event)
+
+#define GPU_METRICS_ACT_SHIFT 5
+#define GPU_METRICS_ACT_MASK (0x1 << GPU_METRICS_ACT_SHIFT)
+#define GPU_METRICS_ACT_GET(val) (((val)&GPU_METRICS_ACT_MASK) >> GPU_METRICS_ACT_SHIFT)
+
+#define GPU_METRICS_CSG_MASK 0x1f
+#define GPU_METRICS_CSG_GET(val) ((val)&GPU_METRICS_CSG_MASK)
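To make the packing above concrete, here is how the two accessor macros unpack a csg_slot_act word; the sample value is made up:

/* Hypothetical sample word: CSG slot 3, with bit 5 set (idle->active). */
u32 csg_slot_act = (1u << GPU_METRICS_ACT_SHIFT) | 3u;

u8 slot = GPU_METRICS_CSG_GET(csg_slot_act);     /* csg_slot_act & 0x1f -> 3 */
bool active = GPU_METRICS_ACT_GET(csg_slot_act); /* bit 5 -> 1, i.e. idle->active */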
+
+/**
+ * gpu_metrics_read_event() - Read a GPU metrics trace from the trace buffer
+ *
+ * @kbdev:    Pointer to the device
+ * @kctx:     Kbase context derived from the CSG slot field of the GPU metrics event.
+ * @prev_act: Previous CSG activity transition in the GPU metrics event.
+ * @cur_act:  Current CSG activity transition in the GPU metrics event.
+ * @ts:       CSG activity transition timestamp in the GPU metrics event.
+ *
+ * This function reads the firmware trace buffer named 'gpu_metrics' and
+ * parses one 12-byte data packet into the following information:
+ * - The number of the CSG slot on which the CSG transitioned to active or idle.
+ * - The activity transition (1: idle->active, 0: active->idle).
+ * - The timestamp, in nanoseconds, at which the transition occurred.
+ *
+ * Return: true on success.
+ */
+static bool gpu_metrics_read_event(struct kbase_device *kbdev, struct kbase_context **kctx,
+				   bool *prev_act, bool *cur_act, uint64_t *ts)
+{
+	struct firmware_trace_buffer *tb = kbdev->csf.scheduler.gpu_metrics_tb;
+	struct gpu_metrics_event e;
+
+	if (kbase_csf_firmware_trace_buffer_read_data(tb, (u8 *)&e, GPU_METRICS_EVENT_SIZE) ==
+	    GPU_METRICS_EVENT_SIZE) {
+		const u8 slot = GPU_METRICS_CSG_GET(e.csg_slot_act);
+		struct kbase_queue_group *group =
+			kbdev->csf.scheduler.csg_slots[slot].resident_group;
+
+		if (unlikely(!group)) {
+			dev_err(kbdev->dev, "failed to find CSG group from CSG slot(%u)", slot);
+			return false;
+		}
+
+		*cur_act = GPU_METRICS_ACT_GET(e.csg_slot_act);
+		*ts = kbase_backend_time_convert_gpu_to_cpu(kbdev, e.timestamp);
+		*kctx = group->kctx;
+
+		*prev_act = group->prev_act;
+		group->prev_act = *cur_act;
+
+		return true;
+	}
+
+	dev_err(kbdev->dev, "failed to read a GPU metrics event from the trace buffer");
+
+	return false;
+}
+
+/**
+ * emit_gpu_metrics_to_frontend() - Emit GPU metrics events to the frontend.
+ *
+ * @kbdev: Pointer to the device
+ *
+ * This function must be called to emit GPU metrics data to the
+ * frontend whenever needed. Calls to this function are serialized by
+ * the scheduler lock.
+ *
+ * Kbase reports invalid activity traces when detected.
+ */
+static void emit_gpu_metrics_to_frontend(struct kbase_device *kbdev)
+{
+	u64 system_time = 0;
+	u64 ts_before_drain;
+	u64 ts = 0;
+
+	lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+	return;
+#endif
+
+	if (WARN_ON_ONCE(kbdev->csf.scheduler.state == SCHED_SUSPENDED))
+		return;
+
+	kbase_backend_get_gpu_time_norequest(kbdev, NULL, &system_time, NULL);
+	ts_before_drain = kbase_backend_time_convert_gpu_to_cpu(kbdev, system_time);
+
+	while (!kbase_csf_firmware_trace_buffer_is_empty(kbdev->csf.scheduler.gpu_metrics_tb)) {
+		struct kbase_context *kctx;
+		bool prev_act;
+		bool cur_act;
+
+		if (gpu_metrics_read_event(kbdev, &kctx, &prev_act, &cur_act, &ts)) {
+			if (prev_act == cur_act) {
+				/* Error handling
+				 *
+				 * In the case of an active CSG, Kbase will try to
+				 * recover the lost event by ending the previously
+				 * active event and starting a new one.
+				 *
+				 * In the case of an inactive CSG, the event is
+				 * dropped as Kbase cannot recover.
+				 */
+				dev_err(kbdev->dev,
+					"Invalid activity state transition. (prev_act = %u, cur_act = %u)",
+					prev_act, cur_act);
+				if (cur_act) {
+					kbase_gpu_metrics_ctx_end_activity(kctx, ts);
+					kbase_gpu_metrics_ctx_start_activity(kctx, ts);
+				}
+			} else {
+				/* Normal handling */
+				if (cur_act)
+					kbase_gpu_metrics_ctx_start_activity(kctx, ts);
+				else
+					kbase_gpu_metrics_ctx_end_activity(kctx, ts);
+			}
+		} else
+			break;
+	}
+
+	kbase_gpu_metrics_emit_tracepoint(kbdev, ts >= ts_before_drain ? ts + 1 : ts_before_drain);
+}
+#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */
+
 /**
  * wait_for_dump_complete_on_group_deschedule() - Wait for dump on fault and
  * scheduling tick/tock to complete before the group deschedule.
@@ -300,80 +527,20 @@ out:
  *
  * @timer: Pointer to the scheduling tick hrtimer
  *
- * This function will enqueue the scheduling tick work item for immediate
- * execution, if it has not been queued already.
+ * This function will wake up kbase_csf_scheduler_kthread() to process a
+ * pending scheduling tick. The timer will be restarted manually once a tick
+ * has been processed, if appropriate.
  *
  * Return: enum value to indicate that timer should not be restarted.
*/ static enum hrtimer_restart tick_timer_callback(struct hrtimer *timer) { - struct kbase_device *kbdev = container_of(timer, struct kbase_device, - csf.scheduler.tick_timer); - - kbase_csf_scheduler_tick_advance(kbdev); - return HRTIMER_NORESTART; -} - -/** - * start_tick_timer() - Start the scheduling tick hrtimer. - * - * @kbdev: Pointer to the device - * - * This function will start the scheduling tick hrtimer and is supposed to - * be called only from the tick work item function. The tick hrtimer should - * not be active already. - */ -static void start_tick_timer(struct kbase_device *kbdev) -{ - struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; - unsigned long flags; - - lockdep_assert_held(&scheduler->lock); - - spin_lock_irqsave(&scheduler->interrupt_lock, flags); - WARN_ON(scheduler->tick_timer_active); - if (likely(!work_pending(&scheduler->tick_work))) { - scheduler->tick_timer_active = true; - - hrtimer_start(&scheduler->tick_timer, - HR_TIMER_DELAY_MSEC(scheduler->csg_scheduling_period_ms), - HRTIMER_MODE_REL); - } - spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); -} - -/** - * cancel_tick_timer() - Cancel the scheduling tick hrtimer - * - * @kbdev: Pointer to the device - */ -static void cancel_tick_timer(struct kbase_device *kbdev) -{ - struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; - unsigned long flags; - - spin_lock_irqsave(&scheduler->interrupt_lock, flags); - scheduler->tick_timer_active = false; - spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); - hrtimer_cancel(&scheduler->tick_timer); -} - -/** - * enqueue_tick_work() - Enqueue the scheduling tick work item - * - * @kbdev: Pointer to the device - * - * This function will queue the scheduling tick work item for immediate - * execution. This shall only be called when both the tick hrtimer and tick - * work item are not active/pending. - */ -static void enqueue_tick_work(struct kbase_device *kbdev) -{ - struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; - - lockdep_assert_held(&scheduler->lock); + struct kbase_device *kbdev = + container_of(timer, struct kbase_device, csf.scheduler.tick_timer); kbase_csf_scheduler_invoke_tick(kbdev); + + return HRTIMER_NORESTART; } static void release_doorbell(struct kbase_device *kbdev, int doorbell_nr) @@ -518,8 +685,14 @@ static void update_on_slot_queues_offsets(struct kbase_device *kbdev) if (queue && queue->user_io_addr) { u64 const *const output_addr = - (u64 const *)(queue->user_io_addr + PAGE_SIZE); + (u64 const *)(queue->user_io_addr + + PAGE_SIZE / sizeof(u64)); + /* + * This 64-bit read will be atomic on a 64-bit kernel but may not + * be atomic on 32-bit kernels. Support for 32-bit kernels is + * limited to build-only. + */ queue->extract_ofs = output_addr[CS_EXTRACT_LO / sizeof(u64)]; } } @@ -553,7 +726,7 @@ void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) * updated whilst gpu_idle_worker() is executing. 
*/ scheduler->fast_gpu_idle_handling = - (kbdev->csf.gpu_idle_hysteresis_ms == 0) || + (kbdev->csf.gpu_idle_hysteresis_ns == 0) || !kbase_csf_scheduler_all_csgs_idle(kbdev); /* The GPU idle worker relies on update_on_slot_queues_offsets() to have @@ -567,8 +740,8 @@ void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) update_on_slot_queues_offsets(kbdev); } } else { - /* Advance the scheduling tick to get the non-idle suspended groups loaded soon */ - kbase_csf_scheduler_tick_advance_nolock(kbdev); + /* Invoke the scheduling tick to get the non-idle suspended groups loaded soon */ + kbase_csf_scheduler_invoke_tick(kbdev); } } @@ -658,6 +831,14 @@ static bool queue_group_scheduled_locked(struct kbase_queue_group *group) return queue_group_scheduled(group); } +static void update_idle_protm_group_state_to_runnable(struct kbase_queue_group *group) +{ + lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); + + group->run_state = KBASE_CSF_GROUP_RUNNABLE; + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_RUNNABLE, group, group->run_state); +} + /** * scheduler_protm_wait_quit() - Wait for GPU to exit protected mode. * @@ -741,24 +922,6 @@ static void scheduler_force_protm_exit(struct kbase_device *kbdev) } /** - * scheduler_timer_is_enabled_nolock() - Check if the scheduler wakes up - * automatically for periodic tasks. - * - * @kbdev: Pointer to the device - * - * This is a variant of kbase_csf_scheduler_timer_is_enabled() that assumes the - * CSF scheduler lock to already have been held. - * - * Return: true if the scheduler is configured to wake up periodically - */ -static bool scheduler_timer_is_enabled_nolock(struct kbase_device *kbdev) -{ - lockdep_assert_held(&kbdev->csf.scheduler.lock); - - return kbdev->csf.scheduler.timer_enabled; -} - -/** * scheduler_pm_active_handle_suspend() - Acquire the PM reference count for * Scheduler * @@ -1450,6 +1613,7 @@ int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue) err = sched_halt_stream(queue); unassign_user_doorbell_from_queue(kbdev, queue); + kbase_csf_mcu_shared_drop_stopped_queue(kbdev, queue); } mutex_unlock(&kbdev->csf.scheduler.lock); @@ -1461,9 +1625,9 @@ static void update_hw_active(struct kbase_queue *queue, bool active) { #if IS_ENABLED(CONFIG_MALI_NO_MALI) if (queue && queue->enabled) { - u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE); + u64 *output_addr = queue->user_io_addr + PAGE_SIZE / sizeof(u64); - output_addr[CS_ACTIVE / sizeof(u32)] = active; + output_addr[CS_ACTIVE / sizeof(*output_addr)] = active; } #else CSTD_UNUSED(queue); @@ -1473,11 +1637,16 @@ static void update_hw_active(struct kbase_queue *queue, bool active) static void program_cs_extract_init(struct kbase_queue *queue) { - u64 *input_addr = (u64 *)queue->user_io_addr; - u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE); + u64 *input_addr = queue->user_io_addr; + u64 *output_addr = queue->user_io_addr + PAGE_SIZE / sizeof(u64); - input_addr[CS_EXTRACT_INIT_LO / sizeof(u64)] = - output_addr[CS_EXTRACT_LO / sizeof(u64)]; + /* + * These 64-bit reads and writes will be atomic on a 64-bit kernel but may + * not be atomic on 32-bit kernels. Support for 32-bit kernels is limited to + * build-only. 
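A recurring detail in these hunks: user_io_addr appears to have been retyped from a byte pointer to u64 *, so the old `+ PAGE_SIZE` offsets become `+ PAGE_SIZE / sizeof(u64)`. Pointer arithmetic scales by the element size, so both forms address the output page that follows the 4KiB input page. A sketch of the equivalence (not driver code):

static u64 const *output_page(u64 *user_io_addr)
{
	/* Old style: byte-based arithmetic through a char cast. */
	u64 const *by_bytes = (u64 const *)((char *)user_io_addr + PAGE_SIZE);
	/* New style: element-based arithmetic, PAGE_SIZE/8 u64-sized steps. */
	u64 const *by_elems = user_io_addr + PAGE_SIZE / sizeof(u64);

	WARN_ON(by_bytes != by_elems);	/* always equal */
	return by_elems;
}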
+ */ + input_addr[CS_EXTRACT_INIT_LO / sizeof(*input_addr)] = + output_addr[CS_EXTRACT_LO / sizeof(*output_addr)]; } static void program_cs_trace_cfg(struct kbase_csf_cmd_stream_info *stream, @@ -1549,11 +1718,13 @@ static void program_cs(struct kbase_device *kbdev, WARN_ON(csi_index >= ginfo->stream_num)) return; - assign_user_doorbell_to_queue(kbdev, queue); - if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID) - return; + if (queue->enabled) { + assign_user_doorbell_to_queue(kbdev, queue); + if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID) + return; - WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr); + WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr); + } if (queue->enabled && queue_group_suspended_locked(group)) program_cs_extract_init(queue); @@ -1567,17 +1738,15 @@ static void program_cs(struct kbase_device *kbdev, kbase_csf_firmware_cs_input(stream, CS_SIZE, queue->size); - user_input = (queue->reg->start_pfn << PAGE_SHIFT); - kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO, - user_input & 0xFFFFFFFF); - kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI, - user_input >> 32); + user_input = queue->user_io_gpu_va; + WARN_ONCE(!user_input && queue->enabled, "Enabled queue should have a valid gpu_va"); + + kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO, user_input & 0xFFFFFFFF); + kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI, user_input >> 32); - user_output = ((queue->reg->start_pfn + 1) << PAGE_SHIFT); - kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO, - user_output & 0xFFFFFFFF); - kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI, - user_output >> 32); + user_output = user_input + PAGE_SIZE; + kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO, user_output & 0xFFFFFFFF); + kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI, user_output >> 32); kbase_csf_firmware_cs_input(stream, CS_CONFIG, (queue->doorbell_nr << 8) | (queue->priority & 0xF)); @@ -1608,8 +1777,10 @@ static void program_cs(struct kbase_device *kbdev, * or protected mode switch. */ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, - CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK, - CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK); + CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK | + CS_REQ_IDLE_SHARED_SB_DEC_MASK, + CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK | + CS_REQ_IDLE_SHARED_SB_DEC_MASK); /* Set state to START/STOP */ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, @@ -1624,6 +1795,20 @@ static void program_cs(struct kbase_device *kbdev, update_hw_active(queue, true); } +static int onslot_csg_add_new_queue(struct kbase_queue *queue) +{ + struct kbase_device *kbdev = queue->kctx->kbdev; + int err; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + err = kbase_csf_mcu_shared_add_queue(kbdev, queue); + if (!err) + program_cs(kbdev, queue, true); + + return err; +} + int kbase_csf_scheduler_queue_start(struct kbase_queue *queue) { struct kbase_queue_group *group = queue->group; @@ -1635,7 +1820,7 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue) kbase_reset_gpu_assert_prevented(kbdev); lockdep_assert_held(&queue->kctx->csf.lock); - if (WARN_ON(!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND)) + if (WARN_ON_ONCE(!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND)) return -EINVAL; mutex_lock(&kbdev->csf.scheduler.lock); @@ -1679,8 +1864,28 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue) * user door-bell on such a case. 
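program_cs() above now derives the user I/O addresses from queue->user_io_gpu_va (the output page simply follows the input page) and, as before, splits each 64-bit VA across a LO/HI pair of 32-bit firmware input registers. The split is the usual mask-and-shift, shown on a made-up value:

u64 user_input = 0x0000007FDEAD0000ULL;		/* hypothetical GPU VA */
u64 user_output = user_input + PAGE_SIZE;	/* output page follows input page */

u32 lo = user_input & 0xFFFFFFFF;	/* 0xDEAD0000, written to CS_USER_INPUT_LO */
u32 hi = user_input >> 32;		/* 0x0000007F, written to CS_USER_INPUT_HI */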
*/
			kbase_csf_ring_cs_user_doorbell(kbdev, queue);
-		} else
-			program_cs(kbdev, queue, true);
+		} else {
+			err = onslot_csg_add_new_queue(queue);
+			/* For an on-slot CSG, the only error when adding a new
+			 * queue to run is that the scheduler could not map the
+			 * required userio pages, likely due to a resource
+			 * issue. In such a case, and if the group has yet to
+			 * enter its fatal error state, we return -EBUSY to the
+			 * submitter for another kick. The queue itself has yet
+			 * to be programmed and hence needs to remain in its
+			 * previous (disabled) state. If the error persists, the
+			 * group will eventually report a fatal error via the
+			 * group's error reporting mechanism, when the MCU
+			 * shared region map retry limit of the group is
+			 * exceeded. For that case, the expected error value is
+			 * -EIO.
+			 */
+			if (unlikely(err)) {
+				queue->enabled = cs_enabled;
+				mutex_unlock(&kbdev->csf.scheduler.lock);
+				return (err != -EIO) ? -EBUSY : err;
+			}
+		}
	}
	queue_delayed_work(system_long_wq, &kbdev->csf.scheduler.ping_work,
			   msecs_to_jiffies(kbase_get_timeout_ms(
@@ -1821,6 +2026,7 @@ static void halt_csg_slot(struct kbase_queue_group *group, bool suspend)
	unsigned long flags;
	struct kbase_csf_cmd_stream_group_info *ginfo =
		&global_iface->groups[slot];
+
	u32 halt_cmd = suspend ? CSG_REQ_STATE_SUSPEND :
				 CSG_REQ_STATE_TERMINATE;
@@ -1838,8 +2044,8 @@ static void halt_csg_slot(struct kbase_queue_group *group, bool suspend)
		csg_slot[slot].trigger_jiffies = jiffies;
		KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP_REQ, group, halt_cmd);

-		KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG(
-			kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot);
+		KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG(
+			kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot, suspend);
	}
}
@@ -1891,9 +2097,12 @@ static bool evaluate_sync_update(struct kbase_queue *queue)
	struct kbase_vmap_struct *mapping;
	bool updated = false;
	u32 *sync_ptr;
+	u32 sync_wait_size;
+	u32 sync_wait_align_mask;
	u32 sync_wait_cond;
	u32 sync_current_val;
	struct kbase_device *kbdev;
+	bool sync_wait_align_valid = false;
	bool sync_wait_cond_valid = false;

	if (WARN_ON(!queue))
@@ -1903,6 +2112,16 @@ static bool evaluate_sync_update(struct kbase_queue *queue)

	lockdep_assert_held(&kbdev->csf.scheduler.lock);

+	sync_wait_size = CS_STATUS_WAIT_SYNC_WAIT_SIZE_GET(queue->status_wait);
+	sync_wait_align_mask =
+		(sync_wait_size == 0 ?
BASEP_EVENT32_ALIGN_BYTES : BASEP_EVENT64_ALIGN_BYTES) - 1; + sync_wait_align_valid = ((uintptr_t)queue->sync_ptr & sync_wait_align_mask) == 0; + if (!sync_wait_align_valid) { + dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX is misaligned", + queue->sync_ptr); + goto out; + } + sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr, &mapping); @@ -1987,7 +2206,7 @@ bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo, KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group, queue, status); - if (CS_STATUS_WAIT_SYNC_WAIT_GET(status)) { + if (CS_STATUS_WAIT_SYNC_WAIT_GET(status) || CS_STATUS_WAIT_SB_MASK_GET(status)) { queue->status_wait = status; queue->sync_ptr = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT_SYNC_POINTER_LO); @@ -2003,7 +2222,8 @@ bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo, kbase_csf_firmware_cs_output(stream, CS_STATUS_BLOCKED_REASON)); - if (!evaluate_sync_update(queue)) { + if ((queue->blocked_reason == CS_STATUS_BLOCKED_ON_SB_WAIT) || + !evaluate_sync_update(queue)) { is_waiting = true; } else { /* Sync object already got updated & met the condition @@ -2039,7 +2259,7 @@ static void schedule_in_cycle(struct kbase_queue_group *group, bool force) * of work needs to be enforced in situation such as entering into * protected mode). */ - if (likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) { + if (likely(kbase_csf_scheduler_timer_is_enabled(kbdev)) || force) { dev_dbg(kbdev->dev, "Kicking async for group %d\n", group->handle); kbase_csf_scheduler_invoke_tock(kbdev); @@ -2122,13 +2342,12 @@ void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, scheduler->total_runnable_grps++; - if (likely(scheduler_timer_is_enabled_nolock(kbdev)) && - (scheduler->total_runnable_grps == 1 || - scheduler->state == SCHED_SUSPENDED || + if (likely(kbase_csf_scheduler_timer_is_enabled(kbdev)) && + (scheduler->total_runnable_grps == 1 || scheduler->state == SCHED_SUSPENDED || scheduler->state == SCHED_SLEEPING)) { dev_dbg(kbdev->dev, "Kicking scheduler on first runnable group\n"); /* Fire a scheduling to start the time-slice */ - enqueue_tick_work(kbdev); + kbase_csf_scheduler_invoke_tick(kbdev); } else schedule_in_cycle(group, false); @@ -2138,6 +2357,17 @@ void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, scheduler_wakeup(kbdev, false); } +static void cancel_tick_work(struct kbase_csf_scheduler *const scheduler) +{ + hrtimer_cancel(&scheduler->tick_timer); + atomic_set(&scheduler->pending_tick_work, false); +} + +static void cancel_tock_work(struct kbase_csf_scheduler *const scheduler) +{ + atomic_set(&scheduler->pending_tock_work, false); +} + static void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler, struct kbase_queue_group *group, @@ -2232,7 +2462,7 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler, scheduler->total_runnable_grps--; if (!scheduler->total_runnable_grps) { dev_dbg(kctx->kbdev->dev, "Scheduler idle has no runnable groups"); - cancel_tick_timer(kctx->kbdev); + cancel_tick_work(scheduler); WARN_ON(atomic_read(&scheduler->non_idle_offslot_grps)); if (scheduler->state != SCHED_SUSPENDED) enqueue_gpu_idle_work(scheduler); @@ -2297,7 +2527,7 @@ static void deschedule_idle_wait_group(struct kbase_csf_scheduler *scheduler, insert_group_to_idle_wait(group); } -static void update_offslot_non_idle_cnt_for_faulty_grp(struct kbase_queue_group *group) +static void 
update_offslot_non_idle_cnt(struct kbase_queue_group *group)
 {
	struct kbase_device *kbdev = group->kctx->kbdev;
	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
@@ -2378,7 +2608,7 @@ static bool confirm_cmd_buf_empty(struct kbase_queue const *queue)
	u32 glb_version = iface->version;

	u64 const *input_addr = (u64 const *)queue->user_io_addr;
-	u64 const *output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE);
+	u64 const *output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE / sizeof(u64));

	if (glb_version >= kbase_csf_interface_version(1, 0, 0)) {
		/* CS_STATUS_SCOREBOARD supported from CSF 1.0 */
@@ -2392,6 +2622,11 @@ static bool confirm_cmd_buf_empty(struct kbase_queue const *queue)
						 CS_STATUS_SCOREBOARDS));
	}

+	/*
+	 * These 64-bit reads and writes will be atomic on a 64-bit kernel but may
+	 * not be atomic on 32-bit kernels. Support for 32-bit kernels is limited to
+	 * build-only.
+	 */
	cs_empty = (input_addr[CS_INSERT_LO / sizeof(u64)] ==
		    output_addr[CS_EXTRACT_LO / sizeof(u64)]);
	cs_idle = cs_empty && (!sb_status);
@@ -2434,9 +2669,14 @@ static void save_csg_slot(struct kbase_queue_group *group)
			if (!queue || !queue->enabled)
				continue;

-			if (save_slot_cs(ginfo, queue))
-				sync_wait = true;
-			else {
+			if (save_slot_cs(ginfo, queue)) {
+				/* sync_wait is only true if the queue is blocked on
+				 * a CQS and not a scoreboard.
+				 */
+				if (queue->blocked_reason !=
+				    CS_STATUS_BLOCKED_ON_SB_WAIT)
+					sync_wait = true;
+			} else {
				/* Need to confirm if ringbuffer of the GPU
				 * queue is empty or not. A race can arise
				 * between the flush of GPU queue and suspend
@@ -2490,7 +2730,7 @@ static bool cleanup_csg_slot(struct kbase_queue_group *group)
	s8 slot;
	struct kbase_csf_csg_slot *csg_slot;
	unsigned long flags;
-	u32 i;
+	u32 csg_req, csg_ack, i;
	bool as_fault = false;

	lockdep_assert_held(&kbdev->csf.scheduler.lock);
@@ -2528,8 +2768,17 @@ static bool cleanup_csg_slot(struct kbase_queue_group *group)
			as_fault = true;
		spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);

+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+	emit_gpu_metrics_to_frontend(kbdev);
+#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */
+
	/* now marking the slot is vacant */
	spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
+	/* Process pending SYNC_UPDATE, if any */
+	csg_req = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ);
+	csg_ack = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
+	kbase_csf_handle_csg_sync_update(kbdev, ginfo, group, csg_req, csg_ack);
+
	kbdev->csf.scheduler.csg_slots[slot].resident_group = NULL;
	clear_bit(slot, kbdev->csf.scheduler.csg_slots_idle_mask);
	KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
@@ -2550,6 +2799,11 @@ static bool cleanup_csg_slot(struct kbase_queue_group *group)
	KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG(kbdev,
		kbdev->gpu_props.props.raw_props.gpu_id, slot);

+	/* Notify that the group is off-slot and the csg_reg might be available for
+	 * reuse with other groups in a 'lazy unbinding' style.
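evaluate_sync_update() above now rejects misaligned sync object VAs before attempting to map them: a 32-bit object must be 4-byte aligned and a 64-bit one 8-byte aligned. A stand-alone sketch of the same guard, assuming BASEP_EVENT32_ALIGN_BYTES and BASEP_EVENT64_ALIGN_BYTES are 4 and 8:

static bool sync_va_is_aligned(u64 sync_va, bool is_64bit)
{
	u64 align_mask = (is_64bit ? 8 : 4) - 1;

	/* e.g. 0x1004 passes as a 32-bit object but fails as a 64-bit one */
	return (sync_va & align_mask) == 0;
}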
+ */ + kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group); + return as_fault; } @@ -2586,10 +2840,10 @@ static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio) return; /* Read the csg_ep_cfg back for updating the priority field */ - ep_cfg = kbase_csf_firmware_csg_input_read(ginfo, CSG_EP_REQ); + ep_cfg = kbase_csf_firmware_csg_input_read(ginfo, CSG_EP_REQ_LO); prev_prio = CSG_EP_REQ_PRIORITY_GET(ep_cfg); ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio); - kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg); + kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ_LO, ep_cfg); spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); @@ -2623,18 +2877,17 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, const u64 compute_mask = shader_core_mask & group->compute_mask; const u64 fragment_mask = shader_core_mask & group->fragment_mask; const u64 tiler_mask = tiler_core_mask & group->tiler_mask; - const u8 num_cores = kbdev->gpu_props.num_cores; - const u8 compute_max = min(num_cores, group->compute_max); - const u8 fragment_max = min(num_cores, group->fragment_max); + const u8 compute_max = min(kbdev->gpu_props.num_cores, group->compute_max); + const u8 fragment_max = min(kbdev->gpu_props.num_cores, group->fragment_max); const u8 tiler_max = min(CSG_TILER_MAX, group->tiler_max); struct kbase_csf_cmd_stream_group_info *ginfo; - u32 ep_cfg = 0; + u64 ep_cfg = 0; u32 csg_req; u32 state; int i; unsigned long flags; - const u64 normal_suspend_buf = - group->normal_suspend_buf.reg->start_pfn << PAGE_SHIFT; + u64 normal_suspend_buf; + u64 protm_suspend_buf; struct kbase_csf_csg_slot *csg_slot = &kbdev->csf.scheduler.csg_slots[slot]; @@ -2646,6 +2899,19 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_READY); + if (unlikely(kbase_csf_mcu_shared_group_bind_csg_reg(kbdev, group))) { + dev_warn(kbdev->dev, + "Couldn't bind MCU shared csg_reg for group %d of context %d_%d, slot=%u", + group->handle, group->kctx->tgid, kctx->id, slot); + kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group); + return; + } + + /* The suspend buf has already been mapped through binding to csg_reg */ + normal_suspend_buf = group->normal_suspend_buf.gpu_va; + protm_suspend_buf = group->protected_suspend_buf.gpu_va; + WARN_ONCE(!normal_suspend_buf, "Normal suspend buffer not mapped"); + ginfo = &global_iface->groups[slot]; /* Pick an available address space for this context */ @@ -2658,6 +2924,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, if (kctx->as_nr == KBASEP_AS_NR_INVALID) { dev_warn(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n", group->handle, kctx->tgid, kctx->id, slot); + kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group); return; } @@ -2687,6 +2954,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, fragment_mask & U32_MAX); kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_HI, fragment_mask >> 32); + kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_OTHER, tiler_mask & U32_MAX); @@ -2698,7 +2966,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, ep_cfg = CSG_EP_REQ_FRAGMENT_EP_SET(ep_cfg, fragment_max); ep_cfg = CSG_EP_REQ_TILER_EP_SET(ep_cfg, tiler_max); ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio); - kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg); + kbase_csf_firmware_csg_input(ginfo, 
CSG_EP_REQ_LO, ep_cfg & U32_MAX); /* Program the address space number assigned to the context */ kbase_csf_firmware_csg_input(ginfo, CSG_CONFIG, kctx->as_nr); @@ -2708,15 +2976,15 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_HI, normal_suspend_buf >> 32); - if (group->protected_suspend_buf.reg) { - const u64 protm_suspend_buf = - group->protected_suspend_buf.reg->start_pfn << - PAGE_SHIFT; - kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO, - protm_suspend_buf & U32_MAX); - kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI, - protm_suspend_buf >> 32); - } + /* Note, we program the P-mode buffer pointer here, but actual runtime + * enter into pmode execution is controlled by the P-mode phy pages are + * allocated and mapped with the bound csg_reg, which has a specific flag + * for indicating this P-mode runnable condition before a group is + * granted its p-mode section entry. Without a P-mode entry, the buffer + * pointed is not going to be accessed at all. + */ + kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO, protm_suspend_buf & U32_MAX); + kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI, protm_suspend_buf >> 32); if (group->dvs_buf) { kbase_csf_firmware_csg_input(ginfo, CSG_DVS_BUF_LO, @@ -2769,6 +3037,9 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, /* Programming a slot consumes a group from scanout */ update_offslot_non_idle_cnt_for_onslot_grp(group); + + /* Notify the group's bound csg_reg is now in active use */ + kbase_csf_mcu_shared_set_group_csg_reg_active(kbdev, group); } static void remove_scheduled_group(struct kbase_device *kbdev, @@ -2789,7 +3060,7 @@ static void remove_scheduled_group(struct kbase_device *kbdev, } static void sched_evict_group(struct kbase_queue_group *group, bool fault, - bool update_non_idle_offslot_grps_cnt) + bool update_non_idle_offslot_grps_cnt_from_run_state) { struct kbase_context *kctx = group->kctx; struct kbase_device *kbdev = kctx->kbdev; @@ -2800,7 +3071,7 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault, if (queue_group_scheduled_locked(group)) { u32 i; - if (update_non_idle_offslot_grps_cnt && + if (update_non_idle_offslot_grps_cnt_from_run_state && (group->run_state == KBASE_CSF_GROUP_SUSPENDED || group->run_state == KBASE_CSF_GROUP_RUNNABLE)) { int new_val = atomic_dec_return( @@ -2815,8 +3086,11 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault, } if (group->prepared_seq_num != - KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID) + KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID) { + if (!update_non_idle_offslot_grps_cnt_from_run_state) + update_offslot_non_idle_cnt(group); remove_scheduled_group(kbdev, group); + } if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) remove_group_from_idle_wait(group); @@ -2843,6 +3117,9 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault, } kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict(group); + + /* Clear all the bound shared regions and unmap any in-place MMU maps */ + kbase_csf_mcu_shared_clear_evicted_group_csg_reg(kbdev, group); } static int term_group_sync(struct kbase_queue_group *group) @@ -3222,8 +3499,7 @@ static void program_group_on_vacant_csg_slot(struct kbase_device *kbdev, scheduler->remaining_tick_slots--; } } else { - update_offslot_non_idle_cnt_for_faulty_grp( - group); + update_offslot_non_idle_cnt(group); remove_scheduled_group(kbdev, group); } } @@ 
-3315,7 +3591,6 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS); DECLARE_BITMAP(evicted_mask, MAX_SUPPORTED_CSGS) = {0}; bool suspend_wait_failed = false; - long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); lockdep_assert_held(&kbdev->csf.scheduler.lock); @@ -3327,6 +3602,7 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) { DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS); + long remaining = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT)); bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS); @@ -3354,6 +3630,12 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) * group is not terminated during * the sleep. */ + + /* Only emit suspend, if there was no AS fault */ + if (kctx_as_enabled(group->kctx) && !group->faulted) + KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( + kbdev, + kbdev->gpu_props.props.raw_props.gpu_id, i); save_csg_slot(group); as_fault = cleanup_csg_slot(group); /* If AS fault detected, evict it */ @@ -3413,8 +3695,6 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) */ clear_bit(i, slot_mask); set_bit(i, scheduler->csgs_events_enable_mask); - update_offslot_non_idle_cnt_for_onslot_grp( - group); } suspend_wait_failed = true; @@ -3836,16 +4116,13 @@ static void protm_enter_set_next_pending_seq(struct kbase_device *const kbdev) struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; u32 num_groups = kbdev->csf.global_iface.group_num; u32 num_csis = kbdev->csf.global_iface.groups[0].stream_num; - DECLARE_BITMAP(active_csgs, MAX_SUPPORTED_CSGS) = { 0 }; u32 i; kbase_csf_scheduler_spin_lock_assert_held(kbdev); - bitmap_xor(active_csgs, scheduler->csg_slots_idle_mask, scheduler->csg_inuse_bitmap, - num_groups); /* Reset the tick's pending protm seq number to invalid initially */ scheduler->tick_protm_pending_seq = KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID; - for_each_set_bit(i, active_csgs, num_groups) { + for_each_set_bit(i, scheduler->csg_inuse_bitmap, num_groups) { struct kbase_queue_group *group = scheduler->csg_slots[i].resident_group; /* Set to the next pending protm group's scan_seq_number */ @@ -3874,11 +4151,16 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, struct kbase_queue_group *const input_grp) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + struct kbase_protected_suspend_buffer *sbuf = &input_grp->protected_suspend_buf; unsigned long flags; bool protm_in_use; lockdep_assert_held(&scheduler->lock); + /* Return early if the physical pages have not been allocated yet */ + if (unlikely(!sbuf->pma)) + return; + /* This lock is taken to prevent the issuing of MMU command during the * transition to protected mode. This helps avoid the scenario where the * entry to protected mode happens with a memory region being locked and @@ -3937,6 +4219,15 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_ENTER, input_grp, 0u); +#if IS_ENABLED(CONFIG_MALI_CORESIGHT) + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + + /* Coresight must be disabled before entering protected mode. 
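Also worth noting in program_suspending_csg_slots() above: `remaining` moved inside the while loop, so every wait for a slot state change now starts from a fresh CSF_CSG_SUSPEND_TIMEOUT budget rather than all iterations sharing a single fw_timeout_ms budget. Schematically, with slots_done(), slots_changed(), wq and timeout_jiffies as hypothetical stand-ins:

/* Before: one budget shared by the whole loop. */
long remaining = timeout_jiffies;
while (!slots_done() && remaining)
	remaining = wait_event_timeout(wq, slots_changed(), remaining);

/* After: a full budget per iteration; expiry is handled inside the loop. */
while (!slots_done()) {
	long remaining = wait_event_timeout(wq, slots_changed(), timeout_jiffies);

	if (!remaining)
		break;	/* report the still-pending slots as timed out */
}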
*/ + kbase_debug_coresight_csf_disable_pmode_enter(kbdev); + + spin_lock_irqsave(&scheduler->interrupt_lock, flags); +#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ + kbase_csf_enter_protected_mode(kbdev); /* Set the pending protm seq number to the next one */ protm_enter_set_next_pending_seq(kbdev); @@ -4049,8 +4340,7 @@ static void scheduler_apply(struct kbase_device *kbdev) if (!kctx_as_enabled(group->kctx) || group->faulted) { /* Drop the head group and continue */ - update_offslot_non_idle_cnt_for_faulty_grp( - group); + update_offslot_non_idle_cnt(group); remove_scheduled_group(kbdev, group); continue; } @@ -4073,8 +4363,9 @@ static void scheduler_apply(struct kbase_device *kbdev) program_suspending_csg_slots(kbdev); } -static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, - struct kbase_context *kctx, int priority) +static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, struct kbase_context *kctx, + int priority, struct list_head *privileged_groups, + struct list_head *active_groups) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; struct kbase_queue_group *group; @@ -4088,8 +4379,9 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, if (!kctx_as_enabled(kctx)) return; - list_for_each_entry(group, &kctx->csf.sched.runnable_groups[priority], - link) { + list_for_each_entry(group, &kctx->csf.sched.runnable_groups[priority], link) { + bool protm_req; + if (WARN_ON(!list_empty(&group->link_to_schedule))) /* This would be a bug */ list_del_init(&group->link_to_schedule); @@ -4100,33 +4392,30 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, /* Set the scanout sequence number, starting from 0 */ group->scan_seq_num = scheduler->csg_scan_count_for_tick++; + protm_req = !bitmap_empty(group->protm_pending_bitmap, + kbdev->csf.global_iface.groups[0].stream_num); + if (scheduler->tick_protm_pending_seq == - KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID) { - if (!bitmap_empty(group->protm_pending_bitmap, - kbdev->csf.global_iface.groups[0].stream_num)) - scheduler->tick_protm_pending_seq = - group->scan_seq_num; + KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID) { + if (protm_req) + scheduler->tick_protm_pending_seq = group->scan_seq_num; } - if (queue_group_idle_locked(group)) { + if (protm_req && on_slot_group_idle_locked(group)) + update_idle_protm_group_state_to_runnable(group); + else if (queue_group_idle_locked(group)) { if (can_schedule_idle_group(group)) list_add_tail(&group->link_to_schedule, &scheduler->idle_groups_to_schedule); continue; } - if (!scheduler->ngrp_to_schedule) { - /* keep the top csg's origin */ - scheduler->top_ctx = kctx; - scheduler->top_grp = group; + if (protm_req && (group->priority == KBASE_QUEUE_GROUP_PRIORITY_REALTIME)) { + list_add_tail(&group->link_to_schedule, privileged_groups); + continue; } - list_add_tail(&group->link_to_schedule, - &scheduler->groups_to_schedule); - group->prepared_seq_num = scheduler->ngrp_to_schedule++; - - kctx->csf.sched.ngrp_to_schedule++; - count_active_address_space(kbdev, kctx); + list_add_tail(&group->link_to_schedule, active_groups); } } @@ -4329,6 +4618,8 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev, set_bit(i, csg_bitmap); } else { group->run_state = KBASE_CSF_GROUP_RUNNABLE; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, + group->run_state); } } @@ -4450,18 +4741,16 @@ static void scheduler_handle_idle_slots(struct kbase_device *kbdev) spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); } -static void 
scheduler_scan_idle_groups(struct kbase_device *kbdev) +static void scheduler_scan_group_list(struct kbase_device *kbdev, struct list_head *groups) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; struct kbase_queue_group *group, *n; - list_for_each_entry_safe(group, n, &scheduler->idle_groups_to_schedule, - link_to_schedule) { - WARN_ON(!can_schedule_idle_group(group)); - + list_for_each_entry_safe(group, n, groups, link_to_schedule) { if (!scheduler->ngrp_to_schedule) { /* keep the top csg's origin */ scheduler->top_ctx = group->kctx; + /* keep the top csg''s origin */ scheduler->top_grp = group; } @@ -4602,7 +4891,12 @@ static bool all_on_slot_groups_remained_idle(struct kbase_device *kbdev) if (!queue || !queue->user_io_addr) continue; - output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE); + output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE / sizeof(u64)); + /* + * These 64-bit reads and writes will be atomic on a 64-bit kernel + * but may not be atomic on 32-bit kernels. Support for 32-bit + * kernels is limited to build-only. + */ cur_extract_ofs = output_addr[CS_EXTRACT_LO / sizeof(u64)]; if (cur_extract_ofs != queue->extract_ofs) { /* More work has been executed since the idle @@ -4691,10 +4985,13 @@ static void scheduler_sleep_on_idle(struct kbase_device *kbdev) dev_dbg(kbdev->dev, "Scheduler to be put to sleep on GPU becoming idle"); - cancel_tick_timer(kbdev); + cancel_tick_work(scheduler); scheduler_pm_idle_before_sleep(kbdev); scheduler->state = SCHED_SLEEPING; KBASE_KTRACE_ADD(kbdev, SCHED_SLEEPING, NULL, scheduler->state); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + emit_gpu_metrics_to_frontend(kbdev); +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ } #endif @@ -4712,6 +5009,7 @@ static void scheduler_sleep_on_idle(struct kbase_device *kbdev) */ static bool scheduler_suspend_on_idle(struct kbase_device *kbdev) { + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; int ret = suspend_active_groups_on_powerdown(kbdev, false); if (ret) { @@ -4719,13 +5017,13 @@ static bool scheduler_suspend_on_idle(struct kbase_device *kbdev) atomic_read( &kbdev->csf.scheduler.non_idle_offslot_grps)); /* Bring forward the next tick */ - kbase_csf_scheduler_tick_advance(kbdev); + kbase_csf_scheduler_invoke_tick(kbdev); return false; } dev_dbg(kbdev->dev, "Scheduler to be suspended on GPU becoming idle"); scheduler_suspend(kbdev); - cancel_tick_timer(kbdev); + cancel_tick_work(scheduler); return true; } @@ -4785,6 +5083,7 @@ static void gpu_idle_worker(struct work_struct *work) static int scheduler_prepare(struct kbase_device *kbdev) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + struct list_head privileged_groups, active_groups; unsigned long flags; int i; @@ -4810,6 +5109,8 @@ static int scheduler_prepare(struct kbase_device *kbdev) scheduler->num_active_address_spaces = 0; scheduler->num_csg_slots_for_tick = 0; bitmap_zero(scheduler->csg_slots_prio_update, MAX_SUPPORTED_CSGS); + INIT_LIST_HEAD(&privileged_groups); + INIT_LIST_HEAD(&active_groups); spin_lock_irqsave(&scheduler->interrupt_lock, flags); scheduler->tick_protm_pending_seq = @@ -4819,10 +5120,17 @@ static int scheduler_prepare(struct kbase_device *kbdev) struct kbase_context *kctx; list_for_each_entry(kctx, &scheduler->runnable_kctxs, csf.link) - scheduler_ctx_scan_groups(kbdev, kctx, i); + scheduler_ctx_scan_groups(kbdev, kctx, i, &privileged_groups, + &active_groups); } spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + /* Adds 
privileged (RT + p.mode) groups to the scanout list */ + scheduler_scan_group_list(kbdev, &privileged_groups); + + /* Adds remainder of active groups to the scanout list */ + scheduler_scan_group_list(kbdev, &active_groups); + /* Update this tick's non-idle groups */ scheduler->non_idle_scanout_grps = scheduler->ngrp_to_schedule; @@ -4837,7 +5145,7 @@ static int scheduler_prepare(struct kbase_device *kbdev) scheduler->non_idle_scanout_grps); /* Adds those idle but runnable groups to the scanout list */ - scheduler_scan_idle_groups(kbdev); + scheduler_scan_group_list(kbdev, &scheduler->idle_groups_to_schedule); WARN_ON(scheduler->csg_scan_count_for_tick < scheduler->ngrp_to_schedule); @@ -4939,11 +5247,9 @@ static int prepare_fast_local_tock(struct kbase_device *kbdev) return bitmap_weight(csg_bitmap, num_groups); } -static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slot_mask, - unsigned int timeout_ms) +static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slot_mask) { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; - long remaining = kbase_csf_timeout_in_jiffies(timeout_ms); u32 num_groups = kbdev->csf.global_iface.group_num; int err = 0; DECLARE_BITMAP(slot_mask_local, MAX_SUPPORTED_CSGS); @@ -4952,11 +5258,11 @@ static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slo bitmap_copy(slot_mask_local, slot_mask, MAX_SUPPORTED_CSGS); - while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS) && remaining) { + while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS)) { + long remaining = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT)); DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS); bitmap_copy(changed, slot_mask_local, MAX_SUPPORTED_CSGS); - remaining = wait_event_timeout( kbdev->csf.event_wait, slots_state_changed(kbdev, changed, csg_slot_stopped_locked), remaining); @@ -4979,9 +5285,17 @@ static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slo * group is not terminated during * the sleep. */ + + /* Only emit suspend, if there was no AS fault */ + if (kctx_as_enabled(group->kctx) && !group->faulted) + KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( + kbdev, + kbdev->gpu_props.props.raw_props.gpu_id, i); + save_csg_slot(group); - if (cleanup_csg_slot(group)) + if (cleanup_csg_slot(group)) { sched_evict_group(group, true, true); + } } } } else { @@ -4992,8 +5306,8 @@ static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slo slot_mask_local[0]); /* Return the bitmask of the timed out slots to the caller */ bitmap_copy(slot_mask, slot_mask_local, MAX_SUPPORTED_CSGS); - err = -ETIMEDOUT; + break; } } @@ -5031,8 +5345,13 @@ static void evict_lru_or_blocked_csg(struct kbase_device *kbdev) if (all_addr_spaces_used) { for (i = 0; i != total_csg_slots; ++i) { - if (scheduler->csg_slots[i].resident_group != NULL) + if (scheduler->csg_slots[i].resident_group != NULL) { + if (WARN_ON(scheduler->csg_slots[i].resident_group->kctx->as_nr < + 0)) + continue; + as_usage[scheduler->csg_slots[i].resident_group->kctx->as_nr]++; + } } } @@ -5050,9 +5369,12 @@ static void evict_lru_or_blocked_csg(struct kbase_device *kbdev) * idle. 
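The LRU eviction path above also gained as_nr sanity checks: the census of how many resident CSGs share each address space now skips, and warns on, any group whose context has lost its AS. A compact sketch of that census, with NUM_AS standing in for the real address-space count:

static void count_as_usage(struct kbase_csf_scheduler *scheduler, u32 total_csg_slots,
			   u32 as_usage[NUM_AS])
{
	u32 i;

	for (i = 0; i < total_csg_slots; i++) {
		struct kbase_queue_group *g = scheduler->csg_slots[i].resident_group;

		if (g && !WARN_ON(g->kctx->as_nr < 0))
			as_usage[g->kctx->as_nr]++;
	}
}

/* Per the comment above, an idle group is then a safe eviction victim only if
 * it does not share its AS with another resident CSG, i.e. its usage count is 1.
 */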
*/ if ((group->run_state == KBASE_CSF_GROUP_IDLE) && - (group->priority != BASE_QUEUE_GROUP_PRIORITY_REALTIME) && + (group->priority != KBASE_QUEUE_GROUP_PRIORITY_REALTIME) && ((lru_idle_group == NULL) || (lru_idle_group->prepared_seq_num < group->prepared_seq_num))) { + if (WARN_ON(group->kctx->as_nr < 0)) + continue; + /* If all address spaces are used, we need to ensure the group does not * share the AS with other active CSGs. Or CSG would be freed without AS * and this optimization would not work. @@ -5069,7 +5391,7 @@ static void evict_lru_or_blocked_csg(struct kbase_device *kbdev) lru_idle_group->handle, lru_idle_group->kctx->tgid, lru_idle_group->kctx->id, lru_idle_group->csg_nr); suspend_queue_group(lru_idle_group); - if (wait_csg_slots_suspend(kbdev, &slot_mask, kbdev->csf.fw_timeout_ms)) { + if (wait_csg_slots_suspend(kbdev, &slot_mask)) { enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT; dev_warn( @@ -5165,16 +5487,12 @@ redo_local_tock: * queue jobs. */ if (protm_grp && scheduler->top_grp == protm_grp) { - int new_val; - dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d", protm_grp->handle); - new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, protm_grp, - new_val); - spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + update_offslot_non_idle_cnt_for_onslot_grp(protm_grp); + remove_scheduled_group(kbdev, protm_grp); scheduler_check_pmode_progress(kbdev); } else if (scheduler->top_grp) { if (protm_grp) @@ -5288,10 +5606,8 @@ static bool can_skip_scheduling(struct kbase_device *kbdev) return false; } -static void schedule_on_tock(struct work_struct *work) +static void schedule_on_tock(struct kbase_device *kbdev) { - struct kbase_device *kbdev = - container_of(work, struct kbase_device, csf.scheduler.tock_work.work); struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; int err; @@ -5326,12 +5642,12 @@ static void schedule_on_tock(struct work_struct *work) KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state); if (!scheduler->total_runnable_grps) enqueue_gpu_idle_work(scheduler); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + emit_gpu_metrics_to_frontend(kbdev); +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ mutex_unlock(&scheduler->lock); kbase_reset_gpu_allow(kbdev); - dev_dbg(kbdev->dev, - "Waking up for event after schedule-on-tock completes."); - wake_up_all(&kbdev->csf.event_wait); KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_END, NULL, 0u); return; @@ -5340,10 +5656,8 @@ exit_no_schedule_unlock: kbase_reset_gpu_allow(kbdev); } -static void schedule_on_tick(struct work_struct *work) +static void schedule_on_tick(struct kbase_device *kbdev) { - struct kbase_device *kbdev = - container_of(work, struct kbase_device, csf.scheduler.tick_work); struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; int err = kbase_reset_gpu_try_prevent(kbdev); @@ -5356,7 +5670,6 @@ static void schedule_on_tick(struct work_struct *work) kbase_debug_csf_fault_wait_completion(kbdev); mutex_lock(&scheduler->lock); - WARN_ON(scheduler->tick_timer_active); if (can_skip_scheduling(kbdev)) goto exit_no_schedule_unlock; @@ -5371,23 +5684,25 @@ static void schedule_on_tick(struct work_struct *work) scheduler->last_schedule = jiffies; /* Kicking next scheduling if needed */ - if (likely(scheduler_timer_is_enabled_nolock(kbdev)) && - (scheduler->total_runnable_grps > 0)) { - start_tick_timer(kbdev); - dev_dbg(kbdev->dev, - "scheduling for next tick, 
num_runnable_groups:%u\n", + if (likely(kbase_csf_scheduler_timer_is_enabled(kbdev)) && + (scheduler->total_runnable_grps > 0)) { + hrtimer_start(&scheduler->tick_timer, + HR_TIMER_DELAY_MSEC(scheduler->csg_scheduling_period_ms), + HRTIMER_MODE_REL); + dev_dbg(kbdev->dev, "scheduling for next tick, num_runnable_groups:%u\n", scheduler->total_runnable_grps); } else if (!scheduler->total_runnable_grps) { enqueue_gpu_idle_work(scheduler); } scheduler->state = SCHED_INACTIVE; +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + emit_gpu_metrics_to_frontend(kbdev); +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ mutex_unlock(&scheduler->lock); KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state); kbase_reset_gpu_allow(kbdev); - dev_dbg(kbdev->dev, "Waking up for event after schedule-on-tick completes."); - wake_up_all(&kbdev->csf.event_wait); KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_END, NULL, scheduler->total_runnable_grps); return; @@ -5417,7 +5732,7 @@ static int suspend_active_queue_groups(struct kbase_device *kbdev, } } - ret = wait_csg_slots_suspend(kbdev, slot_mask, kbdev->reset_timeout_ms); + ret = wait_csg_slots_suspend(kbdev, slot_mask); return ret; } @@ -5452,11 +5767,10 @@ static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev) * overflow. */ kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC); - ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev, - kbdev->reset_timeout_ms); + ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev, kbdev->mmu_or_gpu_cache_op_wait_time_ms); if (ret2) { - dev_warn(kbdev->dev, "[%llu] Timeout waiting for cache clean to complete before reset", - kbase_backend_get_cycle_cnt(kbdev)); + dev_err(kbdev->dev, "[%llu] Timeout waiting for CACHE_CLN_INV_L2_LSC", + kbase_backend_get_cycle_cnt(kbdev)); if (!ret) ret = ret2; } @@ -5574,17 +5888,6 @@ unlock: return suspend_on_slot_groups; } -static void cancel_tick_work(struct kbase_csf_scheduler *const scheduler) -{ - cancel_work_sync(&scheduler->tick_work); -} - -static void cancel_tock_work(struct kbase_csf_scheduler *const scheduler) -{ - atomic_set(&scheduler->pending_tock_work, false); - cancel_delayed_work_sync(&scheduler->tock_work); -} - static void scheduler_inner_reset(struct kbase_device *kbdev) { u32 const num_groups = kbdev->csf.global_iface.group_num; @@ -5595,7 +5898,6 @@ static void scheduler_inner_reset(struct kbase_device *kbdev) /* Cancel any potential queued delayed work(s) */ cancel_work_sync(&kbdev->csf.scheduler.gpu_idle_work); - cancel_tick_timer(kbdev); cancel_tick_work(scheduler); cancel_tock_work(scheduler); cancel_delayed_work_sync(&scheduler->ping_work); @@ -5794,8 +6096,7 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, if (!WARN_ON(scheduler->state == SCHED_SUSPENDED)) suspend_queue_group(group); - err = wait_csg_slots_suspend(kbdev, slot_mask, - kbdev->csf.fw_timeout_ms); + err = wait_csg_slots_suspend(kbdev, slot_mask); if (err) { dev_warn(kbdev->dev, "[%llu] Timeout waiting for the group %d to suspend on slot %d", kbase_backend_get_cycle_cnt(kbdev), @@ -5835,7 +6136,7 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, target_page_nr < sus_buf->nr_pages; i++) { struct page *pg = as_page(group->normal_suspend_buf.phy[i]); - void *sus_page = kmap(pg); + void *sus_page = kbase_kmap(pg); if (sus_page) { kbase_sync_single_for_cpu(kbdev, @@ -5846,7 +6147,7 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, sus_buf->pages, sus_page, &to_copy, 
sus_buf->nr_pages, &target_page_nr, offset); - kunmap(pg); + kbase_kunmap(pg, sus_page); if (err) break; } else { @@ -5962,12 +6263,21 @@ static struct kbase_queue_group *scheduler_get_protm_enter_async_group( spin_lock_irqsave(&scheduler->interrupt_lock, flags); - if (kbase_csf_scheduler_protected_mode_in_use(kbdev) || - bitmap_empty(pending, ginfo->stream_num)) + if (bitmap_empty(pending, ginfo->stream_num)) { + dev_dbg(kbdev->dev, + "Pmode requested for group %d of ctx %d_%d with no pending queues", + input_grp->handle, input_grp->kctx->tgid, input_grp->kctx->id); input_grp = NULL; + } else if (kbase_csf_scheduler_protected_mode_in_use(kbdev)) { + kbase_csf_scheduler_invoke_tock(kbdev); + input_grp = NULL; + } spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); } else { + if (group && (group->priority == KBASE_QUEUE_GROUP_PRIORITY_REALTIME)) + kbase_csf_scheduler_invoke_tock(kbdev); + input_grp = NULL; } @@ -5988,8 +6298,8 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group) mutex_lock(&scheduler->lock); - if (group->run_state == KBASE_CSF_GROUP_IDLE) - group->run_state = KBASE_CSF_GROUP_RUNNABLE; + if (on_slot_group_idle_locked(group)) + update_idle_protm_group_state_to_runnable(group); /* Check if the group is now eligible for execution in protected mode. */ if (scheduler_get_protm_enter_async_group(kbdev, group)) scheduler_group_check_protm_enter(kbdev, group); @@ -6256,6 +6566,13 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) { int priority; int err; + struct kbase_device *kbdev = kctx->kbdev; + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + err = gpu_metrics_ctx_init(kctx); + if (err) + return err; +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ kbase_ctx_sched_init_ctx(kctx); @@ -6273,8 +6590,7 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) alloc_ordered_workqueue("mali_kbase_csf_sync_update_wq", WQ_HIGHPRI); if (!kctx->csf.sched.sync_update_wq) { - dev_err(kctx->kbdev->dev, - "Failed to initialize scheduler context workqueue"); + dev_err(kbdev->dev, "Failed to initialize scheduler context workqueue"); err = -ENOMEM; goto alloc_wq_failed; } @@ -6287,8 +6603,7 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) err = kbase_csf_event_wait_add(kctx, check_group_sync_update_cb, kctx); if (err) { - dev_err(kctx->kbdev->dev, - "Failed to register a sync update callback"); + dev_err(kbdev->dev, "Failed to register a sync update callback"); goto event_wait_add_failed; } @@ -6298,6 +6613,9 @@ event_wait_add_failed: destroy_workqueue(kctx->csf.sched.sync_update_wq); alloc_wq_failed: kbase_ctx_sched_remove_ctx(kctx); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + gpu_metrics_ctx_term(kctx); +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ return err; } @@ -6308,6 +6626,74 @@ void kbase_csf_scheduler_context_term(struct kbase_context *kctx) destroy_workqueue(kctx->csf.sched.sync_update_wq); kbase_ctx_sched_remove_ctx(kctx); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + gpu_metrics_ctx_term(kctx); +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ +} + +static int kbase_csf_scheduler_kthread(void *data) +{ + struct kbase_device *const kbdev = data; + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + + while (scheduler->kthread_running) { + struct kbase_queue *queue; + + if (wait_for_completion_interruptible(&scheduler->kthread_signal) != 0) + continue; + reinit_completion(&scheduler->kthread_signal); + + /* Iterate through queues 
@@ -6308,6 +6626,74 @@ void kbase_csf_scheduler_context_term(struct kbase_context *kctx)
destroy_workqueue(kctx->csf.sched.sync_update_wq);
kbase_ctx_sched_remove_ctx(kctx);
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+ gpu_metrics_ctx_term(kctx);
+#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */
+}
+
+static int kbase_csf_scheduler_kthread(void *data)
+{
+ struct kbase_device *const kbdev = data;
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+
+ while (scheduler->kthread_running) {
+ struct kbase_queue *queue;
+
+ if (wait_for_completion_interruptible(&scheduler->kthread_signal) != 0)
+ continue;
+ reinit_completion(&scheduler->kthread_signal);
+
+ /* Iterate through queues with pending kicks */
+ do {
+ u8 prio;
+
+ spin_lock(&kbdev->csf.pending_gpuq_kicks_lock);
+ queue = NULL;
+ for (prio = 0; prio != KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++prio) {
+ if (!list_empty(&kbdev->csf.pending_gpuq_kicks[prio])) {
+ queue = list_first_entry(
+ &kbdev->csf.pending_gpuq_kicks[prio],
+ struct kbase_queue, pending_kick_link);
+ list_del_init(&queue->pending_kick_link);
+ break;
+ }
+ }
+ spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock);
+
+ if (queue != NULL) {
+ WARN_ONCE(
+ prio != queue->group_priority,
+ "Queue %pK has priority %hhu but instead its kick was handled at priority %hhu",
+ (void *)queue, queue->group_priority, prio);
+
+ kbase_csf_process_queue_kick(queue);
+
+ /* Perform a scheduling tock for high-priority queue groups if
+ * required.
+ */
+ BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_REALTIME != 0);
+ BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_HIGH != 1);
+ if ((prio <= KBASE_QUEUE_GROUP_PRIORITY_HIGH) &&
+ atomic_read(&scheduler->pending_tock_work))
+ schedule_on_tock(kbdev);
+ }
+ } while (queue != NULL);
+
+ /* Check if we need to perform a scheduling tick/tock. A tick
+ * event shall override a tock event but not vice-versa.
+ */
+ if (atomic_cmpxchg(&scheduler->pending_tick_work, true, false) == true) {
+ atomic_set(&scheduler->pending_tock_work, false);
+ schedule_on_tick(kbdev);
+ } else if (atomic_read(&scheduler->pending_tock_work)) {
+ schedule_on_tock(kbdev);
+ }
+
+ dev_dbg(kbdev->dev, "Waking up for event after a scheduling iteration.");
+ wake_up_all(&kbdev->csf.event_wait);
+ }
+
+ return 0;
}

int kbase_csf_scheduler_init(struct kbase_device *kbdev)
@@ -6326,31 +6712,51 @@ int kbase_csf_scheduler_init(struct kbase_device *kbdev)
return -ENOMEM;
}

- return 0;
+ init_completion(&scheduler->kthread_signal);
+ scheduler->kthread_running = true;
+ scheduler->gpuq_kthread =
+ kthread_run(&kbase_csf_scheduler_kthread, kbdev, "mali-gpuq-kthread");
+ if (!scheduler->gpuq_kthread) {
+ kfree(scheduler->csg_slots);
+ scheduler->csg_slots = NULL;
+
+ dev_err(kbdev->dev, "Failed to spawn the GPU queue submission worker thread");
+ return -ENOMEM;
+ }
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) && !IS_ENABLED(CONFIG_MALI_NO_MALI)
+ scheduler->gpu_metrics_tb =
+ kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_GPU_METRICS_BUF_NAME);
+ if (!scheduler->gpu_metrics_tb) {
+ scheduler->kthread_running = false;
+ complete(&scheduler->kthread_signal);
+ kthread_stop(scheduler->gpuq_kthread);
+ scheduler->gpuq_kthread = NULL;
+
+ kfree(scheduler->csg_slots);
+ scheduler->csg_slots = NULL;
+
+ dev_err(kbdev->dev, "Failed to get the handler of gpu_metrics from trace buffer");
+ return -ENOENT;
+ }
+#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */
+
+ return kbase_csf_mcu_shared_regs_data_init(kbdev);
}

int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;

- scheduler->timer_enabled = true;
+ atomic_set(&scheduler->timer_enabled, true);

- scheduler->wq = alloc_ordered_workqueue("csf_scheduler_wq", WQ_HIGHPRI);
- if (!scheduler->wq) {
- dev_err(kbdev->dev, "Failed to allocate scheduler workqueue\n");
- return -ENOMEM;
- }
scheduler->idle_wq = alloc_ordered_workqueue(
"csf_scheduler_gpu_idle_wq", WQ_HIGHPRI);
if (!scheduler->idle_wq) {
- dev_err(kbdev->dev,
- "Failed to allocate GPU idle scheduler workqueue\n");
- destroy_workqueue(kbdev->csf.scheduler.wq);
+ dev_err(kbdev->dev, "Failed to allocate GPU idle scheduler workqueue\n");
return -ENOMEM;
}
-
- INIT_WORK(&scheduler->tick_work, schedule_on_tick);
- INIT_DEFERRABLE_WORK(&scheduler->tock_work, schedule_on_tock);
+ atomic_set(&scheduler->pending_tick_work, false);
atomic_set(&scheduler->pending_tock_work, false);

INIT_DEFERRABLE_WORK(&scheduler->ping_work, firmware_aliveness_monitor);
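Note the teardown ordering used in the error path above and, further down, in kbase_csf_scheduler_term(): the running flag is cleared and the completion is signalled before kthread_stop(). The worker sleeps in wait_for_completion_interruptible(), so it must be woken explicitly to observe the flag and return; kthread_stop() alone would not complete the wait. A standalone sketch of the lifecycle, with hypothetical names and the work-draining elided:

#include <linux/completion.h>
#include <linux/kthread.h>

struct worker_state {
	struct completion signal; /* kicked by producers */
	bool running;             /* cleared to request exit */
	struct task_struct *task;
};

static int worker_fn(void *data)
{
	struct worker_state *w = data;

	while (w->running) {
		if (wait_for_completion_interruptible(&w->signal) != 0)
			continue; /* interrupted; re-check the flag */
		reinit_completion(&w->signal);
		/* ... drain pending work here ... */
	}
	return 0;
}

static void worker_stop(struct worker_state *w)
{
	w->running = false;    /* request exit... */
	complete(&w->signal);  /* ...wake the sleeper so it sees the flag... */
	kthread_stop(w->task); /* ...then reap the thread */
}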
@@ -6385,7 +6791,6 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)

hrtimer_init(&scheduler->tick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
scheduler->tick_timer.function = tick_timer_callback;
- scheduler->tick_timer_active = false;

kbase_csf_tiler_heap_reclaim_mgr_init(kbdev);
@@ -6394,6 +6799,14 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)

void kbase_csf_scheduler_term(struct kbase_device *kbdev)
{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+
+ if (scheduler->gpuq_kthread) {
+ scheduler->kthread_running = false;
+ complete(&scheduler->kthread_signal);
+ kthread_stop(scheduler->gpuq_kthread);
+ }
+
if (kbdev->csf.scheduler.csg_slots) {
WARN_ON(atomic_read(&kbdev->csf.scheduler.non_idle_offslot_grps));
/* The unload of Driver can take place only when all contexts have
@@ -6418,22 +6831,19 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev)
mutex_unlock(&kbdev->csf.scheduler.lock);

cancel_delayed_work_sync(&kbdev->csf.scheduler.ping_work);
- cancel_tick_timer(kbdev);
- cancel_tick_work(&kbdev->csf.scheduler);
- cancel_tock_work(&kbdev->csf.scheduler);
kfree(kbdev->csf.scheduler.csg_slots);
kbdev->csf.scheduler.csg_slots = NULL;
}
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_TERMINATED, NULL,
kbase_csf_scheduler_get_nr_active_csgs(kbdev));
+ /* Terminating the MCU shared regions, following the release of slots */
+ kbase_csf_mcu_shared_regs_data_term(kbdev);
}

void kbase_csf_scheduler_early_term(struct kbase_device *kbdev)
{
if (kbdev->csf.scheduler.idle_wq)
destroy_workqueue(kbdev->csf.scheduler.idle_wq);
- if (kbdev->csf.scheduler.wq)
- destroy_workqueue(kbdev->csf.scheduler.wq);

kbase_csf_tiler_heap_reclaim_mgr_term(kbdev);
mutex_destroy(&kbdev->csf.scheduler.lock);
@@ -6455,7 +6865,7 @@ static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev)

lockdep_assert_held(&kbdev->csf.scheduler.lock);

- if (unlikely(!scheduler_timer_is_enabled_nolock(kbdev)))
+ if (unlikely(!kbase_csf_scheduler_timer_is_enabled(kbdev)))
return;

WARN_ON((scheduler->state != SCHED_INACTIVE) &&
@@ -6463,7 +6873,7 @@
(scheduler->state != SCHED_SLEEPING));

if (scheduler->total_runnable_grps > 0) {
- enqueue_tick_work(kbdev);
+ kbase_csf_scheduler_invoke_tick(kbdev);
dev_dbg(kbdev->dev, "Re-enabling the scheduler timer\n");
} else if (scheduler->state != SCHED_SUSPENDED) {
enqueue_gpu_idle_work(scheduler);
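With timer_enabled converted to an atomic_t, the lock-taking query helper (removed in the next hunk) is no longer needed; kbase_csf_scheduler_timer_is_enabled() can presumably be a lockless read in the header, along the lines of the sketch below. This is an assumed implementation, not quoted from the patch:

/* Assumed shape of the lockless accessor after the atomic_t conversion. */
static inline bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev)
{
	return atomic_read(&kbdev->csf.scheduler.timer_enabled) != 0;
}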
@@ -6477,43 +6887,24 @@ void kbase_csf_scheduler_enable_tick_timer(struct kbase_device *kbdev)
mutex_unlock(&kbdev->csf.scheduler.lock);
}

-bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev)
-{
- struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
- bool enabled;
-
- mutex_lock(&scheduler->lock);
- enabled = scheduler_timer_is_enabled_nolock(kbdev);
- mutex_unlock(&scheduler->lock);
-
- return enabled;
-}
-
void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev, bool enable)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
bool currently_enabled;

+ /* This lock is taken to prevent this code being executed concurrently
+ * by userspace.
+ */
mutex_lock(&scheduler->lock);

- currently_enabled = scheduler_timer_is_enabled_nolock(kbdev);
+ currently_enabled = kbase_csf_scheduler_timer_is_enabled(kbdev);
if (currently_enabled && !enable) {
- scheduler->timer_enabled = false;
- cancel_tick_timer(kbdev);
- mutex_unlock(&scheduler->lock);
- /* The non-sync version to cancel the normal work item is not
- * available, so need to drop the lock before cancellation.
- */
+ atomic_set(&scheduler->timer_enabled, false);
cancel_tick_work(scheduler);
- cancel_tock_work(scheduler);
- return;
- }
-
- if (!currently_enabled && enable) {
- scheduler->timer_enabled = true;
-
- scheduler_enable_tick_timer_nolock(kbdev);
+ } else if (!currently_enabled && enable) {
+ atomic_set(&scheduler->timer_enabled, true);
+ kbase_csf_scheduler_invoke_tick(kbdev);
}

mutex_unlock(&scheduler->lock);
@@ -6523,17 +6914,17 @@ void kbase_csf_scheduler_kick(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;

- mutex_lock(&scheduler->lock);
+ if (unlikely(kbase_csf_scheduler_timer_is_enabled(kbdev)))
+ return;

- if (unlikely(scheduler_timer_is_enabled_nolock(kbdev)))
- goto out;
+ /* This lock is taken to prevent this code being executed concurrently
+ * by userspace.
+ */
+ mutex_lock(&scheduler->lock);

- if (scheduler->total_runnable_grps > 0) {
- enqueue_tick_work(kbdev);
- dev_dbg(kbdev->dev, "Kicking the scheduler manually\n");
- }
+ kbase_csf_scheduler_invoke_tick(kbdev);
+ dev_dbg(kbdev->dev, "Kicking the scheduler manually\n");

-out:
mutex_unlock(&scheduler->lock);
}

@@ -6570,7 +6961,7 @@ int kbase_csf_scheduler_pm_suspend_no_lock(struct kbase_device *kbdev)
}
} else {
dev_info(kbdev->dev, "Scheduler PM suspend");
scheduler_suspend(kbdev);
- cancel_tick_timer(kbdev);
+ cancel_tick_work(scheduler);
}
}

@@ -6649,7 +7040,7 @@ void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev)
}
KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_idle);

-int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev)
+static int scheduler_wait_mcu_active(struct kbase_device *kbdev, bool killable_wait)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
unsigned long flags;
@@ -6662,9 +7053,17 @@ int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev)
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
kbase_pm_unlock(kbdev);

- kbase_pm_wait_for_poweroff_work_complete(kbdev);
+ if (killable_wait)
+ err = kbase_pm_killable_wait_for_poweroff_work_complete(kbdev);
+ else
+ err = kbase_pm_wait_for_poweroff_work_complete(kbdev);
+ if (err)
+ return err;

- err = kbase_pm_wait_for_desired_state(kbdev);
+ if (killable_wait)
+ err = kbase_pm_killable_wait_for_desired_state(kbdev);
+ else
+ err = kbase_pm_wait_for_desired_state(kbdev);
if (!err) {
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_ON);
@@ -6673,6 +7072,17 @@ int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev)

return err;
}
+
+int kbase_csf_scheduler_killable_wait_mcu_active(struct kbase_device *kbdev)
+{
+ return scheduler_wait_mcu_active(kbdev, true);
+}
+
+int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev)
+{
+ return scheduler_wait_mcu_active(kbdev, false);
+}
+
KBASE_EXPORT_TEST_API(kbase_csf_scheduler_wait_mcu_active);

#ifdef KBASE_PM_RUNTIME
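The new killable variant lets the wait be aborted by a fatal signal (for instance the OOM killer terminating the process) instead of blocking unkillably, which is why both wait stages now propagate an error. The generic kernel idiom the killable helpers build on, as a minimal self-contained sketch with a hypothetical condition (not the driver's code):

#include <linux/sched.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(state_wq);
static bool state_reached;

/* Returns 0 once the condition holds, or -ERESTARTSYS if a fatal signal
 * arrived while waiting; the caller propagates the error rather than
 * keeping a dying process stuck in the kernel. */
static int wait_killable_sketch(void)
{
	return wait_event_killable(state_wq, state_reached);
}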
@@ -6751,8 +7161,7 @@ void kbase_csf_scheduler_force_sleep(struct kbase_device *kbdev)
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;

mutex_lock(&scheduler->lock);
- if (kbase_pm_gpu_sleep_allowed(kbdev) &&
- (scheduler->state == SCHED_INACTIVE))
+ if (kbase_pm_gpu_sleep_allowed(kbdev) && (scheduler->state == SCHED_INACTIVE))
scheduler_sleep_on_idle(kbdev);
mutex_unlock(&scheduler->lock);
}