/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2022-2023 Intel Corporation
 */

#ifndef _XE_GT_TYPES_H_
#define _XE_GT_TYPES_H_

#include "xe_force_wake_types.h"
#include "xe_gt_idle_types.h"
#include "xe_hw_engine_types.h"
#include "xe_hw_fence_types.h"
#include "xe_reg_sr_types.h"
#include "xe_sa_types.h"
#include "xe_uc_types.h"

struct xe_exec_queue_ops;
struct xe_migrate;
struct xe_ring_ops;

enum xe_gt_type {
	XE_GT_TYPE_UNINITIALIZED,
	XE_GT_TYPE_MAIN,
	XE_GT_TYPE_MEDIA,
};

#define XE_MAX_DSS_FUSE_REGS	3
#define XE_MAX_EU_FUSE_REGS	1

typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(32 * XE_MAX_DSS_FUSE_REGS)];
typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(32 * XE_MAX_EU_FUSE_REGS)];

struct xe_mmio_range {
	u32 start;
	u32 end;
};

/*
 * The hardware has multiple kinds of multicast register ranges that need
 * special register steering (and future platforms are expected to add
 * additional types).
 *
 * During driver startup, we initialize the steering control register to
 * direct reads to a slice/subslice pair that is valid for the 'subslice' class
 * of multicast registers.  If another type of steering does not have any
 * overlap in valid steering targets with 'subslice' style registers, we will
 * need to explicitly re-steer reads of registers of the other type.
 *
 * Only the replication types that may need additional non-default steering
 * are listed here.
 */
enum xe_steering_type {
	L3BANK,
	MSLICE,
	LNCF,
	DSS,
	OADDRM,
	SQIDI_PSMI,

	/*
	 * On some platforms there are multiple types of MCR registers that
	 * will always return a non-terminated value at instance (0, 0).  We'll
	 * lump those all into a single category to keep things simple.
	 */
	INSTANCE0,

	/*
	 * Register ranges that don't need special steering for each register:
	 * it's sufficient to keep the HW-default for the selector, or only
	 * change it once, on GT initialization. This needs to be the last
	 * steering type.
	 */
	IMPLICIT_STEERING,
	NUM_STEERING_TYPES
};
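
/*
 * Illustrative sketch (an assumption, not the driver's actual code): with the
 * per-type register ranges stored in each GT's steering table (see struct
 * xe_gt below), deciding whether an access to a register needs explicit
 * re-steering could look roughly like this; the helper name and the
 * zero-terminated range convention are hypothetical.
 *
 *	static bool example_needs_steering(struct xe_gt *gt, u32 reg,
 *					   enum xe_steering_type type)
 *	{
 *		const struct xe_mmio_range *r = gt->steering[type].ranges;
 *
 *		for (; r && r->end; r++)
 *			if (reg >= r->start && reg <= r->end)
 *				return true;
 *
 *		return false;
 *	}
 */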

#define gt_to_tile(gt__)							\
	_Generic(gt__,								\
		 const struct xe_gt * : (const struct xe_tile *)((gt__)->tile),	\
		 struct xe_gt * : (gt__)->tile)

#define gt_to_xe(gt__)										\
	_Generic(gt__,										\
		 const struct xe_gt * : (const struct xe_device *)(gt_to_tile(gt__)->xe),	\
		 struct xe_gt * : gt_to_tile(gt__)->xe)
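
/*
 * Usage sketch (illustrative only): the _Generic selection above preserves the
 * constness of the GT pointer, so both const and non-const callers work.
 *
 *	static void example(const struct xe_gt *gt)
 *	{
 *		const struct xe_tile *tile = gt_to_tile(gt);
 *		const struct xe_device *xe = gt_to_xe(gt);
 *
 *		...
 *	}
 */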

/**
 * struct xe_gt - A "Graphics Technology" unit of the GPU
 *
 * A GT ("Graphics Technology") is the subset of a GPU primarily responsible
 * for implementing the graphics, compute, and/or media IP.  It encapsulates
 * the hardware engines, programmable execution units, and GuC.   Each GT has
 * its own handling of power management (RC6+forcewake) and multicast register
 * steering.
 *
 * A GPU/tile may have a single GT that supplies all graphics, compute, and
 * media functionality, or the graphics/compute and media may be split into
 * separate GTs within a tile.
 */
struct xe_gt {
	/** @tile: Backpointer to GT's tile */
	struct xe_tile *tile;

	/** @info: GT info */
	struct {
		/** @info.type: type of GT */
		enum xe_gt_type type;
		/** @info.id: Unique ID of this GT within the PCI Device */
		u8 id;
		/** @info.reference_clock: reference clock frequency */
		u32 reference_clock;
		/** @info.engine_mask: mask of engines present on GT */
		u64 engine_mask;
		/**
		 * @info.__engine_mask: mask of engines present on GT, read from
		 * xe_pci.c; used to fake reading the engine_mask from the
		 * hwconfig blob.
		 */
		u64 __engine_mask;
		/** @info.gmdid: raw GMD_ID value from hardware */
		u32 gmdid;
	} info;

	/**
	 * @mmio: mmio info for GT.  All GTs within a tile share the same
	 * register space, but have their own copy of GSI registers at a
	 * specific offset, as well as their own forcewake handling.
	 */
	struct {
		/** @mmio.fw: force wake for GT */
		struct xe_force_wake fw;
		/**
		 * @mmio.adj_limit: adjust MMIO address if address is below this
		 * value
		 */
		u32 adj_limit;
		/** @mmio.adj_offset: offset to add to MMIO address when adjusting */
		u32 adj_offset;
	} mmio;
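
	/*
	 * Adjustment sketch (an illustrative assumption, not the driver's
	 * actual helper): per the fields above, a GSI register address below
	 * @mmio.adj_limit gets @mmio.adj_offset added so that each GT reaches
	 * its own copy of the GSI registers.
	 *
	 *	static u32 example_adjust_mmio_addr(struct xe_gt *gt, u32 addr)
	 *	{
	 *		if (addr < gt->mmio.adj_limit)
	 *			addr += gt->mmio.adj_offset;
	 *
	 *		return addr;
	 *	}
	 */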

	/**
	 * @reg_sr: table with registers to be restored on GT init/resume/reset
	 */
	struct xe_reg_sr reg_sr;

	/** @reset: state for GT resets */
	struct {
		/**
		 * @reset.worker: worker so GT resets can be done asynchronously,
		 * allowing the reset code to safely flush all code paths
		 */
		struct work_struct worker;
	} reset;

	/** @tlb_invalidation: TLB invalidation state */
	struct {
		/** @tlb_invalidation.seqno: TLB invalidation seqno, protected by CT lock */
#define TLB_INVALIDATION_SEQNO_MAX	0x100000
		int seqno;
		/**
		 * @tlb_invalidation.seqno_recv: last received TLB invalidation seqno,
		 * protected by CT lock
		 */
		int seqno_recv;
		/**
		 * @tlb_invalidation.pending_fences: list of pending fences waiting on
		 * TLB invalidations, protected by CT lock
		 */
		struct list_head pending_fences;
		/**
		 * @tlb_invalidation.pending_lock: protects @tlb_invalidation.pending_fences
		 * and updating @tlb_invalidation.seqno_recv.
		 */
		spinlock_t pending_lock;
		/**
		 * @tlb_invalidation.fence_tdr: schedules a delayed call to
		 * xe_gt_tlb_fence_timeout after the timeout interval is over.
		 */
		struct delayed_work fence_tdr;
		/** @tlb_invalidation.fence_context: context for TLB invalidation fences */
		u64 fence_context;
		/**
		 * @tlb_invalidation.fence_seqno: seqno for TLB invalidation fences, protected by
		 * tlb_invalidation.lock
		 */
		u32 fence_seqno;
		/** @tlb_invalidation.lock: protects TLB invalidation fences */
		spinlock_t lock;
	} tlb_invalidation;
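
	/*
	 * Seqno handling sketch (an illustrative assumption, not the actual
	 * send path): with the CT lock held, the invalidation seqno advances
	 * and wraps before reaching TLB_INVALIDATION_SEQNO_MAX, with 0
	 * commonly kept free so it can serve as an "invalid" value.
	 *
	 *	// caller holds the CT lock
	 *	seqno = gt->tlb_invalidation.seqno;
	 *	gt->tlb_invalidation.seqno = (seqno + 1) %
	 *		TLB_INVALIDATION_SEQNO_MAX;
	 *	if (!gt->tlb_invalidation.seqno)
	 *		gt->tlb_invalidation.seqno = 1;
	 */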

	/**
	 * @ccs_mode: Number of compute engines enabled.
	 * Allows fixed mapping of available compute slices to compute engines.
	 * By default only the first available compute engine is enabled and all
	 * available compute slices are allocated to it.
	 */
	u32 ccs_mode;

	/** @usm: unified shared memory state */
	struct {
		/**
		 * @usm.bb_pool: Pool from which batchbuffers for USM operations
		 * (e.g. migrations, fixing page tables) are allocated.
		 * A dedicated pool is needed so USM operations do not get
		 * blocked behind any user operations which may have resulted
		 * in a fault.
		 */
		struct xe_sa_manager *bb_pool;
		/**
		 * @usm.reserved_bcs_instance: reserved BCS instance used for USM
		 * operations (e.g. migrations, fixing page tables)
		 */
		u16 reserved_bcs_instance;
		/** @usm.pf_wq: page fault work queue, unbound, high priority */
		struct workqueue_struct *pf_wq;
		/** @usm.acc_wq: access counter work queue, unbound, high priority */
		struct workqueue_struct *acc_wq;
		/**
		 * @usm.pf_queue: Page fault queue used to sync faults so that
		 * faults can be processed outside of the GuC CT lock. The queue
		 * is sized so it can sync all possible faults (1 per physical
		 * engine). Multiple queues exist so that page faults from
		 * different VMs can be processed in parallel.
		 */
		struct pf_queue {
			/** @usm.pf_queue.gt: back pointer to GT */
			struct xe_gt *gt;
#define PF_QUEUE_NUM_DW	128
			/** @usm.pf_queue.data: data in the page fault queue */
			u32 data[PF_QUEUE_NUM_DW];
			/**
			 * @usm.pf_queue.tail: tail pointer in DWs for page fault queue,
			 * moved by worker which processes faults (consumer).
			 */
			u16 tail;
			/**
			 * @usm.pf_queue.head: head pointer in DWs for page fault queue,
			 * moved by G2H handler (producer).
			 */
			u16 head;
			/** @usm.pf_queue.lock: protects page fault queue */
			spinlock_t lock;
			/** @usm.pf_queue.worker: to process page faults */
			struct work_struct worker;
#define NUM_PF_QUEUE	4
		} pf_queue[NUM_PF_QUEUE];
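
		/*
		 * Ring-space sketch (an illustrative assumption): @usm.pf_queue.data
		 * behaves as a circular buffer of PF_QUEUE_NUM_DW dwords, with
		 * the G2H handler producing at @head and the worker consuming
		 * at @tail, so free space follows the usual circular-buffer
		 * arithmetic (e.g. CIRC_SPACE() from <linux/circ_buf.h>):
		 *
		 *	// with pf_queue->lock held
		 *	if (CIRC_SPACE(pf_queue->head, pf_queue->tail,
		 *		       PF_QUEUE_NUM_DW) >= msg_len_dw)
		 *		... copy the G2H fault message into
		 *		    pf_queue->data and advance head ...
		 */
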
		/**
		 * @usm.acc_queue: Same as the page fault queue; used because
		 * access counters cannot be processed under the CT lock.
		 */
		struct acc_queue {
			/** @usm.acc_queue.gt: back pointer to GT */
			struct xe_gt *gt;
#define ACC_QUEUE_NUM_DW	128
			/** @usm.acc_queue.data: data in the access counter queue */
			u32 data[ACC_QUEUE_NUM_DW];
			/**
			 * @usm.acc_queue.tail: tail pointer in DWs for access counter queue,
			 * moved by worker which processes counters
			 * (consumer).
			 */
			u16 tail;
			/**
			 * @usm.acc_queue.head: head pointer in DWs for access counter queue,
			 * moved by G2H handler (producer).
			 */
			u16 head;
			/** @usm.acc_queue.lock: protects access counter queue */
			spinlock_t lock;
			/** @usm.acc_queue.worker: to process access counters */
			struct work_struct worker;
#define NUM_ACC_QUEUE	4
		} acc_queue[NUM_ACC_QUEUE];
	} usm;

	/** @ordered_wq: used to serialize GT resets and TDRs */
	struct workqueue_struct *ordered_wq;

	/** @uc: micro controllers on the GT */
	struct xe_uc uc;

	/** @gtidle: idle properties of GT */
	struct xe_gt_idle gtidle;

	/** @exec_queue_ops: submission backend exec queue operations */
	const struct xe_exec_queue_ops *exec_queue_ops;

	/**
	 * @ring_ops: ring operations for this hw engine (1 per engine class)
	 */
	const struct xe_ring_ops *ring_ops[XE_ENGINE_CLASS_MAX];

	/** @fence_irq: fence IRQs (1 per engine class) */
	struct xe_hw_fence_irq fence_irq[XE_ENGINE_CLASS_MAX];

	/** @default_lrc: default LRC state */
	void *default_lrc[XE_ENGINE_CLASS_MAX];

	/** @hw_engines: hardware engines on the GT */
	struct xe_hw_engine hw_engines[XE_NUM_HW_ENGINES];

	/** @eclass: per hardware engine class interface on the GT */
	struct xe_hw_engine_class_intf  eclass[XE_ENGINE_CLASS_MAX];

	/** @pcode: GT's PCODE */
	struct {
		/** @pcode.lock: protecting GT's PCODE mailbox data */
		struct mutex lock;
	} pcode;

	/** @sysfs: sysfs' kobj used by xe_gt_sysfs */
	struct kobject *sysfs;

	/** @freq: Main GT freq sysfs control */
	struct kobject *freq;

	/** @mocs: MOCS (memory object control state) info */
	struct {
		/** @mocs.uc_index: UC index */
		u8 uc_index;
		/** @mocs.wb_index: WB index, only used on L3_CCS platforms */
		u8 wb_index;
	} mocs;

	/** @fuse_topo: GT topology reported by fuse registers */
	struct {
		/** @fuse_topo.g_dss_mask: dual-subslices usable by geometry */
		xe_dss_mask_t g_dss_mask;

		/** @fuse_topo.c_dss_mask: dual-subslices usable by compute */
		xe_dss_mask_t c_dss_mask;

		/** @fuse_topo.eu_mask_per_dss: EU mask per DSS */
		xe_eu_mask_t eu_mask_per_dss;
	} fuse_topo;
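
	/*
	 * Topology sketch (illustrative only): the fuse masks above are plain
	 * bitmaps, so the standard bitmap helpers apply, e.g.:
	 *
	 *	num_cdss = bitmap_weight(gt->fuse_topo.c_dss_mask,
	 *				 32 * XE_MAX_DSS_FUSE_REGS);
	 *	if (test_bit(dss, gt->fuse_topo.g_dss_mask))
	 *		... the DSS is usable by geometry ...
	 */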

	/** @steering: register steering for individual HW units */
	struct {
		/** @steering.ranges: register ranges used for this steering type */
		const struct xe_mmio_range *ranges;

		/** @steering.group_target: target to steer accesses to */
		u16 group_target;
		/** @steering.instance_target: instance to steer accesses to */
		u16 instance_target;
	} steering[NUM_STEERING_TYPES];

	/**
	 * @mcr_lock: protects the MCR_SELECTOR register for the duration
	 *    of a steered operation
	 */
	spinlock_t mcr_lock;

	/** @wa_active: keep track of active workarounds */
	struct {
		/** @wa_active.gt: bitmap with active GT workarounds */
		unsigned long *gt;
		/** @wa_active.engine: bitmap with active engine workarounds */
		unsigned long *engine;
		/** @wa_active.lrc: bitmap with active LRC workarounds */
		unsigned long *lrc;
		/** @wa_active.oob: bitmap with active OOB workarounds */
		unsigned long *oob;
	} wa_active;
};

#endif