1 /*
2 *
3 * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
4 *
5 * This program is free software and is provided to you under the terms of the
6 * GNU General Public License version 2 as published by the Free Software
7 * Foundation, and any use by you of this program is subject to the terms
8 * of such GNU licence.
9 *
10 * A copy of the licence is included with the program, and can also be obtained
11 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
12 * Boston, MA 02110-1301, USA.
13 *
14 */
15
16
17
18
19
20 /**
21 * @file mali_kbase_defs.h
22 *
23  * Definitions (types, defines, etc.) common to Kbase. They are placed here to
24 * allow the hierarchy of header files to work.
25 */
26
27 #ifndef _KBASE_DEFS_H_
28 #define _KBASE_DEFS_H_
29
30 #include <mali_kbase_config.h>
31 #include <mali_base_hwconfig_features.h>
32 #include <mali_base_hwconfig_issues.h>
33 #include <mali_kbase_mem_lowlevel.h>
34 #include <mali_kbase_mmu_hw.h>
35 #include <mali_kbase_mmu_mode.h>
36 #include <mali_kbase_instr_defs.h>
37 #include <mali_kbase_pm.h>
38 #include <protected_mode_switcher.h>
39
40 #include <linux/atomic.h>
41 #include <linux/mempool.h>
42 #include <linux/slab.h>
43 #include <linux/file.h>
44
45 #ifdef CONFIG_MALI_FPGA_BUS_LOGGER
46 #include <linux/bus_logger.h>
47 #endif
48
49
50 #ifdef CONFIG_KDS
51 #include <linux/kds.h>
52 #endif /* CONFIG_KDS */
53
54 #if defined(CONFIG_SYNC)
55 #include <sync.h>
56 #else
57 #include "mali_kbase_fence_defs.h"
58 #endif
59
60 #ifdef CONFIG_DEBUG_FS
61 #include <linux/debugfs.h>
62 #endif /* CONFIG_DEBUG_FS */
63
64 #ifdef CONFIG_MALI_DEVFREQ
65 #include <linux/devfreq.h>
66 #endif /* CONFIG_MALI_DEVFREQ */
67
68 #include <linux/clk.h>
69 #include <linux/regulator/consumer.h>
70 #include <soc/rockchip/rockchip_opp_select.h>
71
72 #if defined(CONFIG_PM)
73 #define KBASE_PM_RUNTIME 1
74 #endif
75
76 /** Enable SW tracing when set */
77 #ifdef CONFIG_MALI_MIDGARD_ENABLE_TRACE
78 #define KBASE_TRACE_ENABLE 1
79 #endif
80
81 #ifndef KBASE_TRACE_ENABLE
82 #ifdef CONFIG_MALI_DEBUG
83 #define KBASE_TRACE_ENABLE 1
84 #else
85 #define KBASE_TRACE_ENABLE 0
86 #endif /* CONFIG_MALI_DEBUG */
87 #endif /* KBASE_TRACE_ENABLE */
88
89 /** Dump Job slot trace on error (only active if KBASE_TRACE_ENABLE != 0) */
90 #define KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR 1
91
92 /**
93 * Number of milliseconds before resetting the GPU when a job cannot be "zapped" from the hardware.
94 * Note that the time is actually ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and the GPU
95 * actually being reset to give other contexts time for their jobs to be soft-stopped and removed from the hardware
96 * before resetting.
97 */
98 #define ZAP_TIMEOUT 1000
99
100 /** Number of milliseconds before we time out on a GPU soft/hard reset */
101 #define RESET_TIMEOUT 500
102
103 /**
104  * Prevent soft-stops from occurring in scheduling situations
105  *
106  * This is not due to HW issues, but is used when more predictable scheduling is desired.
107  *
108  * Note that, independently of this setting, soft stop may still be disabled due to HW issues.
109 *
110 * @note Soft stop will still be used for non-scheduling purposes e.g. when terminating a context.
111 *
112 * @note if not in use, define this value to 0 instead of \#undef'ing it
113 */
114 #define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0
115
116 /**
117  * Prevent hard-stops from occurring in scheduling situations
118  *
119  * This is not due to HW issues, but is used when more predictable scheduling is desired.
120 *
121 * @note Hard stop will still be used for non-scheduling purposes e.g. when terminating a context.
122 *
123 * @note if not in use, define this value to 0 instead of \#undef'ing it
124 */
125 #define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0
126
127 /**
128 * The maximum number of Job Slots to support in the Hardware.
129 *
130 * You can optimize this down if your target devices will only ever support a
131 * small number of job slots.
132 */
133 #define BASE_JM_MAX_NR_SLOTS 3
134
135 /**
136 * The maximum number of Address Spaces to support in the Hardware.
137 *
138 * You can optimize this down if your target devices will only ever support a
139 * small number of Address Spaces
140 */
141 #define BASE_MAX_NR_AS 16
142
143 /* mmu */
144 #define MIDGARD_MMU_VA_BITS 48
145
146 #if MIDGARD_MMU_VA_BITS > 39
147 #define MIDGARD_MMU_TOPLEVEL 0
148 #else
149 #define MIDGARD_MMU_TOPLEVEL 1
150 #endif
151
152 #define MIDGARD_MMU_BOTTOMLEVEL 3
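/*
 * Illustrative note (an assumption based on the standard 4 KB-page,
 * 9-bits-per-level page table layout, not stated in this file): with a
 * 12-bit page offset and 9 address bits resolved per level,
 *
 *   48-bit VA = 12 + 4 * 9  ->  levels 0..3, i.e. MIDGARD_MMU_TOPLEVEL 0
 *   39-bit VA = 12 + 3 * 9  ->  levels 1..3, i.e. MIDGARD_MMU_TOPLEVEL 1
 *
 * which is what the conditional above selects.
 */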
153
154 #define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR)
155
156 /** setting in kbase_context::as_nr that indicates it's invalid */
157 #define KBASEP_AS_NR_INVALID (-1)
158
159 #define KBASE_LOCK_REGION_MAX_SIZE (63)
160 #define KBASE_LOCK_REGION_MIN_SIZE (11)
161
162 #define KBASE_TRACE_SIZE_LOG2 8 /* 256 entries */
163 #define KBASE_TRACE_SIZE (1 << KBASE_TRACE_SIZE_LOG2)
164 #define KBASE_TRACE_MASK ((1 << KBASE_TRACE_SIZE_LOG2)-1)
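/*
 * Illustrative sketch (assumed usage, not a quote from the driver): because
 * the trace buffer size is a power of two, ring indices such as
 * kbase_device::trace_next_in and kbase_device::trace_first_out can wrap by
 * masking instead of a modulo:
 *
 *   kbdev->trace_next_in = (kbdev->trace_next_in + 1) & KBASE_TRACE_MASK;
 *   if (kbdev->trace_next_in == kbdev->trace_first_out)
 *           kbdev->trace_first_out =
 *                   (kbdev->trace_first_out + 1) & KBASE_TRACE_MASK;
 */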
165
166 #include "mali_kbase_js_defs.h"
167 #include "mali_kbase_hwaccess_defs.h"
168
169 #define KBASEP_FORCE_REPLAY_DISABLED 0
170
171 /* Maximum force replay limit when randomization is enabled */
172 #define KBASEP_FORCE_REPLAY_RANDOM_LIMIT 16
173
174 /** Atom has been previously soft-stopped */
175 #define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1)
176 /** Atom has previously been retried for execution */
177 #define KBASE_KATOM_FLAGS_RERUN (1<<2)
178 #define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3)
179 /** Atom has been previously hard-stopped. */
180 #define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4)
181 /** Atom has caused us to enter disjoint state */
182 #define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5)
183 /* Atom blocked on cross-slot dependency */
184 #define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7)
185 /* Atom has fail dependency on cross-slot dependency */
186 #define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8)
187 /* Atom is currently in the list of atoms blocked on cross-slot dependencies */
188 #define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9)
189 /* Atom is currently holding a context reference */
190 #define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10)
191 /* Atom requires GPU to be in protected mode */
192 #define KBASE_KATOM_FLAG_PROTECTED (1<<11)
193 /* Atom has been stored in runnable_tree */
194 #define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12)
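/*
 * Illustrative sketch (not part of the driver API): the flags above are bit
 * values OR-ed into kbase_jd_atom::atom_flags, so they are set, tested and
 * cleared with plain bit operations, e.g.:
 *
 *   katom->atom_flags |= KBASE_KATOM_FLAG_X_DEP_BLOCKED;
 *   if (katom->atom_flags & KBASE_KATOM_FLAG_HOLDING_CTX_REF)
 *           katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
 */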
195
196 /* SW related flags about types of JS_COMMAND action
197 * NOTE: These must be masked off by JS_COMMAND_MASK */
198
199 /** This command causes a disjoint event */
200 #define JS_COMMAND_SW_CAUSES_DISJOINT 0x100
201
202 /** Bitmask of all SW related flags */
203 #define JS_COMMAND_SW_BITS (JS_COMMAND_SW_CAUSES_DISJOINT)
204
205 #if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK)
206 #error JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK. Must update JS_COMMAND_SW_<..> bitmasks
207 #endif
208
209 /** Soft-stop command that causes a Disjoint event. This of course isn't
210 * entirely masked off by JS_COMMAND_MASK */
211 #define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \
212 (JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP)
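/*
 * Illustrative sketch (assumed usage): because the SW bits sit above
 * JS_COMMAND_MASK, they can be stripped before the action reaches the
 * hardware while still being visible to the software layers, e.g.:
 *
 *   u32 hw_action = action & JS_COMMAND_MASK;
 *   bool causes_disjoint = action & JS_COMMAND_SW_CAUSES_DISJOINT;
 */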
213
214 #define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT
215
216 /* Serialize atoms within a slot (ie only one atom per job slot) */
217 #define KBASE_SERIALIZE_INTRA_SLOT (1 << 0)
218 /* Serialize atoms between slots (ie only one job slot running at any time) */
219 #define KBASE_SERIALIZE_INTER_SLOT (1 << 1)
220 /* Reset the GPU after each atom completion */
221 #define KBASE_SERIALIZE_RESET (1 << 2)
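/*
 * Illustrative sketch (assumed usage): these values are OR-ed together in
 * kbase_device::serialize_jobs and tested individually, e.g.:
 *
 *   bool one_slot_at_a_time = kbdev->serialize_jobs & KBASE_SERIALIZE_INTER_SLOT;
 *   bool reset_after_each_atom = kbdev->serialize_jobs & KBASE_SERIALIZE_RESET;
 */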
222
223 #ifdef CONFIG_DEBUG_FS
224 struct base_job_fault_event {
225
226 u32 event_code;
227 struct kbase_jd_atom *katom;
228 struct work_struct job_fault_work;
229 struct list_head head;
230 int reg_offset;
231 };
232
233 #endif
234
235 struct kbase_jd_atom_dependency {
236 struct kbase_jd_atom *atom;
237 u8 dep_type;
238 };
239
240 /**
241 * struct kbase_io_access - holds information about 1 register access
242 *
243 * @addr: first bit indicates r/w (r=0, w=1)
244 * @value: value written or read
245 */
246 struct kbase_io_access {
247 uintptr_t addr;
248 u32 value;
249 };
250
251 /**
252 * struct kbase_io_history - keeps track of all recent register accesses
253 *
254 * @enabled: true if register accesses are recorded, false otherwise
255 * @lock: spinlock protecting kbase_io_access array
256 * @count: number of registers read/written
257 * @size: number of elements in kbase_io_access array
258 * @buf: array of kbase_io_access
259 */
260 struct kbase_io_history {
261 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
262 bool enabled;
263 #else
264 u32 enabled;
265 #endif
266
267 spinlock_t lock;
268 size_t count;
269 u16 size;
270 struct kbase_io_access *buf;
271 };
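/*
 * Illustrative sketch (assumed usage, simplified): writers take @lock and
 * record into @buf as a circular buffer, encoding the access direction in the
 * low bit of @addr as documented above:
 *
 *   spin_lock_irqsave(&h->lock, flags);
 *   if (h->enabled) {
 *           struct kbase_io_access *acc = &h->buf[h->count % h->size];
 *
 *           acc->addr = (uintptr_t)reg_addr | (is_write ? 1 : 0);
 *           acc->value = value;
 *           h->count++;
 *   }
 *   spin_unlock_irqrestore(&h->lock, flags);
 */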
272
273 /**
274 * @brief The function retrieves a read-only reference to the atom field from
275 * the kbase_jd_atom_dependency structure
276 *
277 * @param[in] dep kbase jd atom dependency.
278 *
279 * @return readonly reference to dependent ATOM.
280 */
281 static inline const struct kbase_jd_atom *kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
282 {
283 LOCAL_ASSERT(dep != NULL);
284
285 return (const struct kbase_jd_atom *)(dep->atom);
286 }
287
288 /**
289 * @brief The function retrieves a read-only reference to the dependency type field from
290 * the kbase_jd_atom_dependency structure
291 *
292 * @param[in] dep kbase jd atom dependency.
293 *
294 * @return A dependency type value.
295 */
296 static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep)
297 {
298 LOCAL_ASSERT(dep != NULL);
299
300 return dep->dep_type;
301 }
302
303 /**
304 * @brief Setter macro for dep_atom array entry in kbase_jd_atom
305 *
306 * @param[in] dep The kbase jd atom dependency.
307 * @param[in] a The ATOM to be set as a dependency.
308 * @param type The ATOM dependency type to be set.
309 *
310 */
311 static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency *const_dep,
312 struct kbase_jd_atom *a, u8 type)
313 {
314 struct kbase_jd_atom_dependency *dep;
315
316 LOCAL_ASSERT(const_dep != NULL);
317
318 dep = (struct kbase_jd_atom_dependency *)const_dep;
319
320 dep->atom = a;
321 dep->dep_type = type;
322 }
323
324 /**
325 * @brief Setter macro for dep_atom array entry in kbase_jd_atom
326 *
327 * @param[in] dep The kbase jd atom dependency to be cleared.
328 *
329 */
330 static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependency *const_dep)
331 {
332 struct kbase_jd_atom_dependency *dep;
333
334 LOCAL_ASSERT(const_dep != NULL);
335
336 dep = (struct kbase_jd_atom_dependency *)const_dep;
337
338 dep->atom = NULL;
339 dep->dep_type = BASE_JD_DEP_TYPE_INVALID;
340 }
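/*
 * Illustrative sketch (not a quote from the driver): the accessors above are
 * the intended way to manipulate kbase_jd_atom::dep[] entries, using the
 * dependency types defined in the base kernel headers, e.g.:
 *
 *   kbase_jd_katom_dep_set(&katom->dep[0], other_katom, BASE_JD_DEP_TYPE_DATA);
 *   if (kbase_jd_katom_dep_atom(&katom->dep[0]) == other_katom &&
 *       kbase_jd_katom_dep_type(&katom->dep[0]) == BASE_JD_DEP_TYPE_DATA)
 *           kbase_jd_katom_dep_clear(&katom->dep[0]);
 */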
341
342 enum kbase_atom_gpu_rb_state {
343 /* Atom is not currently present in slot ringbuffer */
344 KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB,
345 /* Atom is in slot ringbuffer but is blocked on a previous atom */
346 KBASE_ATOM_GPU_RB_WAITING_BLOCKED,
347 /* Atom is in slot ringbuffer but is waiting for a previous protected
348 * mode transition to complete */
349 KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV,
350 /* Atom is in slot ringbuffer but is waiting for protected mode
351 * transition */
352 KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION,
353 /* Atom is in slot ringbuffer but is waiting for cores to become
354 * available */
355 KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE,
356 /* Atom is in slot ringbuffer but is blocked on affinity */
357 KBASE_ATOM_GPU_RB_WAITING_AFFINITY,
358 /* Atom is in slot ringbuffer and ready to run */
359 KBASE_ATOM_GPU_RB_READY,
360 /* Atom is in slot ringbuffer and has been submitted to the GPU */
361 KBASE_ATOM_GPU_RB_SUBMITTED,
362 /* Atom must be returned to JS as soon as it reaches the head of the
363 * ringbuffer due to a previous failure */
364 KBASE_ATOM_GPU_RB_RETURN_TO_JS = -1
365 };
366
367 enum kbase_atom_enter_protected_state {
368 /*
369 * Starting state:
370 * Check if a transition into protected mode is required.
371 *
372 * NOTE: The integer value of this must
373 * match KBASE_ATOM_EXIT_PROTECTED_CHECK.
374 */
375 KBASE_ATOM_ENTER_PROTECTED_CHECK = 0,
376 /* Wait for vinstr to suspend. */
377 KBASE_ATOM_ENTER_PROTECTED_VINSTR,
378 /* Wait for the L2 to become idle in preparation for
379 * the coherency change. */
380 KBASE_ATOM_ENTER_PROTECTED_IDLE_L2,
381 /* End state;
382 * Prepare coherency change. */
383 KBASE_ATOM_ENTER_PROTECTED_FINISHED,
384 };
385
386 enum kbase_atom_exit_protected_state {
387 /*
388 * Starting state:
389 * Check if a transition out of protected mode is required.
390 *
391 * NOTE: The integer value of this must
392 * match KBASE_ATOM_ENTER_PROTECTED_CHECK.
393 */
394 KBASE_ATOM_EXIT_PROTECTED_CHECK = 0,
395 /* Wait for the L2 to become idle in preparation
396 * for the reset. */
397 KBASE_ATOM_EXIT_PROTECTED_IDLE_L2,
398 /* Issue the protected reset. */
399 KBASE_ATOM_EXIT_PROTECTED_RESET,
400 /* End state;
401 * Wait for the reset to complete. */
402 KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT,
403 };
404
405 struct kbase_ext_res {
406 u64 gpu_address;
407 struct kbase_mem_phy_alloc *alloc;
408 };
409
410 struct kbase_jd_atom {
411 struct work_struct work;
412 ktime_t start_timestamp;
413
414 struct base_jd_udata udata;
415 struct kbase_context *kctx;
416
417 struct list_head dep_head[2];
418 struct list_head dep_item[2];
419 const struct kbase_jd_atom_dependency dep[2];
420 /* List head used during job dispatch job_done processing - as
421 * dependencies may not be entirely resolved at this point, we need to
422 * use a separate list head. */
423 struct list_head jd_item;
424 /* true if atom's jd_item is currently on a list. Prevents atom being
425 * processed twice. */
426 bool in_jd_list;
427
428 u16 nr_extres;
429 struct kbase_ext_res *extres;
430
431 u32 device_nr;
432 u64 affinity;
433 u64 jc;
434 enum kbase_atom_coreref_state coreref_state;
435 #ifdef CONFIG_KDS
436 struct list_head node;
437 struct kds_resource_set *kds_rset;
438 bool kds_dep_satisfied;
439 #endif /* CONFIG_KDS */
440 #if defined(CONFIG_SYNC)
441 /* Stores either an input or output fence, depending on soft-job type */
442 struct sync_fence *fence;
443 struct sync_fence_waiter sync_waiter;
444 #endif /* CONFIG_SYNC */
445 #if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE)
446 struct {
447 /* Use the functions/API defined in mali_kbase_fence.h
448 * when working with this sub struct */
449 #if defined(CONFIG_SYNC_FILE)
450 /* Input fence */
451 #if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
452 struct fence *fence_in;
453 #else
454 struct dma_fence *fence_in;
455 #endif
456 #endif
457 /* This points to the dma-buf output fence for this atom. If
458 * this is NULL then there is no fence for this atom and the
459 * following fields related to dma_fence may have invalid data.
460 *
461 * The context and seqno fields contain the details for this
462 * fence.
463 *
464 * This fence is signaled when the katom is completed,
465 * regardless of the event_code of the katom (signal also on
466 * failure).
467 */
468 #if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
469 struct fence *fence;
470 #else
471 struct dma_fence *fence;
472 #endif
473 /* The dma-buf fence context number for this atom. A unique
474 * context number is allocated to each katom in the context on
475 * context creation.
476 */
477 unsigned int context;
478 /* The dma-buf fence sequence number for this atom. This is
479 * increased every time this katom uses dma-buf fence.
480 */
481 atomic_t seqno;
482 /* This contains a list of all callbacks set up to wait on
483 * other fences. This atom must be held back from JS until all
484 * these callbacks have been called and dep_count has reached
485 * 0. The initial value of dep_count must be equal to the
486 * number of callbacks on this list.
487 *
488 * This list is protected by jctx.lock. Callbacks are added to
489 * this list when the atom is built and the waits are set up.
490 * All the callbacks then stay on the list until all callbacks
491 * have been called and the atom is queued, or cancelled, and
492 * then all callbacks are taken off the list and freed.
493 */
494 struct list_head callbacks;
495 /* Atomic counter of number of outstanding dma-buf fence
496 * dependencies for this atom. When dep_count reaches 0 the
497 * atom may be queued.
498 *
499 * The special value "-1" may only be set after the count
500 * reaches 0, while holding jctx.lock. This indicates that the
501 * atom has been handled, either queued in JS or cancelled.
502 *
503 * If anyone but the dma-fence worker sets this to -1 they must
504 * ensure that any potentially queued worker has
505 * completed before allowing the atom to be marked as unused.
506 * This can be done by flushing the fence work queue:
507 * kctx->dma_fence.wq.
508 */
509 atomic_t dep_count;
510 } dma_fence;
511 #endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE*/
512
513 /* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */
514 enum base_jd_event_code event_code;
515 base_jd_core_req core_req; /**< core requirements */
516 /** Job Slot to retry submitting to if submission from IRQ handler failed
517 *
518 * NOTE: see if this can be unified into another member e.g. the event */
519 int retry_submit_on_slot;
520
521 u32 ticks;
522 /* JS atom priority with respect to other atoms on its kctx. */
523 int sched_priority;
524
525 int poking; /* BASE_HW_ISSUE_8316 */
526
527 wait_queue_head_t completed;
528 enum kbase_jd_atom_state status;
529 #ifdef CONFIG_GPU_TRACEPOINTS
530 int work_id;
531 #endif
532 /* Assigned after atom is completed. Used to check whether PRLAM-10676 workaround should be applied */
533 int slot_nr;
534
535 u32 atom_flags;
536
537 /* Number of times this atom has been retried. Used by replay soft job.
538 */
539 int retry_count;
540
541 enum kbase_atom_gpu_rb_state gpu_rb_state;
542
543 u64 need_cache_flush_cores_retained;
544
545 atomic_t blocked;
546
547 /* Pointer to atom that this atom has same-slot dependency on */
548 struct kbase_jd_atom *pre_dep;
549 /* Pointer to atom that has same-slot dependency on this atom */
550 struct kbase_jd_atom *post_dep;
551
552 /* Pointer to atom that this atom has cross-slot dependency on */
553 struct kbase_jd_atom *x_pre_dep;
554 /* Pointer to atom that has cross-slot dependency on this atom */
555 struct kbase_jd_atom *x_post_dep;
556
557 /* The GPU's flush count recorded at the time of submission, used for
558 * the cache flush optimisation */
559 u32 flush_id;
560
561 struct kbase_jd_atom_backend backend;
562 #ifdef CONFIG_DEBUG_FS
563 struct base_job_fault_event fault_event;
564 #endif
565
566 /* List head used for two different purposes:
567 * 1. Overflow list for JS ring buffers. If an atom is ready to run,
568 * but there is no room in the JS ring buffer, then the atom is put
569 * on the ring buffer's overflow list using this list node.
570 * 2. List of waiting soft jobs.
571 */
572 struct list_head queue;
573
574 /* Used to keep track of all JIT free/alloc jobs in submission order
575 */
576 struct list_head jit_node;
577 bool jit_blocked;
578
579 /* If non-zero, this indicates that the atom will fail with the set
580 * event_code when the atom is processed. */
581 enum base_jd_event_code will_fail_event_code;
582
583 /* Atoms will only ever be transitioning into, or out of
584 * protected mode so we do not need two separate fields.
585 */
586 union {
587 enum kbase_atom_enter_protected_state enter;
588 enum kbase_atom_exit_protected_state exit;
589 } protected_state;
590
591 struct rb_node runnable_tree_node;
592
593 /* 'Age' of atom relative to other atoms in the context. */
594 u32 age;
595 };
596
597 static inline bool kbase_jd_katom_is_protected(const struct kbase_jd_atom *katom)
598 {
599 return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED);
600 }
601
602 /*
603 * Theory of operations:
604 *
605 * Atom objects are statically allocated within the context structure.
606 *
607 * Each atom is the head of two lists, one for the "left" set of dependencies, one for the "right" set.
608 */
609
610 #define KBASE_JD_DEP_QUEUE_SIZE 256
611
612 struct kbase_jd_context {
613 struct mutex lock;
614 struct kbasep_js_kctx_info sched_info;
615 struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT];
616
617 /** Tracks all job-dispatch jobs. This includes those not tracked by
618 * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
619 u32 job_nr;
620
621 /** Waitq that reflects whether there are no jobs (including SW-only
622 * dependency jobs). This is set when no jobs are present on the ctx,
623 * and clear when there are jobs.
624 *
625 * @note: Job Dispatcher knows about more jobs than the Job Scheduler:
626 * the Job Scheduler is unaware of jobs that are blocked on dependencies,
627 * and SW-only dependency jobs.
628 *
629 * This waitq can be waited upon to find out when the context jobs are all
630 * done/cancelled (including those that might've been blocked on
631 * dependencies) - and so, whether it can be terminated. However, it should
632 * only be terminated once it is not present in the run-pool (see
633 * kbasep_js_kctx_info::ctx::is_scheduled).
634 *
635 * Since the waitq is only set under kbase_jd_context::lock,
636 * the waiter should also briefly obtain and drop kbase_jd_context::lock to
637 * guarantee that the setter has completed its work on the kbase_context
638 *
639 * This must be updated atomically with:
640 * - kbase_jd_context::job_nr */
641 wait_queue_head_t zero_jobs_wait;
642
643 /** Job Done workqueue. */
644 struct workqueue_struct *job_done_wq;
645
646 spinlock_t tb_lock;
647 u32 *tb;
648 size_t tb_wrap_offset;
649
650 #ifdef CONFIG_KDS
651 struct kds_callback kds_cb;
652 #endif /* CONFIG_KDS */
653 #ifdef CONFIG_GPU_TRACEPOINTS
654 atomic_t work_id;
655 #endif
656 };
657
658 struct kbase_device_info {
659 u32 features;
660 };
661
662 /** Poking state for BASE_HW_ISSUE_8316 */
663 enum {
664 KBASE_AS_POKE_STATE_IN_FLIGHT = 1<<0,
665 KBASE_AS_POKE_STATE_KILLING_POKE = 1<<1
666 };
667
668 /** Poking state for BASE_HW_ISSUE_8316 */
669 typedef u32 kbase_as_poke_state;
670
671 struct kbase_mmu_setup {
672 u64 transtab;
673 u64 memattr;
674 u64 transcfg;
675 };
676
677 /**
678 * Important: Our code assumes that a struct kbase_as structure is always at
679 * kbase_device->as[number]. This is used to recover the containing
680 * struct kbase_device from a struct kbase_as structure.
681 *
682 * Therefore, struct kbase_as structures must not be allocated anywhere else.
683 */
684 struct kbase_as {
685 int number;
686
687 struct workqueue_struct *pf_wq;
688 struct work_struct work_pagefault;
689 struct work_struct work_busfault;
690 enum kbase_mmu_fault_type fault_type;
691 bool protected_mode;
692 u32 fault_status;
693 u64 fault_addr;
694 u64 fault_extra_addr;
695
696 struct kbase_mmu_setup current_setup;
697
698 /* BASE_HW_ISSUE_8316 */
699 struct workqueue_struct *poke_wq;
700 struct work_struct poke_work;
701 /** Protected by hwaccess_lock */
702 int poke_refcount;
703 /** Protected by hwaccess_lock */
704 kbase_as_poke_state poke_state;
705 struct hrtimer poke_timer;
706 };
707
708 static inline int kbase_as_has_bus_fault(struct kbase_as *as)
709 {
710 return as->fault_type == KBASE_MMU_FAULT_TYPE_BUS;
711 }
712
713 static inline int kbase_as_has_page_fault(struct kbase_as *as)
714 {
715 return as->fault_type == KBASE_MMU_FAULT_TYPE_PAGE;
716 }
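/*
 * Illustrative sketch (assumed usage): an MMU interrupt handler can use the
 * helpers above to route the fault to the matching work item, e.g.:
 *
 *   if (kbase_as_has_bus_fault(as))
 *           queue_work(as->pf_wq, &as->work_busfault);
 *   else if (kbase_as_has_page_fault(as))
 *           queue_work(as->pf_wq, &as->work_pagefault);
 */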
717
718 struct kbasep_mem_device {
719 atomic_t used_pages; /* Tracks usage of OS shared memory. Updated
720 when OS memory is allocated/freed. */
721
722 };
723
724 #define KBASE_TRACE_CODE(X) KBASE_TRACE_CODE_ ## X
725
726 enum kbase_trace_code {
727 /* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
728 * THIS MUST BE USED AT THE START OF THE ENUM */
729 #define KBASE_TRACE_CODE_MAKE_CODE(X) KBASE_TRACE_CODE(X)
730 #include "mali_kbase_trace_defs.h"
731 #undef KBASE_TRACE_CODE_MAKE_CODE
732 /* Comma on its own, to extend the list */
733 ,
734 /* Must be the last in the enum */
735 KBASE_TRACE_CODE_COUNT
736 };
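/*
 * Illustrative note: the enum above is filled in with an X-macro pattern.
 * Each KBASE_TRACE_CODE_MAKE_CODE(FOO) entry in mali_kbase_trace_defs.h
 * (FOO is a placeholder here, not a real trace code) expands to
 * KBASE_TRACE_CODE(FOO), which in turn becomes the enumerator
 * KBASE_TRACE_CODE_FOO, so the list of trace codes lives in one place.
 */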
737
738 #define KBASE_TRACE_FLAG_REFCOUNT (((u8)1) << 0)
739 #define KBASE_TRACE_FLAG_JOBSLOT (((u8)1) << 1)
740
741 struct kbase_trace {
742 struct timespec64 timestamp;
743 u32 thread_id;
744 u32 cpu;
745 void *ctx;
746 bool katom;
747 int atom_number;
748 u64 atom_udata[2];
749 u64 gpu_addr;
750 unsigned long info_val;
751 u8 code;
752 u8 jobslot;
753 u8 refcount;
754 u8 flags;
755 };
756
757 /** Event IDs for the power management framework.
758 *
759 * Any of these events might be missed, so they should not be relied upon to
760 * find the precise state of the GPU at a particular time in the
761 * trace. Overall, we should get a high percentage of these events for
762 * statistical purposes, and so a few missing should not be a problem */
763 enum kbase_timeline_pm_event {
764 /* helper for tests */
765 KBASEP_TIMELINE_PM_EVENT_FIRST,
766
767 /** Event reserved for backwards compatibility with 'init' events */
768 KBASE_TIMELINE_PM_EVENT_RESERVED_0 = KBASEP_TIMELINE_PM_EVENT_FIRST,
769
770 /** The power state of the device has changed.
771 *
772 * Specifically, the device has reached a desired or available state.
773 */
774 KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED,
775
776 /** The GPU is becoming active.
777 *
778 * This event is sent when the first context is about to use the GPU.
779 */
780 KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE,
781
782 /** The GPU is becoming idle.
783 *
784 * This event is sent when the last context has finished using the GPU.
785 */
786 KBASE_TIMELINE_PM_EVENT_GPU_IDLE,
787
788 /** Event reserved for backwards compatibility with 'policy_change'
789 * events */
790 KBASE_TIMELINE_PM_EVENT_RESERVED_4,
791
792 /** Event reserved for backwards compatibility with 'system_suspend'
793 * events */
794 KBASE_TIMELINE_PM_EVENT_RESERVED_5,
795
796 /** Event reserved for backwards compatibility with 'system_resume'
797 * events */
798 KBASE_TIMELINE_PM_EVENT_RESERVED_6,
799
800 /** The job scheduler is requesting to power up/down cores.
801 *
802 * This event is sent when:
803 * - powered down cores are needed to complete a job
804 * - powered up cores are not needed anymore
805 */
806 KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
807
808 KBASEP_TIMELINE_PM_EVENT_LAST = KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
809 };
810
811 #ifdef CONFIG_MALI_TRACE_TIMELINE
812 struct kbase_trace_kctx_timeline {
813 atomic_t jd_atoms_in_flight;
814 u32 owner_tgid;
815 };
816
817 struct kbase_trace_kbdev_timeline {
818 /* Note: strictly speaking, not needed, because it's in sync with
819 * kbase_device::jm_slots[]::submitted_nr
820 *
821 * But it's kept as an example of how to add global timeline tracking
822 * information
823 *
824 * The caller must hold hwaccess_lock when accessing this */
825 u8 slot_atoms_submitted[BASE_JM_MAX_NR_SLOTS];
826
827 /* Last UID for each PM event */
828 atomic_t pm_event_uid[KBASEP_TIMELINE_PM_EVENT_LAST+1];
829 /* Counter for generating PM event UIDs */
830 atomic_t pm_event_uid_counter;
831 /*
832 * L2 transition state - true indicates that the transition is ongoing
833 * Expected to be protected by hwaccess_lock */
834 bool l2_transitioning;
835 };
836 #endif /* CONFIG_MALI_TRACE_TIMELINE */
837
838
839 struct kbasep_kctx_list_element {
840 struct list_head link;
841 struct kbase_context *kctx;
842 };
843
844 /**
845 * Data stored per device for power management.
846 *
847 * This structure contains data for the power management framework. There is one
848 * instance of this structure per device in the system.
849 */
850 struct kbase_pm_device_data {
851 /**
852 * The lock protecting Power Management structures accessed outside of
853 * IRQ.
854 *
855 * This lock must also be held whenever the GPU is being powered on or
856 * off.
857 */
858 struct mutex lock;
859
860 /** The reference count of active contexts on this device. */
861 int active_count;
862 /** Flag indicating suspending/suspended */
863 bool suspending;
864 /* Wait queue set when active_count == 0 */
865 wait_queue_head_t zero_active_count_wait;
866
867 /**
868 * Bit masks identifying the available shader cores that are specified
869 * via sysfs. One mask per job slot.
870 */
871 u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS];
872 u64 debug_core_mask_all;
873
874 /**
875 * Callback for initializing the runtime power management.
876 *
877 * @param kbdev The kbase device
878 *
879 * @return 0 on success, else error code
880 */
881 int (*callback_power_runtime_init)(struct kbase_device *kbdev);
882
883 /**
884 * Callback for terminating the runtime power management.
885 *
886 * @param kbdev The kbase device
887 */
888 void (*callback_power_runtime_term)(struct kbase_device *kbdev);
889
890 /* Time in milliseconds between each dvfs sample */
891 u32 dvfs_period;
892
893 /* Period of GPU poweroff timer */
894 ktime_t gpu_poweroff_time;
895
896 /* Number of ticks of GPU poweroff timer before shader is powered off */
897 int poweroff_shader_ticks;
898
899 /* Number of ticks of GPU poweroff timer before GPU is powered off */
900 int poweroff_gpu_ticks;
901
902 struct kbase_pm_backend_data backend;
903 };
904
905 /**
906 * struct kbase_mem_pool - Page based memory pool for kctx/kbdev
907 * @kbdev: Kbase device where memory is used
908 * @cur_size: Number of free pages currently in the pool (may exceed @max_size
909 * in some corner cases)
910 * @max_size: Maximum number of free pages in the pool
911 * @pool_lock: Lock protecting the pool - must be held when modifying @cur_size
912 * and @page_list
913 * @page_list: List of free pages in the pool
914 * @reclaim: Shrinker for kernel reclaim of free pages
915 * @next_pool: Pointer to next pool where pages can be allocated when this pool
916 * is empty. Pages will spill over to the next pool when this pool
917 * is full. Can be NULL if there is no next pool.
918 */
919 struct kbase_mem_pool {
920 struct kbase_device *kbdev;
921 size_t cur_size;
922 size_t max_size;
923 spinlock_t pool_lock;
924 struct list_head page_list;
925 struct shrinker reclaim;
926
927 struct kbase_mem_pool *next_pool;
928 };
929
930 /**
931 * struct kbase_devfreq_opp - Lookup table for converting between nominal OPP
932 * frequency, and real frequency and core mask
933 * @opp_freq: Nominal OPP frequency
934 * @real_freq: Real GPU frequency
935 * @core_mask: Shader core mask
936 */
937 struct kbase_devfreq_opp {
938 u64 opp_freq;
939 u64 real_freq;
940 u64 core_mask;
941 };
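/*
 * Illustrative sketch (hypothetical lookup, not driver API): devfreq code can
 * translate the nominal OPP frequency reported to the framework into the real
 * frequency and core mask by scanning kbase_device::opp_table, e.g.:
 *
 *   for (i = 0; i < kbdev->num_opps; i++) {
 *           if (kbdev->opp_table[i].opp_freq == nominal_freq) {
 *                   real_freq = kbdev->opp_table[i].real_freq;
 *                   core_mask = kbdev->opp_table[i].core_mask;
 *                   break;
 *           }
 *   }
 */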
942
943 #define DEVNAME_SIZE 16
944
945 struct kbase_device {
946 s8 slot_submit_count_irq[BASE_JM_MAX_NR_SLOTS];
947
948 u32 hw_quirks_sc;
949 u32 hw_quirks_tiler;
950 u32 hw_quirks_mmu;
951 u32 hw_quirks_jm;
952
953 struct list_head entry;
954 struct device *dev;
955 unsigned int kbase_group_error;
956 struct miscdevice mdev;
957 u64 reg_start;
958 size_t reg_size;
959 void __iomem *reg;
960
961 struct {
962 int irq;
963 int flags;
964 } irqs[3];
965
966 struct clk *clock;
967 #ifdef CONFIG_REGULATOR
968 struct regulator *regulator;
969 #endif
970 char devname[DEVNAME_SIZE];
971
972 #ifdef CONFIG_MALI_NO_MALI
973 void *model;
974 struct kmem_cache *irq_slab;
975 struct workqueue_struct *irq_workq;
976 atomic_t serving_job_irq;
977 atomic_t serving_gpu_irq;
978 atomic_t serving_mmu_irq;
979 spinlock_t reg_op_lock;
980 #endif /* CONFIG_MALI_NO_MALI */
981
982 struct kbase_pm_device_data pm;
983 struct kbasep_js_device_data js_data;
984 struct kbase_mem_pool mem_pool;
985 struct kbasep_mem_device memdev;
986 struct kbase_mmu_mode const *mmu_mode;
987
988 struct kbase_as as[BASE_MAX_NR_AS];
989 /* The below variables (as_free and as_to_kctx) are managed by the
990 * Context Scheduler. The kbasep_js_device_data::runpool_irq::lock must
991 * be held whilst accessing these.
992 */
993 u16 as_free; /* Bitpattern of free Address Spaces */
994 /* Mapping from active Address Spaces to kbase_context */
995 struct kbase_context *as_to_kctx[BASE_MAX_NR_AS];
996
997
998 spinlock_t mmu_mask_change;
999
1000 struct kbase_gpu_props gpu_props;
1001
1002 /** List of SW workarounds for HW issues */
1003 unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
1004 /** List of features available */
1005 unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
1006
1007 /* Bitmaps of cores that are currently in use (running jobs).
1008 * These should be kept up to date by the job scheduler.
1009 *
1010 * pm.power_change_lock should be held when accessing these members.
1011 *
1012 * kbase_pm_check_transitions_nolock() should be called when bits are
1013 * cleared to update the power management system and allow transitions to
1014 * occur. */
1015 u64 shader_inuse_bitmap;
1016
1017 /* Refcount for cores in use */
1018 u32 shader_inuse_cnt[64];
1019
1020 /* Bitmaps of cores the JS needs for jobs ready to run */
1021 u64 shader_needed_bitmap;
1022
1023 /* Refcount for cores needed */
1024 u32 shader_needed_cnt[64];
1025
1026 u32 tiler_inuse_cnt;
1027
1028 u32 tiler_needed_cnt;
1029
1030 /* struct for keeping track of the disjoint information
1031 *
1032 * The state is > 0 if the GPU is in a disjoint state. Otherwise 0
1033 * The count is the number of disjoint events that have occurred on the GPU
1034 */
1035 struct {
1036 atomic_t count;
1037 atomic_t state;
1038 } disjoint_event;
1039
1040 /* Refcount for tracking users of the l2 cache, e.g. when using hardware counter instrumentation. */
1041 u32 l2_users_count;
1042
1043 /* Bitmaps of cores that are currently available (powered up and the power policy is happy for jobs to be
1044 * submitted to these cores). These are updated by the power management code. The job scheduler should avoid
1045 * submitting new jobs to any cores that are not marked as available.
1046 *
1047 * pm.power_change_lock should be held when accessing these members.
1048 */
1049 u64 shader_available_bitmap;
1050 u64 tiler_available_bitmap;
1051 u64 l2_available_bitmap;
1052 u64 stack_available_bitmap;
1053
1054 u64 shader_ready_bitmap;
1055 u64 shader_transitioning_bitmap;
1056
1057 s8 nr_hw_address_spaces; /**< Number of address spaces in the GPU (constant after driver initialisation) */
1058 s8 nr_user_address_spaces; /**< Number of address spaces available to user contexts */
1059
1060 /* Structure used for instrumentation and HW counters dumping */
1061 struct kbase_hwcnt {
1062 /* The lock should be used when accessing any of the following members */
1063 spinlock_t lock;
1064
1065 struct kbase_context *kctx;
1066 u64 addr;
1067
1068 struct kbase_instr_backend backend;
1069 } hwcnt;
1070
1071 struct kbase_vinstr_context *vinstr_ctx;
1072
1073 #if KBASE_TRACE_ENABLE
1074 spinlock_t trace_lock;
1075 u16 trace_first_out;
1076 u16 trace_next_in;
1077 struct kbase_trace *trace_rbuf;
1078 #endif
1079
1080 u32 reset_timeout_ms;
1081
1082 struct mutex cacheclean_lock;
1083
1084 /* Platform specific private data to be accessed by mali_kbase_config_xxx.c only */
1085 void *platform_context;
1086
1087 /* List of kbase_contexts created */
1088 struct list_head kctx_list;
1089 struct mutex kctx_list_lock;
1090
1091 struct rockchip_opp_info opp_info;
1092 #ifdef CONFIG_MALI_DEVFREQ
1093 struct devfreq_dev_profile devfreq_profile;
1094 struct devfreq *devfreq;
1095 unsigned long current_freq;
1096 unsigned long current_nominal_freq;
1097 unsigned long current_voltage;
1098 u64 current_core_mask;
1099 struct kbase_devfreq_opp *opp_table;
1100 int num_opps;
1101 struct monitor_dev_info *mdev_info;
1102 #ifdef CONFIG_DEVFREQ_THERMAL
1103 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
1104 struct devfreq_cooling_device *devfreq_cooling;
1105 #else
1106 struct thermal_cooling_device *devfreq_cooling;
1107 #endif
1108 /* Current IPA model - true for configured model, false for fallback */
1109 atomic_t ipa_use_configured_model;
1110 struct {
1111 /* Access to this struct must be with ipa.lock held */
1112 struct mutex lock;
1113 struct kbase_ipa_model *configured_model;
1114 struct kbase_ipa_model *fallback_model;
1115 } ipa;
1116 #endif /* CONFIG_DEVFREQ_THERMAL */
1117 #endif /* CONFIG_MALI_DEVFREQ */
1118
1119
1120 #ifdef CONFIG_MALI_TRACE_TIMELINE
1121 struct kbase_trace_kbdev_timeline timeline;
1122 #endif
1123
1124 /*
1125 * Control for enabling job dump on failure, set when control debugfs
1126 * is opened.
1127 */
1128 bool job_fault_debug;
1129
1130 #ifdef CONFIG_DEBUG_FS
1131 /* directory for debugfs entries */
1132 struct dentry *mali_debugfs_directory;
1133 /* Root directory for per context entry */
1134 struct dentry *debugfs_ctx_directory;
1135
1136 #ifdef CONFIG_MALI_DEBUG
1137 /* bit for each as, set if there is new data to report */
1138 u64 debugfs_as_read_bitmap;
1139 #endif /* CONFIG_MALI_DEBUG */
1140
1141 /* failed job dump, used for separate debug process */
1142 wait_queue_head_t job_fault_wq;
1143 wait_queue_head_t job_fault_resume_wq;
1144 struct workqueue_struct *job_fault_resume_workq;
1145 struct list_head job_fault_event_list;
1146 spinlock_t job_fault_event_lock;
1147 struct kbase_context *kctx_fault;
1148
1149 #if !MALI_CUSTOMER_RELEASE
1150 /* Per-device data for register dumping interface */
1151 struct {
1152 u16 reg_offset; /* Offset of a GPU_CONTROL register to be
1153 dumped upon request */
1154 } regs_dump_debugfs_data;
1155 #endif /* !MALI_CUSTOMER_RELEASE */
1156 #endif /* CONFIG_DEBUG_FS */
1157
1158 /* fbdump profiling controls set by gator */
1159 u32 kbase_profiling_controls[FBDUMP_CONTROL_MAX];
1160
1161
1162 #if MALI_CUSTOMER_RELEASE == 0
1163 /* Number of jobs that are run before a job is forced to fail and
1164 * replay. May be KBASEP_FORCE_REPLAY_DISABLED, to disable forced
1165 * failures. */
1166 int force_replay_limit;
1167 /* Count of jobs between forced failures. Incremented on each job. A
1168 * job is forced to fail once this is greater than or equal to
1169 * force_replay_limit. */
1170 int force_replay_count;
1171 /* Core requirement for jobs to be failed and replayed. May be zero. */
1172 base_jd_core_req force_replay_core_req;
1173 /* true if force_replay_limit should be randomized. The random
1174 * value will be in the range of 1 - KBASEP_FORCE_REPLAY_RANDOM_LIMIT.
1175 */
1176 bool force_replay_random;
1177 #endif
1178
1179 /* Total number of created contexts */
1180 atomic_t ctx_num;
1181
1182 #ifdef CONFIG_DEBUG_FS
1183 /* Holds the most recent register accesses */
1184 struct kbase_io_history io_history;
1185 #endif /* CONFIG_DEBUG_FS */
1186
1187 struct kbase_hwaccess_data hwaccess;
1188
1189 /* Count of page/bus faults waiting for workqueues to process */
1190 atomic_t faults_pending;
1191
1192 /* true if GPU is powered off or power off operation is in progress */
1193 bool poweroff_pending;
1194
1195
1196 /* defaults for new context created for this device */
1197 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0))
1198 bool infinite_cache_active_default;
1199 #else
1200 u32 infinite_cache_active_default;
1201 #endif
1202 size_t mem_pool_max_size_default;
1203
1204 /* current gpu coherency mode */
1205 u32 current_gpu_coherency_mode;
1206 /* system coherency mode */
1207 u32 system_coherency;
1208 /* Flag to track when cci snoops have been enabled on the interface */
1209 bool cci_snoop_enabled;
1210
1211 /* SMC function IDs to call into Trusted firmware to enable/disable
1212 * cache snooping. Value of 0 indicates that they are not used
1213 */
1214 u32 snoop_enable_smc;
1215 u32 snoop_disable_smc;
1216
1217 /* Protected mode operations */
1218 struct protected_mode_ops *protected_ops;
1219
1220 /* Protected device attached to this kbase device */
1221 struct protected_mode_device *protected_dev;
1222
1223 /*
1224 * true when GPU is put into protected mode
1225 */
1226 bool protected_mode;
1227
1228 /*
1229 * true when GPU is transitioning into or out of protected mode
1230 */
1231 bool protected_mode_transition;
1232
1233 /*
1234 * true if protected mode is supported
1235 */
1236 bool protected_mode_support;
1237
1238
1239 #ifdef CONFIG_MALI_DEBUG
1240 wait_queue_head_t driver_inactive_wait;
1241 bool driver_inactive;
1242 #endif /* CONFIG_MALI_DEBUG */
1243
1244 #ifdef CONFIG_MALI_FPGA_BUS_LOGGER
1245 /*
1246 * Bus logger integration.
1247 */
1248 struct bus_logger_client *buslogger;
1249 #endif
1250 /* Boolean indicating if an IRQ flush during reset is in progress. */
1251 bool irq_reset_flush;
1252
1253 /* list of inited sub systems. Used during terminate/error recovery */
1254 u32 inited_subsys;
1255
1256 spinlock_t hwaccess_lock;
1257
1258 /* Protects access to MMU operations */
1259 struct mutex mmu_hw_mutex;
1260
1261 /* Current serialization mode. See KBASE_SERIALIZE_* for details */
1262 u8 serialize_jobs;
1263 };
1264
1265 /**
1266 * struct jsctx_queue - JS context atom queue
1267 * @runnable_tree: Root of RB-tree containing currently runnable atoms on this
1268 * job slot.
1269 * @x_dep_head: Head item of the linked list of atoms blocked on cross-slot
1270 * dependencies. Atoms on this list will be moved to the
1271 * runnable_tree when the blocking atom completes.
1272 *
1273 * hwaccess_lock must be held when accessing this structure.
1274 */
1275 struct jsctx_queue {
1276 struct rb_root runnable_tree;
1277 struct list_head x_dep_head;
1278 };
1279
1280
1281 #define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \
1282 (((minor) & 0xFFF) << 8) | \
1283 ((0 & 0xFF) << 0))
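/*
 * Illustrative sketch (decoding is an assumption derived from the packing
 * above): major occupies bits 31..20 and minor bits 19..8, so a version can
 * be unpacked with:
 *
 *   unsigned int major = (api_version >> 20) & 0xFFF;
 *   unsigned int minor = (api_version >> 8) & 0xFFF;
 */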
1284
1285 /**
1286 * enum kbase_context_flags - Flags for kbase contexts
1287 *
1288 * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit
1289 * process on a 64-bit kernel.
1290 *
1291 * @KCTX_RUNNABLE_REF: Set when context is counted in
1292 * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing.
1293 *
1294 * @KCTX_ACTIVE: Set when the context is active.
1295 *
1296 * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this
1297 * context.
1298 *
1299 * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been
1300 * initialized.
1301 *
1302 * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new
1303 * allocations. Existing allocations will not change.
1304 *
1305 * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs.
1306 *
1307  * @KCTX_PRIVILEGED: Set if the context uses an address space and should be kept
1308 * scheduled in.
1309 *
1310 * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool.
1311 * This is only ever updated whilst the jsctx_mutex is held.
1312 *
1313 * @KCTX_DYING: Set when the context process is in the process of being evicted.
1314 *
1315 * @KCTX_NO_IMPLICIT_SYNC: Set when explicit Android fences are in use on this
1316 * context, to disable use of implicit dma-buf fences. This is used to avoid
1317 * potential synchronization deadlocks.
1318 *
1319 * All members need to be separate bits. This enum is intended for use in a
1320 * bitmask where multiple values get OR-ed together.
1321 */
1322 enum kbase_context_flags {
1323 KCTX_COMPAT = 1U << 0,
1324 KCTX_RUNNABLE_REF = 1U << 1,
1325 KCTX_ACTIVE = 1U << 2,
1326 KCTX_PULLED = 1U << 3,
1327 KCTX_MEM_PROFILE_INITIALIZED = 1U << 4,
1328 KCTX_INFINITE_CACHE = 1U << 5,
1329 KCTX_SUBMIT_DISABLED = 1U << 6,
1330 KCTX_PRIVILEGED = 1U << 7,
1331 KCTX_SCHEDULED = 1U << 8,
1332 KCTX_DYING = 1U << 9,
1333 KCTX_NO_IMPLICIT_SYNC = 1U << 10,
1334 };
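/*
 * Illustrative sketch (field usage only; assumes the atomic_t
 * kbase_context::flags member declared below): the flags above are tested and
 * updated atomically, e.g.:
 *
 *   if (atomic_read(&kctx->flags) & KCTX_SUBMIT_DISABLED)
 *           return -EPERM;
 *   atomic_or(KCTX_ACTIVE, &kctx->flags);
 */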
1335
1336 struct kbase_context {
1337 struct file *filp;
1338 struct kbase_device *kbdev;
1339 int id; /* System wide unique id */
1340 unsigned long api_version;
1341 phys_addr_t pgd;
1342 struct list_head event_list;
1343 struct list_head event_coalesce_list;
1344 struct mutex event_mutex;
1345 atomic_t event_closed;
1346 struct workqueue_struct *event_workq;
1347 atomic_t event_count;
1348 int event_coalesce_count;
1349
1350 atomic_t flags;
1351
1352 atomic_t setup_complete;
1353 atomic_t setup_in_progress;
1354
1355 u64 *mmu_teardown_pages;
1356
1357 struct page *aliasing_sink_page;
1358
1359 struct mutex mmu_lock;
1360 struct mutex reg_lock; /* To be converted to a rwlock? */
1361 struct rb_root reg_rbtree_same; /* RB tree of GPU (live) regions,
1362 * SAME_VA zone */
1363 struct rb_root reg_rbtree_exec; /* RB tree of GPU (live) regions,
1364 * EXEC zone */
1365 struct rb_root reg_rbtree_custom; /* RB tree of GPU (live) regions,
1366 * CUSTOM_VA zone */
1367
1368 unsigned long cookies;
1369 struct kbase_va_region *pending_regions[BITS_PER_LONG];
1370
1371 wait_queue_head_t event_queue;
1372 pid_t tgid;
1373 pid_t pid;
1374
1375 struct kbase_jd_context jctx;
1376 atomic_t used_pages;
1377 atomic_t nonmapped_pages;
1378
1379 struct kbase_mem_pool mem_pool;
1380
1381 struct shrinker reclaim;
1382 struct list_head evict_list;
1383
1384 struct list_head waiting_soft_jobs;
1385 spinlock_t waiting_soft_jobs_lock;
1386 #ifdef CONFIG_KDS
1387 struct list_head waiting_kds_resource;
1388 #endif
1389 #ifdef CONFIG_MALI_DMA_FENCE
1390 struct {
1391 struct list_head waiting_resource;
1392 struct workqueue_struct *wq;
1393 } dma_fence;
1394 #endif /* CONFIG_MALI_DMA_FENCE */
1395 /** This is effectively part of the Run Pool, because it only has a valid
1396 * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in
1397 *
1398 * The hwaccess_lock must be held whilst accessing this.
1399 *
1400 * If the context relating to this as_nr is required, you must use
1401 * kbasep_js_runpool_retain_ctx() to ensure that the context doesn't disappear
1402 * whilst you're using it. Alternatively, just hold the hwaccess_lock
1403 * to ensure the context doesn't disappear (but this has restrictions on what other locks
1404 * you can take whilst doing this) */
1405 int as_nr;
1406
1407 /* Keeps track of the number of users of this context. A user can be a
1408 * job that is available for execution, instrumentation needing to 'pin'
1409 * a context for counter collection, etc. If the refcount reaches 0 then
1410 * this context is considered inactive and the previously programmed
1411 * AS might be cleared at any point.
1412 */
1413 atomic_t refcount;
1414
1415 /* NOTE:
1416 *
1417 * Flags are in jctx.sched_info.ctx.flags
1418 * Mutable flags *must* be accessed under jctx.sched_info.ctx.jsctx_mutex
1419 *
1420 * All other flags must be added there */
1421 spinlock_t mm_update_lock;
1422 struct mm_struct *process_mm;
1423 /* End of the SAME_VA zone */
1424 u64 same_va_end;
1425
1426 #ifdef CONFIG_MALI_TRACE_TIMELINE
1427 struct kbase_trace_kctx_timeline timeline;
1428 #endif
1429 #ifdef CONFIG_DEBUG_FS
1430 /* Content of mem_profile file */
1431 char *mem_profile_data;
1432 /* Size of @c mem_profile_data */
1433 size_t mem_profile_size;
1434 /* Mutex guarding memory profile state */
1435 struct mutex mem_profile_lock;
1436 /* Memory profile directory under debugfs */
1437 struct dentry *kctx_dentry;
1438
1439 /* for job fault debug */
1440 unsigned int *reg_dump;
1441 atomic_t job_fault_count;
1442 /* This list holds the subsequent atoms of the same context while the
1443 * job fault dump is in progress
1444 */
1445 struct list_head job_fault_resume_event_list;
1446
1447 #endif /* CONFIG_DEBUG_FS */
1448
1449 struct jsctx_queue jsctx_queue
1450 [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS];
1451
1452 /* Number of atoms currently pulled from this context */
1453 atomic_t atoms_pulled;
1454 /* Number of atoms currently pulled from this context, per slot */
1455 atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS];
1456 /* Number of atoms currently pulled from this context, per slot and
1457 * priority. Hold hwaccess_lock when accessing */
1458 int atoms_pulled_slot_pri[BASE_JM_MAX_NR_SLOTS][
1459 KBASE_JS_ATOM_SCHED_PRIO_COUNT];
1460
1461 /* true if slot is blocked on the given priority. This will be set on a
1462 * soft-stop */
1463 bool blocked_js[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT];
1464
1465 /* Bitmask of slots that can be pulled from */
1466 u32 slots_pullable;
1467
1468 /* Backend specific data */
1469 struct kbase_context_backend backend;
1470
1471 /* Work structure used for deferred ASID assignment */
1472 struct work_struct work;
1473
1474 /* Only one userspace vinstr client per kbase context */
1475 struct kbase_vinstr_client *vinstr_cli;
1476 struct mutex vinstr_cli_lock;
1477
1478 /* List of completed jobs waiting for events to be posted */
1479 struct list_head completed_jobs;
1480 /* Number of work items currently pending on job_done_wq */
1481 atomic_t work_count;
1482
1483 /* Waiting soft-jobs will fail when this timer expires */
1484 struct timer_list soft_job_timeout;
1485
1486 /* JIT allocation management */
1487 struct kbase_va_region *jit_alloc[256];
1488 struct list_head jit_active_head;
1489 struct list_head jit_pool_head;
1490 struct list_head jit_destroy_head;
1491 struct mutex jit_evict_lock;
1492 struct work_struct jit_work;
1493
1494 /* A list of the JIT soft-jobs in submission order
1495 * (protected by kbase_jd_context.lock)
1496 */
1497 struct list_head jit_atoms_head;
1498 /* A list of pending JIT alloc soft-jobs (using the 'queue' list_head)
1499 * (protected by kbase_jd_context.lock)
1500 */
1501 struct list_head jit_pending_alloc;
1502
1503 /* External sticky resource management */
1504 struct list_head ext_res_meta_head;
1505
1506 /* Used to record that a drain was requested from atomic context */
1507 atomic_t drain_pending;
1508
1509 /* Current age count, used to determine age for newly submitted atoms */
1510 u32 age_count;
1511 };
1512
1513 /**
1514 * struct kbase_ctx_ext_res_meta - Structure which binds an external resource
1515 * to a @kbase_context.
1516 * @ext_res_node: List head for adding the metadata to a
1517 * @kbase_context.
1518 * @alloc: The physical memory allocation structure
1519 * which is mapped.
1520 * @gpu_addr: The GPU virtual address the resource is
1521 * mapped to.
1522 *
1523 * External resources can be mapped into multiple contexts as well as the same
1524 * context multiple times.
1525 * As kbase_va_region itself isn't refcounted we can't attach our extra
1526 * information to it as it could be removed under our feet leaving external
1527 * resources pinned.
1528 * This metadata structure binds a single external resource to a single
1529 * context, ensuring that per context mapping is tracked separately so it can
1530 * be overridden when needed and abuses by the application (freeing the resource
1531 * multiple times) don't affect the refcount of the physical allocation.
1532 */
1533 struct kbase_ctx_ext_res_meta {
1534 struct list_head ext_res_node;
1535 struct kbase_mem_phy_alloc *alloc;
1536 u64 gpu_addr;
1537 };
1538
1539 enum kbase_reg_access_type {
1540 REG_READ,
1541 REG_WRITE
1542 };
1543
1544 enum kbase_share_attr_bits {
1545 /* (1ULL << 8) bit is reserved */
1546 SHARE_BOTH_BITS = (2ULL << 8), /* inner and outer shareable coherency */
1547 SHARE_INNER_BITS = (3ULL << 8) /* inner shareable coherency */
1548 };
1549
1550 /**
1551 * kbase_device_is_cpu_coherent - Returns if the device is CPU coherent.
1552 * @kbdev: kbase device
1553 *
1554 * Return: true if the device access are coherent, false if not.
1555 */
1556 static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
1557 {
1558 if ((kbdev->system_coherency == COHERENCY_ACE_LITE) ||
1559 (kbdev->system_coherency == COHERENCY_ACE))
1560 return true;
1561
1562 return false;
1563 }
1564
1565 /* Conversion helpers for setting up high resolution timers */
1566 #define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U))
1567 #define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x))
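/*
 * Illustrative sketch (the 5 ms delay is hypothetical): these helpers produce
 * a ktime_t suitable for hrtimer_start(), e.g.:
 *
 *   hrtimer_start(&as->poke_timer, HR_TIMER_DELAY_MSEC(5), HRTIMER_MODE_REL);
 */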
1568
1569 /* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */
1570 #define KBASE_CLEAN_CACHE_MAX_LOOPS 100000
1571 /* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */
1572 #define KBASE_AS_INACTIVE_MAX_LOOPS 100000
1573
1574 /* Maximum number of times a job can be replayed */
1575 #define BASEP_JD_REPLAY_LIMIT 15
1576
1577 /* JobDescriptorHeader - taken from the architecture specifications, the layout
1578 * is currently identical for all GPU archs. */
1579 struct job_descriptor_header {
1580 u32 exception_status;
1581 u32 first_incomplete_task;
1582 u64 fault_pointer;
1583 u8 job_descriptor_size : 1;
1584 u8 job_type : 7;
1585 u8 job_barrier : 1;
1586 u8 _reserved_01 : 1;
1587 u8 _reserved_1 : 1;
1588 u8 _reserved_02 : 1;
1589 u8 _reserved_03 : 1;
1590 u8 _reserved_2 : 1;
1591 u8 _reserved_04 : 1;
1592 u8 _reserved_05 : 1;
1593 u16 job_index;
1594 u16 job_dependency_index_1;
1595 u16 job_dependency_index_2;
1596 union {
1597 u64 _64;
1598 u32 _32;
1599 } next_job;
1600 };
1601
1602 #endif /* _KBASE_DEFS_H_ */
1603