1/*
2 * Copyright © 2022 Imagination Technologies Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#include <assert.h>
25#include <stdbool.h>
26#include <stddef.h>
27#include <stdint.h>
28#include <vulkan/vulkan.h>
29
30#include "hwdef/rogue_hw_utils.h"
31#include "pvr_bo.h"
32#include "pvr_cdm_load_sr.h"
33#include "pvr_csb.h"
34#include "pvr_job_context.h"
35#include "pvr_pds.h"
36#include "pvr_private.h"
37#include "pvr_transfer_eot.h"
38#include "pvr_types.h"
39#include "pvr_vdm_load_sr.h"
40#include "pvr_vdm_store_sr.h"
41#include "pvr_winsys.h"
42#include "util/macros.h"
43#include "vk_alloc.h"
44#include "vk_log.h"
45
46/* TODO: Is there some way to ensure the Vulkan driver doesn't exceed this
47 * value when constructing the control stream?
48 */
49/* The VDM callstack is used by the hardware to implement control stream links
50 * with a return, i.e. sub-control streams/subroutines. This value specifies the
51 * maximum callstack depth.
52 */
53#define PVR_VDM_CALLSTACK_MAX_DEPTH 1U
54
55#define ROGUE_PDS_TASK_PROGRAM_SIZE 256U
56
57static VkResult pvr_ctx_reset_cmd_init(struct pvr_device *device,
58                                       struct pvr_reset_cmd *const reset_cmd)
59{
60   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
61
62   /* The reset framework depends on compute support in the hw. */
63   assert(PVR_HAS_FEATURE(dev_info, compute));
64
65   if (PVR_HAS_QUIRK(dev_info, 51764))
66      pvr_finishme("Missing reset support for brn51764");
67
68   if (PVR_HAS_QUIRK(dev_info, 58839))
69      pvr_finishme("Missing reset support for brn58839");
70
71   return VK_SUCCESS;
72}
73
/* Counterpart to pvr_ctx_reset_cmd_init(). Currently a no-op since init
 * allocates nothing yet.
 */
static void pvr_ctx_reset_cmd_fini(struct pvr_device *device,
                                   struct pvr_reset_cmd *reset_cmd)

{
   /* TODO: reset command cleanup. */
}
80
81static VkResult pvr_pds_pt_store_program_create_and_upload(
82   struct pvr_device *device,
83   struct pvr_bo *pt_bo,
84   uint32_t pt_bo_size,
85   struct pvr_pds_upload *const pds_upload_out)
86{
87   struct pvr_pds_stream_out_terminate_program program = { 0 };
88   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
89   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
90   size_t staging_buffer_size;
91   uint32_t *staging_buffer;
92   uint32_t *data_buffer;
93   uint32_t *code_buffer;
94   VkResult result;
95
96   /* Check the bo size can be converted to dwords without any rounding. */
97   assert(pt_bo_size % 4 == 0);
98
99   program.pds_persistent_temp_size_to_store = pt_bo_size / 4;
100   program.dev_address_for_storing_persistent_temp = pt_bo->vma->dev_addr.addr;
101
102   pvr_pds_generate_stream_out_terminate_program(&program,
103                                                 NULL,
104                                                 PDS_GENERATE_SIZES,
105                                                 dev_info);
106
107   staging_buffer_size = (program.stream_out_terminate_pds_data_size +
108                          program.stream_out_terminate_pds_code_size) *
109                         sizeof(*staging_buffer);
110
111   staging_buffer = vk_zalloc(&device->vk.alloc,
112                              staging_buffer_size,
113                              8,
114                              VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
115   if (!staging_buffer)
116      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
117
118   data_buffer = staging_buffer;
119   code_buffer =
120      pvr_pds_generate_stream_out_terminate_program(&program,
121                                                    data_buffer,
122                                                    PDS_GENERATE_DATA_SEGMENT,
123                                                    dev_info);
124   pvr_pds_generate_stream_out_terminate_program(&program,
125                                                 code_buffer,
126                                                 PDS_GENERATE_CODE_SEGMENT,
127                                                 dev_info);
128
129   /* This PDS program is passed to the HW via the PPP state words. These only
130    * allow the data segment address to be specified and expect the code
131    * segment to immediately follow. Assume the code alignment is the same as
132    * the data.
133    */
134   result =
135      pvr_gpu_upload_pds(device,
136                         data_buffer,
137                         program.stream_out_terminate_pds_data_size,
138                         PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE),
139                         code_buffer,
140                         program.stream_out_terminate_pds_code_size,
141                         PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE),
142                         cache_line_size,
143                         pds_upload_out);
144
145   vk_free(&device->vk.alloc, staging_buffer);
146
147   return result;
148}
149
150static VkResult pvr_pds_pt_resume_program_create_and_upload(
151   struct pvr_device *device,
152   struct pvr_bo *pt_bo,
153   uint32_t pt_bo_size,
154   struct pvr_pds_upload *const pds_upload_out)
155{
156   struct pvr_pds_stream_out_init_program program = { 0 };
157   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
158   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
159   size_t staging_buffer_size;
160   uint32_t *staging_buffer;
161   uint32_t *data_buffer;
162   uint32_t *code_buffer;
163   VkResult result;
164
165   /* Check the bo size can be converted to dwords without any rounding. */
166   assert(pt_bo_size % 4 == 0);
167
168   program.num_buffers = 1;
169   program.pds_buffer_data_size[0] = pt_bo_size / 4;
170   program.dev_address_for_buffer_data[0] = pt_bo->vma->dev_addr.addr;
171
172   pvr_pds_generate_stream_out_init_program(&program,
173                                            NULL,
174                                            false,
175                                            PDS_GENERATE_SIZES,
176                                            dev_info);
177
178   staging_buffer_size = (program.stream_out_init_pds_data_size +
179                          program.stream_out_init_pds_code_size) *
180                         sizeof(*staging_buffer);
181
182   staging_buffer = vk_zalloc(&device->vk.alloc,
183                              staging_buffer_size,
184                              8,
185                              VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
186   if (!staging_buffer)
187      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
188
189   data_buffer = staging_buffer;
190   code_buffer =
191      pvr_pds_generate_stream_out_init_program(&program,
192                                               data_buffer,
193                                               false,
194                                               PDS_GENERATE_DATA_SEGMENT,
195                                               dev_info);
196   pvr_pds_generate_stream_out_init_program(&program,
197                                            code_buffer,
198                                            false,
199                                            PDS_GENERATE_CODE_SEGMENT,
200                                            dev_info);
201
202   /* This PDS program is passed to the HW via the PPP state words. These only
203    * allow the data segment address to be specified and expect the code
204    * segment to immediately follow. Assume the code alignment is the same as
205    * the data.
206    */
207   result =
208      pvr_gpu_upload_pds(device,
209                         data_buffer,
210                         program.stream_out_init_pds_data_size,
211                         PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE),
212                         code_buffer,
213                         program.stream_out_init_pds_code_size,
214                         PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE),
215                         cache_line_size,
216                         pds_upload_out);
217
218   vk_free(&device->vk.alloc, staging_buffer);
219
220   return result;
221}
222
223static VkResult
224pvr_render_job_pt_programs_setup(struct pvr_device *device,
225                                 struct rogue_pt_programs *pt_programs)
226{
227   VkResult result;
228
229   result = pvr_bo_alloc(device,
230                         device->heaps.pds_heap,
231                         ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_SIZE,
232                         ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_ALIGNMENT,
233                         PVR_BO_ALLOC_FLAG_CPU_ACCESS,
234                         &pt_programs->store_resume_state_bo);
235   if (result != VK_SUCCESS)
236      return result;
237
238   result = pvr_pds_pt_store_program_create_and_upload(
239      device,
240      pt_programs->store_resume_state_bo,
241      ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_SIZE,
242      &pt_programs->pds_store_program);
243   if (result != VK_SUCCESS)
244      goto err_free_store_resume_state_bo;
245
246   result = pvr_pds_pt_resume_program_create_and_upload(
247      device,
248      pt_programs->store_resume_state_bo,
249      ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_SIZE,
250      &pt_programs->pds_resume_program);
251   if (result != VK_SUCCESS)
252      goto err_free_pds_store_program;
253
254   return VK_SUCCESS;
255
256err_free_pds_store_program:
257   pvr_bo_free(device, pt_programs->pds_store_program.pvr_bo);
258
259err_free_store_resume_state_bo:
260   pvr_bo_free(device, pt_programs->store_resume_state_bo);
261
262   return result;
263}
264
/* Frees everything allocated by pvr_render_job_pt_programs_setup(), in
 * reverse allocation order.
 */
static void
pvr_render_job_pt_programs_cleanup(struct pvr_device *device,
                                   struct rogue_pt_programs *pt_programs)
{
   pvr_bo_free(device, pt_programs->pds_resume_program.pvr_bo);
   pvr_bo_free(device, pt_programs->pds_store_program.pvr_bo);
   pvr_bo_free(device, pt_programs->store_resume_state_bo);
}
273
274static void pvr_pds_ctx_sr_program_setup(
275   bool cc_enable,
276   uint64_t usc_program_upload_offset,
277   uint8_t usc_temps,
278   pvr_dev_addr_t sr_addr,
279   struct pvr_pds_shared_storing_program *const program_out)
280{
281   /* The PDS task is the same for stores and loads. */
282   *program_out = (struct pvr_pds_shared_storing_program){
283		.cc_enable = cc_enable,
284		.doutw_control = {
285			.dest_store = PDS_UNIFIED_STORE,
286			.num_const64 = 2,
287			.doutw_data = {
288				[0] = sr_addr.addr,
289				[1] = sr_addr.addr + ROGUE_LLS_SHARED_REGS_RESERVE_SIZE,
290			},
291			.last_instruction = false,
292		},
293	};
294
295   pvr_pds_setup_doutu(&program_out->usc_task.usc_task_control,
296                       usc_program_upload_offset,
297                       usc_temps,
298                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
299                       false);
300}
301
302/* Note: pvr_pds_compute_ctx_sr_program_create_and_upload() is very similar to
303 * this. If there is a problem here it's likely that the same problem exists
304 * there so don't forget to update the compute function.
305 */
306static VkResult pvr_pds_render_ctx_sr_program_create_and_upload(
307   struct pvr_device *device,
308   uint64_t usc_program_upload_offset,
309   uint8_t usc_temps,
310   pvr_dev_addr_t sr_addr,
311   struct pvr_pds_upload *const pds_upload_out)
312{
313   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
314   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
315   const uint32_t pds_data_alignment =
316      PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U;
317
318   /* FIXME: pvr_pds_generate_shared_storing_program() doesn't return the data
319    * and code size when using the PDS_GENERATE_SIZES mode.
320    */
321   STATIC_ASSERT(ROGUE_PDS_TASK_PROGRAM_SIZE % 4 == 0);
322   uint32_t staging_buffer[ROGUE_PDS_TASK_PROGRAM_SIZE / 4U] = { 0 };
323   struct pvr_pds_shared_storing_program program;
324   ASSERTED uint32_t *buffer_end;
325   uint32_t code_offset;
326
327   pvr_pds_ctx_sr_program_setup(false,
328                                usc_program_upload_offset,
329                                usc_temps,
330                                sr_addr,
331                                &program);
332
333   pvr_pds_generate_shared_storing_program(&program,
334                                           &staging_buffer[0],
335                                           PDS_GENERATE_DATA_SEGMENT,
336                                           dev_info);
337
338   code_offset = ALIGN_POT(program.data_size, pds_data_alignment);
339
340   buffer_end =
341      pvr_pds_generate_shared_storing_program(&program,
342                                              &staging_buffer[code_offset],
343                                              PDS_GENERATE_CODE_SEGMENT,
344                                              dev_info);
345
346   assert((uint32_t)(buffer_end - staging_buffer) * 4 <
347          ROGUE_PDS_TASK_PROGRAM_SIZE);
348
349   return pvr_gpu_upload_pds(device,
350                             &staging_buffer[0],
351                             program.data_size,
352                             PVRX(VDMCTRL_PDS_STATE1_PDS_DATA_ADDR_ALIGNMENT),
353                             &staging_buffer[code_offset],
354                             program.code_size,
355                             PVRX(VDMCTRL_PDS_STATE2_PDS_CODE_ADDR_ALIGNMENT),
356                             cache_line_size,
357                             pds_upload_out);
358}
359
/* Note: pvr_pds_render_ctx_sr_program_create_and_upload() is very similar to
 * this. If there is a problem here it's likely that the same problem exists
 * there so don't forget to update the render_ctx function.
 *
 * Creates and uploads a shared register (SR) store or load PDS program for a
 * compute context. Unlike the render variant, the load program may need a
 * software barrier prefix (see PVR_NEED_SW_COMPUTE_PDS_BARRIER below).
 */
static VkResult pvr_pds_compute_ctx_sr_program_create_and_upload(
   struct pvr_device *device,
   bool is_loading_program,
   uint64_t usc_program_upload_offset,
   uint8_t usc_temps,
   pvr_dev_addr_t sr_addr,
   struct pvr_pds_upload *const pds_upload_out)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
   const uint32_t pds_data_alignment =
      PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U;

   /* FIXME: pvr_pds_generate_shared_storing_program() doesn't return the data
    * and code size when using the PDS_GENERATE_SIZES mode.
    */
   STATIC_ASSERT(ROGUE_PDS_TASK_PROGRAM_SIZE % 4 == 0);
   uint32_t staging_buffer[ROGUE_PDS_TASK_PROGRAM_SIZE / 4U] = { 0 };
   struct pvr_pds_shared_storing_program program;
   uint32_t *buffer_ptr;
   uint32_t code_offset;

   /* cc_enable is tied to ERN 35421 on the compute path; the render path
    * always passes false.
    */
   pvr_pds_ctx_sr_program_setup(PVR_HAS_ERN(dev_info, 35421),
                                usc_program_upload_offset,
                                usc_temps,
                                sr_addr,
                                &program);

   /* Emit the data segment at the start of the staging buffer. The loading
    * variant of the generator is only used when a SW PDS barrier is needed.
    */
   if (is_loading_program && PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
      pvr_pds_generate_compute_shared_loading_program(&program,
                                                      &staging_buffer[0],
                                                      PDS_GENERATE_DATA_SEGMENT,
                                                      dev_info);
   } else {
      pvr_pds_generate_shared_storing_program(&program,
                                              &staging_buffer[0],
                                              PDS_GENERATE_DATA_SEGMENT,
                                              dev_info);
   }

   /* The code segment starts at the first data-size-unit boundary after the
    * data segment.
    */
   code_offset = ALIGN_POT(program.data_size, pds_data_alignment);

   /* The code segment is prefixed with a conditional barrier program;
    * buffer_ptr then points just past it so the main program follows.
    */
   buffer_ptr =
      pvr_pds_generate_compute_barrier_conditional(&staging_buffer[code_offset],
                                                   PDS_GENERATE_CODE_SEGMENT);

   if (is_loading_program && PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
      buffer_ptr = pvr_pds_generate_compute_shared_loading_program(
         &program,
         buffer_ptr,
         PDS_GENERATE_CODE_SEGMENT,
         dev_info);
   } else {
      buffer_ptr =
         pvr_pds_generate_shared_storing_program(&program,
                                                 buffer_ptr,
                                                 PDS_GENERATE_CODE_SEGMENT,
                                                 dev_info);
   }

   /* Make sure the emitted program fit in the staging buffer. */
   assert((uint32_t)(buffer_ptr - staging_buffer) * 4 <
          ROGUE_PDS_TASK_PROGRAM_SIZE);

   /* The same alignments are used below for both the store (CDM_CONTEXT_PDS0)
    * and load (CDM_CONTEXT_LOAD_PDS0) destinations; assert they agree.
    */
   STATIC_ASSERT(PVRX(CR_CDM_CONTEXT_PDS0_DATA_ADDR_ALIGNMENT) ==
                 PVRX(CR_CDM_CONTEXT_LOAD_PDS0_DATA_ADDR_ALIGNMENT));

   STATIC_ASSERT(PVRX(CR_CDM_CONTEXT_PDS0_CODE_ADDR_ALIGNMENT) ==
                 PVRX(CR_CDM_CONTEXT_LOAD_PDS0_CODE_ADDR_ALIGNMENT));

   /* Note: the code size is derived from the emitted end pointer since the
    * generator's code_size doesn't include the barrier prefix.
    */
   return pvr_gpu_upload_pds(
      device,
      &staging_buffer[0],
      program.data_size,
      PVRX(CR_CDM_CONTEXT_PDS0_DATA_ADDR_ALIGNMENT),
      &staging_buffer[code_offset],
      (uint32_t)(buffer_ptr - &staging_buffer[code_offset]),
      PVRX(CR_CDM_CONTEXT_PDS0_CODE_ADDR_ALIGNMENT),
      cache_line_size,
      pds_upload_out);
}
444
/* Selects which context type the shared register (SR) store/load programs
 * are being created for.
 */
enum pvr_ctx_sr_program_target {
   PVR_CTX_SR_RENDER_TARGET,
   PVR_CTX_SR_COMPUTE_TARGET,
};
449
450static VkResult pvr_ctx_sr_programs_setup(struct pvr_device *device,
451                                          enum pvr_ctx_sr_program_target target,
452                                          struct rogue_sr_programs *sr_programs)
453{
454   const uint64_t store_load_state_bo_size =
455      PVRX(LLS_USC_SHARED_REGS_BUFFER_SIZE) +
456      ROGUE_LLS_SHARED_REGS_RESERVE_SIZE;
457   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
458   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
459   uint64_t usc_store_program_upload_offset;
460   uint64_t usc_load_program_upload_offset;
461   const uint8_t *usc_load_sr_code;
462   uint32_t usc_load_sr_code_size;
463   VkResult result;
464
465   /* Note that this is being used for both compute and render ctx. There is no
466    * compute equivalent define for the VDMCTRL unit size.
467    */
468   /* 4 blocks (16 dwords / 64 bytes) in USC to prevent fragmentation. */
469   sr_programs->usc.unified_size =
470      DIV_ROUND_UP(64, PVRX(VDMCTRL_PDS_STATE0_USC_UNIFIED_SIZE_UNIT_SIZE));
471
472   result = pvr_bo_alloc(device,
473                         device->heaps.pds_heap,
474                         store_load_state_bo_size,
475                         cache_line_size,
476                         PVR_WINSYS_BO_FLAG_CPU_ACCESS,
477                         &sr_programs->store_load_state_bo);
478   if (result != VK_SUCCESS)
479      return result;
480
481   /* USC state update: SR state store. */
482
483   assert(sizeof(pvr_vdm_store_sr_code) < ROGUE_USC_TASK_PROGRAM_SIZE);
484
485   result = pvr_gpu_upload_usc(device,
486                               pvr_vdm_store_sr_code,
487                               sizeof(pvr_vdm_store_sr_code),
488                               cache_line_size,
489                               &sr_programs->usc.store_program_bo);
490   if (result != VK_SUCCESS)
491      goto err_free_store_load_state_bo;
492
493   usc_store_program_upload_offset =
494      sr_programs->usc.store_program_bo->vma->dev_addr.addr -
495      device->heaps.usc_heap->base_addr.addr;
496
497   /* USC state update: SR state load. */
498
499   if (target == PVR_CTX_SR_COMPUTE_TARGET && PVR_HAS_QUIRK(dev_info, 62269)) {
500      STATIC_ASSERT(sizeof(pvr_cdm_load_sr_code) < ROGUE_USC_TASK_PROGRAM_SIZE);
501
502      usc_load_sr_code = pvr_cdm_load_sr_code;
503      usc_load_sr_code_size = sizeof(pvr_cdm_load_sr_code);
504   } else {
505      STATIC_ASSERT(sizeof(pvr_vdm_load_sr_code) < ROGUE_USC_TASK_PROGRAM_SIZE);
506
507      usc_load_sr_code = pvr_vdm_load_sr_code;
508      usc_load_sr_code_size = sizeof(pvr_vdm_load_sr_code);
509   }
510
511   result = pvr_gpu_upload_usc(device,
512                               usc_load_sr_code,
513                               usc_load_sr_code_size,
514                               cache_line_size,
515                               &sr_programs->usc.load_program_bo);
516   if (result != VK_SUCCESS)
517      goto err_free_usc_store_program_bo;
518
519   usc_load_program_upload_offset =
520      sr_programs->usc.load_program_bo->vma->dev_addr.addr -
521      device->heaps.usc_heap->base_addr.addr;
522
523   /* FIXME: The number of USC temps should be output alongside
524    * pvr_vdm_store_sr_code rather than hard coded.
525    */
526   /* Create and upload the PDS load and store programs. Point them to the
527    * appropriate USC load and store programs.
528    */
529   switch (target) {
530   case PVR_CTX_SR_RENDER_TARGET:
531      /* PDS state update: SR state store. */
532      result = pvr_pds_render_ctx_sr_program_create_and_upload(
533         device,
534         usc_store_program_upload_offset,
535         8,
536         sr_programs->store_load_state_bo->vma->dev_addr,
537         &sr_programs->pds.store_program);
538      if (result != VK_SUCCESS)
539         goto err_free_usc_load_program_bo;
540
541      /* PDS state update: SR state load. */
542      result = pvr_pds_render_ctx_sr_program_create_and_upload(
543         device,
544         usc_load_program_upload_offset,
545         20,
546         sr_programs->store_load_state_bo->vma->dev_addr,
547         &sr_programs->pds.load_program);
548      if (result != VK_SUCCESS)
549         goto err_free_pds_store_program_bo;
550
551      break;
552
553   case PVR_CTX_SR_COMPUTE_TARGET:
554      /* PDS state update: SR state store. */
555      result = pvr_pds_compute_ctx_sr_program_create_and_upload(
556         device,
557         false,
558         usc_store_program_upload_offset,
559         8,
560         sr_programs->store_load_state_bo->vma->dev_addr,
561         &sr_programs->pds.store_program);
562      if (result != VK_SUCCESS)
563         goto err_free_usc_load_program_bo;
564
565      /* PDS state update: SR state load. */
566      result = pvr_pds_compute_ctx_sr_program_create_and_upload(
567         device,
568         true,
569         usc_load_program_upload_offset,
570         20,
571         sr_programs->store_load_state_bo->vma->dev_addr,
572         &sr_programs->pds.load_program);
573      if (result != VK_SUCCESS)
574         goto err_free_pds_store_program_bo;
575
576      break;
577
578   default:
579      unreachable("Invalid target.");
580      break;
581   }
582
583   return VK_SUCCESS;
584
585err_free_pds_store_program_bo:
586   pvr_bo_free(device, sr_programs->pds.store_program.pvr_bo);
587
588err_free_usc_load_program_bo:
589   pvr_bo_free(device, sr_programs->usc.load_program_bo);
590
591err_free_usc_store_program_bo:
592   pvr_bo_free(device, sr_programs->usc.store_program_bo);
593
594err_free_store_load_state_bo:
595   pvr_bo_free(device, sr_programs->store_load_state_bo);
596
597   return VK_SUCCESS;
598}
599
/* Frees all the buffers allocated by pvr_ctx_sr_programs_setup(), in reverse
 * allocation order.
 */
static void pvr_ctx_sr_programs_cleanup(struct pvr_device *device,
                                        struct rogue_sr_programs *sr_programs)
{
   pvr_bo_free(device, sr_programs->pds.load_program.pvr_bo);
   pvr_bo_free(device, sr_programs->pds.store_program.pvr_bo);
   pvr_bo_free(device, sr_programs->usc.load_program_bo);
   pvr_bo_free(device, sr_programs->usc.store_program_bo);
   pvr_bo_free(device, sr_programs->store_load_state_bo);
}
609
610static VkResult
611pvr_render_ctx_switch_programs_setup(struct pvr_device *device,
612                                     struct pvr_render_ctx_programs *programs)
613{
614   VkResult result;
615
616   result = pvr_render_job_pt_programs_setup(device, &programs->pt);
617   if (result != VK_SUCCESS)
618      return result;
619
620   result = pvr_ctx_sr_programs_setup(device,
621                                      PVR_CTX_SR_RENDER_TARGET,
622                                      &programs->sr);
623   if (result != VK_SUCCESS)
624      goto err_pt_programs_cleanup;
625
626   return VK_SUCCESS;
627
628err_pt_programs_cleanup:
629   pvr_render_job_pt_programs_cleanup(device, &programs->pt);
630
631   return result;
632}
633
/* Tears down both program groups in reverse of their setup order. */
static void
pvr_render_ctx_switch_programs_cleanup(struct pvr_device *device,
                                       struct pvr_render_ctx_programs *programs)
{
   pvr_ctx_sr_programs_cleanup(device, &programs->sr);
   pvr_render_job_pt_programs_cleanup(device, &programs->pt);
}
641
642static VkResult pvr_render_ctx_switch_init(struct pvr_device *device,
643                                           struct pvr_render_ctx *ctx)
644{
645   struct pvr_render_ctx_switch *ctx_switch = &ctx->ctx_switch;
646   const uint64_t vdm_state_bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED |
647                                       PVR_BO_ALLOC_FLAG_CPU_ACCESS;
648   const uint64_t geom_state_bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED |
649                                        PVR_BO_ALLOC_FLAG_CPU_ACCESS;
650   VkResult result;
651
652   result = pvr_bo_alloc(device,
653                         device->heaps.general_heap,
654                         ROGUE_LLS_VDM_CONTEXT_RESUME_BUFFER_SIZE,
655                         ROGUE_LLS_VDM_CONTEXT_RESUME_BUFFER_ALIGNMENT,
656                         vdm_state_bo_flags,
657                         &ctx_switch->vdm_state_bo);
658   if (result != VK_SUCCESS)
659      return result;
660
661   result = pvr_bo_alloc(device,
662                         device->heaps.general_heap,
663                         ROGUE_LLS_TA_STATE_BUFFER_SIZE,
664                         ROGUE_LLS_TA_STATE_BUFFER_ALIGNMENT,
665                         geom_state_bo_flags,
666                         &ctx_switch->geom_state_bo);
667   if (result != VK_SUCCESS)
668      goto err_pvr_bo_free_vdm_state_bo;
669
670   for (uint32_t i = 0; i < ARRAY_SIZE(ctx_switch->programs); i++) {
671      result =
672         pvr_render_ctx_switch_programs_setup(device, &ctx_switch->programs[i]);
673      if (result)
674         goto err_programs_cleanup;
675   }
676
677   return result;
678
679err_programs_cleanup:
680   for (uint32_t i = 0; i < ARRAY_SIZE(ctx_switch->programs); i++) {
681      pvr_render_ctx_switch_programs_cleanup(device, &ctx_switch->programs[i]);
682   }
683
684   pvr_bo_free(device, ctx_switch->geom_state_bo);
685
686err_pvr_bo_free_vdm_state_bo:
687   pvr_bo_free(device, ctx_switch->vdm_state_bo);
688
689   return result;
690}
691
/* Counterpart to pvr_render_ctx_switch_init(): tears down all program slots
 * and frees the context state buffers.
 */
static void pvr_render_ctx_switch_fini(struct pvr_device *device,
                                       struct pvr_render_ctx *ctx)
{
   struct pvr_render_ctx_switch *ctx_switch = &ctx->ctx_switch;

   for (uint32_t i = 0; i < ARRAY_SIZE(ctx_switch->programs); i++) {
      pvr_render_ctx_switch_programs_cleanup(device, &ctx_switch->programs[i]);
   }

   pvr_bo_free(device, ctx_switch->geom_state_bo);
   pvr_bo_free(device, ctx_switch->vdm_state_bo);
}
704
/* Packs the VDMCTRL_PDS_STATE0/1 words that describe a PDS context
 * store/load program (its sizes and data segment address) for the VDM.
 */
static void
pvr_rogue_get_vdmctrl_pds_state_words(struct pvr_pds_upload *pds_program,
                                      enum PVRX(VDMCTRL_USC_TARGET) usc_target,
                                      uint8_t usc_unified_size,
                                      uint32_t *const state0_out,
                                      uint32_t *const state1_out)
{
   pvr_csb_pack (state0_out, VDMCTRL_PDS_STATE0, state) {
      /* Convert the data size from dwords to bytes. */
      const uint32_t pds_data_size = pds_program->data_size * 4;

      state.dm_target = PVRX(VDMCTRL_DM_TARGET_VDM);
      state.usc_target = usc_target;
      state.usc_common_size = 0;
      state.usc_unified_size = usc_unified_size;
      state.pds_temp_size = 0;

      /* The field is in data-size units, so the byte size must be a
       * multiple of the unit size.
       */
      assert(pds_data_size % PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) ==
             0);
      state.pds_data_size =
         pds_data_size / PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE);
   };

   pvr_csb_pack (state1_out, VDMCTRL_PDS_STATE1, state) {
      state.pds_data_addr = PVR_DEV_ADDR(pds_program->data_offset);
      state.sd_type = PVRX(VDMCTRL_SD_TYPE_PDS);
      state.sd_next_type = PVRX(VDMCTRL_SD_TYPE_PDS);
   }
}
734
/* Packs the TA_STATE_STREAM_OUT1/2 words that describe a PDS stream-out
 * program (size and data segment address) for the geometry state.
 */
static void
pvr_rogue_get_geom_state_stream_out_words(struct pvr_pds_upload *pds_program,
                                          uint32_t *const stream_out1_out,
                                          uint32_t *const stream_out2_out)
{
   pvr_csb_pack (stream_out1_out, TA_STATE_STREAM_OUT1, state) {
      /* Convert the data size from dwords to bytes. */
      const uint32_t pds_data_size = pds_program->data_size * 4;

      state.sync = true;

      /* The field is in data-size units, so the byte size must be a
       * multiple of the unit size.
       */
      assert(pds_data_size %
                PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE) ==
             0);
      state.pds_data_size =
         pds_data_size / PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE);

      state.pds_temp_size = 0;
   }

   pvr_csb_pack (stream_out2_out, TA_STATE_STREAM_OUT2, state) {
      state.pds_data_addr = PVR_DEV_ADDR(pds_program->data_offset);
   }
}
759
/* Fills in the winsys static state for a render context: the context state
 * base addresses and, for each program slot, the packed control words the
 * hardware uses to run the context store and resume tasks.
 */
static void pvr_render_ctx_ws_static_state_init(
   struct pvr_render_ctx *ctx,
   struct pvr_winsys_render_ctx_static_state *static_state)
{
   /* Destination cursors for 64-bit and 32-bit packed words. */
   uint64_t *q_dst;
   uint32_t *d_dst;

   q_dst = &static_state->vdm_ctx_state_base_addr;
   pvr_csb_pack (q_dst, CR_VDM_CONTEXT_STATE_BASE, base) {
      base.addr = ctx->ctx_switch.vdm_state_bo->vma->dev_addr;
   }

   q_dst = &static_state->geom_ctx_state_base_addr;
   pvr_csb_pack (q_dst, CR_TA_CONTEXT_STATE_BASE, base) {
      base.addr = ctx->ctx_switch.geom_state_bo->vma->dev_addr;
   }

   for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.programs); i++) {
      struct rogue_pt_programs *pt_prog = &ctx->ctx_switch.programs[i].pt;
      struct rogue_sr_programs *sr_prog = &ctx->ctx_switch.programs[i].sr;

      /* Context store state. */
      /* Task 0: SR store program (PDS state words 0 and 1). */
      q_dst = &static_state->geom_state[i].vdm_ctx_store_task0;
      pvr_csb_pack (q_dst, CR_VDM_CONTEXT_STORE_TASK0, task0) {
         pvr_rogue_get_vdmctrl_pds_state_words(&sr_prog->pds.store_program,
                                               PVRX(VDMCTRL_USC_TARGET_ANY),
                                               sr_prog->usc.unified_size,
                                               &task0.pds_state0,
                                               &task0.pds_state1);
      }

      /* Task 1: SR store program code address (PDS state word 2). */
      d_dst = &static_state->geom_state[i].vdm_ctx_store_task1;
      pvr_csb_pack (d_dst, CR_VDM_CONTEXT_STORE_TASK1, task1) {
         pvr_csb_pack (&task1.pds_state2, VDMCTRL_PDS_STATE2, state) {
            state.pds_code_addr =
               PVR_DEV_ADDR(sr_prog->pds.store_program.code_offset);
         }
      }

      /* Task 2: PT store program (stream out words). */
      q_dst = &static_state->geom_state[i].vdm_ctx_store_task2;
      pvr_csb_pack (q_dst, CR_VDM_CONTEXT_STORE_TASK2, task2) {
         pvr_rogue_get_geom_state_stream_out_words(&pt_prog->pds_store_program,
                                                   &task2.stream_out1,
                                                   &task2.stream_out2);
      }

      /* Context resume state. */
      /* Task 0: SR load program (PDS state words 0 and 1). */
      q_dst = &static_state->geom_state[i].vdm_ctx_resume_task0;
      pvr_csb_pack (q_dst, CR_VDM_CONTEXT_RESUME_TASK0, task0) {
         pvr_rogue_get_vdmctrl_pds_state_words(&sr_prog->pds.load_program,
                                               PVRX(VDMCTRL_USC_TARGET_ALL),
                                               sr_prog->usc.unified_size,
                                               &task0.pds_state0,
                                               &task0.pds_state1);
      }

      /* Task 1: SR load program code address (PDS state word 2). */
      d_dst = &static_state->geom_state[i].vdm_ctx_resume_task1;
      pvr_csb_pack (d_dst, CR_VDM_CONTEXT_RESUME_TASK1, task1) {
         pvr_csb_pack (&task1.pds_state2, VDMCTRL_PDS_STATE2, state) {
            state.pds_code_addr =
               PVR_DEV_ADDR(sr_prog->pds.load_program.code_offset);
         }
      }

      /* Task 2: PT resume program (stream out words). */
      q_dst = &static_state->geom_state[i].vdm_ctx_resume_task2;
      pvr_csb_pack (q_dst, CR_VDM_CONTEXT_RESUME_TASK2, task2) {
         pvr_rogue_get_geom_state_stream_out_words(&pt_prog->pds_resume_program,
                                                   &task2.stream_out1,
                                                   &task2.stream_out2);
      }
   }
}
832
833static void pvr_render_ctx_ws_create_info_init(
834   struct pvr_render_ctx *ctx,
835   enum pvr_winsys_ctx_priority priority,
836   struct pvr_winsys_render_ctx_create_info *create_info)
837{
838   create_info->priority = priority;
839   create_info->vdm_callstack_addr = ctx->vdm_callstack_bo->vma->dev_addr;
840
841   pvr_render_ctx_ws_static_state_init(ctx, &create_info->static_state);
842}
843
844VkResult pvr_render_ctx_create(struct pvr_device *device,
845                               enum pvr_winsys_ctx_priority priority,
846                               struct pvr_render_ctx **const ctx_out)
847{
848   const uint64_t vdm_callstack_size =
849      sizeof(uint64_t) * PVR_VDM_CALLSTACK_MAX_DEPTH;
850   struct pvr_winsys_render_ctx_create_info create_info;
851   struct pvr_render_ctx *ctx;
852   VkResult result;
853
854   ctx = vk_alloc(&device->vk.alloc,
855                  sizeof(*ctx),
856                  8,
857                  VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
858   if (!ctx)
859      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
860
861   ctx->device = device;
862
863   result = pvr_bo_alloc(device,
864                         device->heaps.general_heap,
865                         vdm_callstack_size,
866                         PVRX(CR_VDM_CALL_STACK_POINTER_ADDR_ALIGNMENT),
867                         0,
868                         &ctx->vdm_callstack_bo);
869   if (result != VK_SUCCESS)
870      goto err_vk_free_ctx;
871
872   result = pvr_render_ctx_switch_init(device, ctx);
873   if (result != VK_SUCCESS)
874      goto err_free_vdm_callstack_bo;
875
876   result = pvr_ctx_reset_cmd_init(device, &ctx->reset_cmd);
877   if (result != VK_SUCCESS)
878      goto err_render_ctx_switch_fini;
879
880   /* ctx must be fully initialized by this point since
881    * pvr_render_ctx_ws_create_info_init() depends on this.
882    */
883   pvr_render_ctx_ws_create_info_init(ctx, priority, &create_info);
884
885   result = device->ws->ops->render_ctx_create(device->ws,
886                                               &create_info,
887                                               &ctx->ws_ctx);
888   if (result != VK_SUCCESS)
889      goto err_render_ctx_reset_cmd_fini;
890
891   *ctx_out = ctx;
892
893   return VK_SUCCESS;
894
895err_render_ctx_reset_cmd_fini:
896   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);
897
898err_render_ctx_switch_fini:
899   pvr_render_ctx_switch_fini(device, ctx);
900
901err_free_vdm_callstack_bo:
902   pvr_bo_free(device, ctx->vdm_callstack_bo);
903
904err_vk_free_ctx:
905   vk_free(&device->vk.alloc, ctx);
906
907   return result;
908}
909
/* Destroy a render context created with pvr_render_ctx_create().
 *
 * The winsys context is destroyed first, then the host-side resources are
 * released in the reverse order of creation. ctx is freed; it must not be
 * used after this call.
 */
void pvr_render_ctx_destroy(struct pvr_render_ctx *ctx)
{
   struct pvr_device *device = ctx->device;

   device->ws->ops->render_ctx_destroy(ctx->ws_ctx);

   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);
   pvr_render_ctx_switch_fini(device, ctx);
   pvr_bo_free(device, ctx->vdm_callstack_bo);
   vk_free(&device->vk.alloc, ctx);
}
921
/* Generate and upload the PDS fence/terminate program referenced by the
 * compute context's CR_CDM_TERMINATE_... static state.
 *
 * The program is generated in two passes into a stack staging buffer: first
 * the data segment (to learn its size), then the code segment placed after
 * it. Both segments are then uploaded with pvr_gpu_upload_pds().
 *
 * On success the upload info is written to pds_upload_out; the caller owns
 * pds_upload_out->pvr_bo and must release it with pvr_bo_free().
 */
static VkResult pvr_pds_sr_fence_terminate_program_create_and_upload(
   struct pvr_device *device,
   struct pvr_pds_upload *const pds_upload_out)
{
   const uint32_t pds_data_alignment =
      PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U;
   const struct pvr_device_runtime_info *dev_runtime_info =
      &device->pdevice->dev_runtime_info;
   ASSERTED const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   uint32_t staging_buffer[PVRX(PDS_TASK_PROGRAM_SIZE) >> 2U];
   struct pvr_pds_fence_program program = { 0 };
   ASSERTED uint32_t *buffer_end;
   uint32_t code_offset;
   uint32_t data_size;

   /* SW_COMPUTE_PDS_BARRIER is not supported with 2 or more phantoms. */
   assert(!(PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info) &&
            dev_runtime_info->num_phantoms >= 2));

   /* First pass: emit only the data segment at the start of the staging
    * buffer so program.data_size gets populated.
    */
   pvr_pds_generate_fence_terminate_program(&program,
                                            staging_buffer,
                                            PDS_GENERATE_DATA_SEGMENT,
                                            &device->pdevice->dev_info);

   /* FIXME: pvr_pds_generate_fence_terminate_program() zeros out the data_size
    * when we generate the code segment. Implement
    * PDS_GENERATE_CODEDATA_SEGMENTS? Or wait for the pds gen api to change?
    * This behavior doesn't seem consistent with the rest of the api. For now
    * we store the size in a variable.
    */
   data_size = program.data_size;
   /* The code segment starts right after the data segment, aligned to the
    * PDS data size unit (code_offset is in dwords).
    */
   code_offset = ALIGN_POT(program.data_size, pds_data_alignment);

   /* Second pass: emit the code segment after the data segment. */
   buffer_end =
      pvr_pds_generate_fence_terminate_program(&program,
                                               &staging_buffer[code_offset],
                                               PDS_GENERATE_CODE_SEGMENT,
                                               &device->pdevice->dev_info);

   /* Both segments must fit within the staging buffer. */
   assert((uint64_t)(buffer_end - staging_buffer) * 4U <
          ROGUE_PDS_TASK_PROGRAM_SIZE);

   return pvr_gpu_upload_pds(device,
                             staging_buffer,
                             data_size,
                             PVRX(CR_CDM_TERMINATE_PDS_DATA_ADDR_ALIGNMENT),
                             &staging_buffer[code_offset],
                             program.code_size,
                             PVRX(CR_CDM_TERMINATE_PDS_CODE_ADDR_ALIGNMENT),
                             0,
                             pds_upload_out);
}
974
/* Pack the static control words the winsys needs to create a compute (CDM)
 * context:
 *  - CR_CDM_CONTEXT_PDS0/_b and CR_CDM_CONTEXT_PDS1 from the shared
 *    register (SR) store programs,
 *  - CR_CDM_TERMINATE_PDS/PDS1 from the fence/terminate program,
 *  - CR_CDM_CONTEXT_LOAD_PDS0/_b from the SR load programs.
 */
static void pvr_compute_ctx_ws_static_state_init(
   const struct pvr_device_info *const dev_info,
   const struct pvr_compute_ctx *const ctx,
   struct pvr_winsys_compute_ctx_static_state *const static_state)
{
   const struct pvr_compute_ctx_switch *const ctx_switch = &ctx->ctx_switch;

   /* CR_CDM_CONTEXT_... use state store program info. */

   pvr_csb_pack (&static_state->cdm_ctx_store_pds0,
                 CR_CDM_CONTEXT_PDS0,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr[0].pds.store_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr[0].pds.store_program.code_offset);
   }

   /* Alternate ("b") store program addresses, from the second SR set. */
   pvr_csb_pack (&static_state->cdm_ctx_store_pds0_b,
                 CR_CDM_CONTEXT_PDS0,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr[1].pds.store_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr[1].pds.store_program.code_offset);
   }

   pvr_csb_pack (&static_state->cdm_ctx_store_pds1,
                 CR_CDM_CONTEXT_PDS1,
                 state) {
      /* Convert the data size from dwords to bytes. */
      const uint32_t store_program_data_size =
         ctx_switch->sr[0].pds.store_program.data_size * 4U;

      state.pds_seq_dep = true;
      state.usc_seq_dep = false;
      state.target = true;
      state.unified_size = ctx_switch->sr[0].usc.unified_size;
      state.common_shared = false;
      state.common_size = 0;
      state.temp_size = 0;

      /* data_size is programmed in PDS data size units; the byte size must
       * divide evenly.
       */
      assert(store_program_data_size %
                PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) ==
             0);
      state.data_size = store_program_data_size /
                        PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE);

      state.fence = true;
   }

   /* CR_CDM_TERMINATE_... use fence terminate info. */

   pvr_csb_pack (&static_state->cdm_ctx_terminate_pds,
                 CR_CDM_TERMINATE_PDS,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr_fence_terminate_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr_fence_terminate_program.code_offset);
   }

   pvr_csb_pack (&static_state->cdm_ctx_terminate_pds1,
                 CR_CDM_TERMINATE_PDS1,
                 state) {
      /* Convert the data size from dwords to bytes. */
      const uint32_t fence_terminate_program_data_size =
         ctx_switch->sr_fence_terminate_program.data_size * 4U;

      state.pds_seq_dep = true;
      state.usc_seq_dep = false;
      /* NOTE(review): target is cleared on morton-capable cores -- confirm
       * rationale against the hardware documentation.
       */
      state.target = !PVR_HAS_FEATURE(dev_info, compute_morton_capable);
      state.unified_size = 0;
      /* Common store is for shareds -- this will free the partitions. */
      state.common_shared = true;
      state.common_size = 0;
      state.temp_size = 0;

      assert(fence_terminate_program_data_size %
                PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) ==
             0);
      state.data_size = fence_terminate_program_data_size /
                        PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE);
      state.fence = true;
   }

   /* CR_CDM_RESUME_... use state load program info. */

   pvr_csb_pack (&static_state->cdm_ctx_resume_pds0,
                 CR_CDM_CONTEXT_LOAD_PDS0,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr[0].pds.load_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr[0].pds.load_program.code_offset);
   }

   /* Alternate ("b") load program addresses, from the second SR set. */
   pvr_csb_pack (&static_state->cdm_ctx_resume_pds0_b,
                 CR_CDM_CONTEXT_LOAD_PDS0,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr[1].pds.load_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr[1].pds.load_program.code_offset);
   }
}
1081
1082static void pvr_compute_ctx_ws_create_info_init(
1083   const struct pvr_compute_ctx *const ctx,
1084   enum pvr_winsys_ctx_priority priority,
1085   struct pvr_winsys_compute_ctx_create_info *const create_info)
1086{
1087   create_info->priority = priority;
1088
1089   pvr_compute_ctx_ws_static_state_init(&ctx->device->pdevice->dev_info,
1090                                        ctx,
1091                                        &create_info->static_state);
1092}
1093
1094VkResult pvr_compute_ctx_create(struct pvr_device *const device,
1095                                enum pvr_winsys_ctx_priority priority,
1096                                struct pvr_compute_ctx **const ctx_out)
1097{
1098   struct pvr_winsys_compute_ctx_create_info create_info;
1099   struct pvr_compute_ctx *ctx;
1100   VkResult result;
1101
1102   ctx = vk_alloc(&device->vk.alloc,
1103                  sizeof(*ctx),
1104                  8,
1105                  VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1106   if (!ctx)
1107      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1108
1109   ctx->device = device;
1110
1111   result = pvr_bo_alloc(
1112      device,
1113      device->heaps.general_heap,
1114      rogue_get_cdm_context_resume_buffer_size(&device->pdevice->dev_info),
1115      rogue_get_cdm_context_resume_buffer_alignment(&device->pdevice->dev_info),
1116      PVR_WINSYS_BO_FLAG_CPU_ACCESS | PVR_WINSYS_BO_FLAG_GPU_UNCACHED,
1117      &ctx->ctx_switch.compute_state_bo);
1118   if (result != VK_SUCCESS)
1119      goto err_free_ctx;
1120
1121   /* TODO: Change this so that enabling storage to B doesn't change the array
1122    * size. Instead of looping we could unroll this and have the second
1123    * programs setup depending on the B enable. Doing it that way would make
1124    * things more obvious.
1125    */
1126   for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.sr); i++) {
1127      result = pvr_ctx_sr_programs_setup(device,
1128                                         PVR_CTX_SR_COMPUTE_TARGET,
1129                                         &ctx->ctx_switch.sr[i]);
1130      if (result != VK_SUCCESS) {
1131         for (uint32_t j = 0; j < i; j++)
1132            pvr_ctx_sr_programs_cleanup(device, &ctx->ctx_switch.sr[j]);
1133
1134         goto err_free_state_buffer;
1135      }
1136   }
1137
1138   result = pvr_pds_sr_fence_terminate_program_create_and_upload(
1139      device,
1140      &ctx->ctx_switch.sr_fence_terminate_program);
1141   if (result != VK_SUCCESS)
1142      goto err_free_sr_programs;
1143
1144   pvr_compute_ctx_ws_create_info_init(ctx, priority, &create_info);
1145
1146   result = pvr_ctx_reset_cmd_init(device, &ctx->reset_cmd);
1147   if (result != VK_SUCCESS)
1148      goto err_free_pds_fence_terminate_program;
1149
1150   result = device->ws->ops->compute_ctx_create(device->ws,
1151                                                &create_info,
1152                                                &ctx->ws_ctx);
1153   if (result != VK_SUCCESS)
1154      goto err_fini_reset_cmd;
1155
1156   *ctx_out = ctx;
1157
1158   return VK_SUCCESS;
1159
1160err_fini_reset_cmd:
1161   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);
1162
1163err_free_pds_fence_terminate_program:
1164   pvr_bo_free(device, ctx->ctx_switch.sr_fence_terminate_program.pvr_bo);
1165
1166err_free_sr_programs:
1167   for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.sr); ++i)
1168      pvr_ctx_sr_programs_cleanup(device, &ctx->ctx_switch.sr[i]);
1169
1170err_free_state_buffer:
1171   pvr_bo_free(device, ctx->ctx_switch.compute_state_bo);
1172
1173err_free_ctx:
1174   vk_free(&device->vk.alloc, ctx);
1175
1176   return result;
1177}
1178
/* Destroy a compute context created with pvr_compute_ctx_create().
 *
 * The winsys context is destroyed first, then the host-side resources are
 * released in the reverse order of creation. ctx is freed; it must not be
 * used after this call.
 */
void pvr_compute_ctx_destroy(struct pvr_compute_ctx *const ctx)
{
   struct pvr_device *device = ctx->device;

   device->ws->ops->compute_ctx_destroy(ctx->ws_ctx);

   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);

   pvr_bo_free(device, ctx->ctx_switch.sr_fence_terminate_program.pvr_bo);
   for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.sr); ++i)
      pvr_ctx_sr_programs_cleanup(device, &ctx->ctx_switch.sr[i]);

   pvr_bo_free(device, ctx->ctx_switch.compute_state_bo);

   vk_free(&device->vk.alloc, ctx);
}
1195
/* Fill in the winsys create-info for a transfer context. Only the priority
 * is needed; transfer contexts have no static state to pack.
 */
static void pvr_transfer_ctx_ws_create_info_init(
   enum pvr_winsys_ctx_priority priority,
   struct pvr_winsys_transfer_ctx_create_info *const create_info)
{
   create_info->priority = priority;
}
1202
1203static VkResult pvr_transfer_ctx_setup_shaders(struct pvr_device *device,
1204                                               struct pvr_transfer_ctx *ctx)
1205{
1206   const uint32_t cache_line_size =
1207      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
1208   VkResult result;
1209
1210   /* TODO: Setup USC fragments. */
1211
1212   /* Setup EOT program. */
1213   result = pvr_gpu_upload_usc(device,
1214                               pvr_transfer_eot_usc_code,
1215                               sizeof(pvr_transfer_eot_usc_code),
1216                               cache_line_size,
1217                               &ctx->usc_eot_bo);
1218   if (result != VK_SUCCESS)
1219      return result;
1220
1221   STATIC_ASSERT(ARRAY_SIZE(pvr_transfer_eot_usc_offsets) ==
1222                 ARRAY_SIZE(ctx->transfer_mrts));
1223   for (uint32_t i = 0U; i < ARRAY_SIZE(pvr_transfer_eot_usc_offsets); i++) {
1224      ctx->transfer_mrts[i] =
1225         PVR_DEV_ADDR_OFFSET(ctx->usc_eot_bo->vma->dev_addr,
1226                             pvr_transfer_eot_usc_offsets[i]);
1227   }
1228
1229   return VK_SUCCESS;
1230}
1231
/* Release the USC shader code uploaded by pvr_transfer_ctx_setup_shaders().
 * The transfer_mrts addresses derived from it become invalid.
 */
static void pvr_transfer_ctx_fini_shaders(struct pvr_device *device,
                                          struct pvr_transfer_ctx *ctx)
{
   pvr_bo_free(device, ctx->usc_eot_bo);
}
1237
1238VkResult pvr_transfer_ctx_create(struct pvr_device *const device,
1239                                 enum pvr_winsys_ctx_priority priority,
1240                                 struct pvr_transfer_ctx **const ctx_out)
1241{
1242   struct pvr_winsys_transfer_ctx_create_info create_info;
1243   struct pvr_transfer_ctx *ctx;
1244   VkResult result;
1245
1246   ctx = vk_zalloc(&device->vk.alloc,
1247                   sizeof(*ctx),
1248                   8U,
1249                   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1250   if (!ctx)
1251      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1252
1253   ctx->device = device;
1254
1255   result = pvr_ctx_reset_cmd_init(device, &ctx->reset_cmd);
1256   if (result != VK_SUCCESS)
1257      goto err_free_ctx;
1258
1259   pvr_transfer_ctx_ws_create_info_init(priority, &create_info);
1260
1261   result = device->ws->ops->transfer_ctx_create(device->ws,
1262                                                 &create_info,
1263                                                 &ctx->ws_ctx);
1264   if (result != VK_SUCCESS)
1265      goto err_fini_reset_cmd;
1266
1267   result = pvr_transfer_ctx_setup_shaders(device, ctx);
1268   if (result != VK_SUCCESS)
1269      goto err_destroy_transfer_ctx;
1270
1271   /* Create the PDS Uniform/Tex state code segment array. */
1272   for (uint32_t i = 0U; i < ARRAY_SIZE(ctx->pds_unitex_code); i++) {
1273      for (uint32_t j = 0U; j < ARRAY_SIZE(ctx->pds_unitex_code[0U]); j++) {
1274         if (i == 0U && j == 0U)
1275            continue;
1276
1277         result = pvr_pds_unitex_state_program_create_and_upload(
1278            device,
1279            NULL,
1280            i,
1281            j,
1282            &ctx->pds_unitex_code[i][j]);
1283         if (result != VK_SUCCESS) {
1284            goto err_free_pds_unitex_bos;
1285         }
1286      }
1287   }
1288
1289   *ctx_out = ctx;
1290
1291   return VK_SUCCESS;
1292
1293err_free_pds_unitex_bos:
1294   for (uint32_t i = 0U; i < ARRAY_SIZE(ctx->pds_unitex_code); i++) {
1295      for (uint32_t j = 0U; j < ARRAY_SIZE(ctx->pds_unitex_code[0U]); j++) {
1296         if (!ctx->pds_unitex_code[i][j].pvr_bo)
1297            continue;
1298
1299         pvr_bo_free(device, ctx->pds_unitex_code[i][j].pvr_bo);
1300      }
1301   }
1302
1303   pvr_transfer_ctx_fini_shaders(device, ctx);
1304
1305err_destroy_transfer_ctx:
1306   device->ws->ops->transfer_ctx_destroy(ctx->ws_ctx);
1307
1308err_fini_reset_cmd:
1309   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);
1310
1311err_free_ctx:
1312   vk_free(&device->vk.alloc, ctx);
1313
1314   return result;
1315}
1316
/* Destroy a transfer context created with pvr_transfer_ctx_create().
 *
 * Frees every uploaded PDS uniform/texture state program (the unused (0, 0)
 * entry is skipped via its NULL pvr_bo), then tears down the remaining
 * resources. ctx is freed; it must not be used after this call.
 */
void pvr_transfer_ctx_destroy(struct pvr_transfer_ctx *const ctx)
{
   struct pvr_device *device = ctx->device;

   for (uint32_t i = 0U; i < ARRAY_SIZE(ctx->pds_unitex_code); i++) {
      for (uint32_t j = 0U; j < ARRAY_SIZE(ctx->pds_unitex_code[0U]); j++) {
         if (!ctx->pds_unitex_code[i][j].pvr_bo)
            continue;

         pvr_bo_free(device, ctx->pds_unitex_code[i][j].pvr_bo);
      }
   }

   pvr_transfer_ctx_fini_shaders(device, ctx);
   device->ws->ops->transfer_ctx_destroy(ctx->ws_ctx);
   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);
   vk_free(&device->vk.alloc, ctx);
}
1335