/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "ir3/ir3_nir.h"

/* This has to reach into the fd_context a bit more than the rest of
 * ir3, but it needs to be aligned with the compiler, so both agree
 * on which const regs hold what.  And the logic is identical between
 * ir3 generations, the only difference is small details in the actual
 * CP_LOAD_STATE packets (which are handled inside the generation
 * specific ctx->emit_const(_bo)() fxns)
 *
 * This file should be included in only a single .c file per gen, which
 * defines the following functions:
 */

static bool is_stateobj(struct fd_ringbuffer *ring);

static void emit_const_user(struct fd_ringbuffer *ring,
                            const struct ir3_shader_variant *v, uint32_t regid,
                            uint32_t size, const uint32_t *user_buffer);

static void emit_const_bo(struct fd_ringbuffer *ring,
                          const struct ir3_shader_variant *v, uint32_t regid,
                          uint32_t offset, uint32_t size, struct fd_bo *bo);

static void
emit_const_prsc(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
                uint32_t regid, uint32_t offset, uint32_t size,
                struct pipe_resource *buffer)
{
   struct fd_resource *rsc = fd_resource(buffer);
   emit_const_bo(ring, v, regid, offset, size, rsc->bo);
}

static void emit_const_ptrs(struct fd_ringbuffer *ring,
                            const struct ir3_shader_variant *v,
                            uint32_t dst_offset, uint32_t num,
                            struct fd_bo **bos, uint32_t *offsets);
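
/* As an illustration only (file name and function bodies below are
 * hypothetical): a gen-specific source file is expected to provide the
 * four hooks above and then include this header, roughly along these
 * lines:
 *
 *    static bool is_stateobj(struct fd_ringbuffer *ring) { ... }
 *    static void emit_const_user(...) { ... CP_LOAD_STATE w/ inline payload ... }
 *    static void emit_const_bo(...)   { ... CP_LOAD_STATE pointing at a BO ... }
 *    static void emit_const_ptrs(...) { ... table of BO addresses ... }
 *
 *    #include "ir3_const.h"    // i.e. this header
 *
 * so that the helpers below get inlined against that generation's packet
 * emitters.
 */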

static void
emit_const_asserts(struct fd_ringbuffer *ring,
                   const struct ir3_shader_variant *v, uint32_t regid,
                   uint32_t sizedwords)
{
   assert((regid % 4) == 0);
   assert((sizedwords % 4) == 0);
   assert(regid + sizedwords <= v->constlen * 4);
}

static void
ring_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
{
   /* when we emit const state via ring (IB2) we need a WFI, but when
    * it is emit'd via stateobj, we don't
    */
   if (is_stateobj(ring))
      return;

   fd_wfi(batch, ring);
}

/**
 * Calculates the size of cmdstream needed for ir3_emit_user_consts(),
 * returning (via the out parameters) the number of packets and the total
 * payload size, in dwords.
 *
 * The value can be a worst-case, ie. some shader variants may not read all
 * consts, etc.
 */
static inline void
ir3_user_consts_size(struct ir3_ubo_analysis_state *state, unsigned *packets,
                     unsigned *size)
{
   *packets = *size = 0;

   for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
      if (state->range[i].start < state->range[i].end) {
         *size += state->range[i].end - state->range[i].start;
         (*packets)++;
      }
   }
}
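
/* A minimal usage sketch (the numbers are hypothetical): a caller building a
 * streaming stateobj could size it from these two outputs, since each enabled
 * range becomes one packet plus its payload:
 *
 *    unsigned packets, size;
 *    ir3_user_consts_size(ubo_state, &packets, &size);
 *    // reserve roughly (size + packets * N) dwords, where N is the
 *    // gen-specific per-packet header overhead (N is illustrative).
 */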

/**
 * Uploads the referenced subranges of the nir constant_data to the hardware's
 * constant buffer.
 */
static inline void
ir3_emit_constant_data(struct fd_screen *screen,
                       const struct ir3_shader_variant *v,
                       struct fd_ringbuffer *ring)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   const struct ir3_ubo_analysis_state *state = &const_state->ubo_state;

   for (unsigned i = 0; i < state->num_enabled; i++) {
      unsigned ubo = state->range[i].ubo.block;
      if (ubo != const_state->constant_data_ubo)
         continue;

      uint32_t size = state->range[i].end - state->range[i].start;

      /* Pre-a6xx, we might have ranges enabled in the shader that aren't
       * used in the binning variant.
       */
      if (16 * v->constlen <= state->range[i].offset)
         continue;

      /* and even if the start of the const buffer is before
       * first_immediate, the end may not be:
       */
      size = MIN2(size, (16 * v->constlen) - state->range[i].offset);

      if (size == 0)
         continue;

      emit_const_bo(ring, v, state->range[i].offset / 4,
                    v->info.constant_data_offset + state->range[i].start,
                    size / 4, v->bo);
   }
}

/**
 * Uploads sub-ranges of UBOs to the hardware's constant buffer (UBO access
 * outside of these ranges will be done using full UBO accesses in the
 * shader).
 */
static inline void
ir3_emit_user_consts(struct fd_screen *screen,
                     const struct ir3_shader_variant *v,
                     struct fd_ringbuffer *ring,
                     struct fd_constbuf_stateobj *constbuf)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   const struct ir3_ubo_analysis_state *state = &const_state->ubo_state;

   for (unsigned i = 0; i < state->num_enabled; i++) {
      assert(!state->range[i].ubo.bindless);
      unsigned ubo = state->range[i].ubo.block;
      if (!(constbuf->enabled_mask & (1 << ubo)) ||
          ubo == const_state->constant_data_ubo) {
         continue;
      }
      struct pipe_constant_buffer *cb = &constbuf->cb[ubo];

      uint32_t size = state->range[i].end - state->range[i].start;
      uint32_t offset = cb->buffer_offset + state->range[i].start;

      /* Pre-a6xx, we might have ranges enabled in the shader that aren't
       * used in the binning variant.
       */
      if (16 * v->constlen <= state->range[i].offset)
         continue;

      /* and even if the start of the const buffer is before
       * first_immediate, the end may not be:
       */
      size = MIN2(size, (16 * v->constlen) - state->range[i].offset);

      if (size == 0)
         continue;

      /* things should be aligned to vec4: */
      assert((state->range[i].offset % 16) == 0);
      assert((size % 16) == 0);
      assert((offset % 16) == 0);

      if (cb->user_buffer) {
         emit_const_user(ring, v, state->range[i].offset / 4, size / 4,
                         cb->user_buffer + state->range[i].start);
      } else {
         emit_const_prsc(ring, v, state->range[i].offset / 4, offset, size / 4,
                         cb->buffer);
      }
   }
}
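
/* Worked example of the units involved above (the values are illustrative):
 * range[i].offset/start/end are in bytes, const register ids are in dwords,
 * and v->constlen is in vec4s.  So a range with offset 64 and size 32 bytes
 * is emitted at const dword 16 (vec4 slot c4), and is clamped against
 * 16 * v->constlen bytes, i.e. constlen must be >= 6 for the whole 32 bytes
 * to land in the const file.
 */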

static inline void
ir3_emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
              struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset = const_state->offsets.ubo;

   /* a6xx+ uses UBO state and ldc instead of pointers emitted in
    * const state and ldg:
    */
   if (ctx->screen->gen >= 6)
      return;

   if (v->constlen > offset) {
      uint32_t params = const_state->num_ubos;
      uint32_t offsets[params];
      struct fd_bo *bos[params];

      for (uint32_t i = 0; i < params; i++) {
         if (i == const_state->constant_data_ubo) {
            bos[i] = v->bo;
            offsets[i] = v->info.constant_data_offset;
            continue;
         }

         struct pipe_constant_buffer *cb = &constbuf->cb[i];

         /* If we have user pointers (constbuf 0, aka GL uniforms), upload
          * them to a buffer now, and save it in the constbuf so that we
          * don't have to reupload until they get changed.
          */
         if (cb->user_buffer) {
            struct pipe_context *pctx = &ctx->base;
            u_upload_data(pctx->stream_uploader, 0, cb->buffer_size, 64,
                          cb->user_buffer, &cb->buffer_offset, &cb->buffer);
            cb->user_buffer = NULL;
         }

         if ((constbuf->enabled_mask & (1 << i)) && cb->buffer) {
            offsets[i] = cb->buffer_offset;
            bos[i] = fd_resource(cb->buffer)->bo;
         } else {
            offsets[i] = 0;
            bos[i] = NULL;
         }
      }

      assert(offset * 4 + params <= v->constlen * 4);

      emit_const_ptrs(ring, v, offset * 4, params, bos, offsets);
   }
}

static inline void
ir3_emit_image_dims(struct fd_screen *screen,
                    const struct ir3_shader_variant *v,
                    struct fd_ringbuffer *ring,
                    struct fd_shaderimg_stateobj *si)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset = const_state->offsets.image_dims;
   if (v->constlen > offset) {
      uint32_t dims[align(const_state->image_dims.count, 4)];
      unsigned mask = const_state->image_dims.mask;

      while (mask) {
         struct pipe_image_view *img;
         struct fd_resource *rsc;
         unsigned index = u_bit_scan(&mask);
         unsigned off = const_state->image_dims.off[index];

         img = &si->si[index];
         rsc = fd_resource(img->resource);

         dims[off + 0] = util_format_get_blocksize(img->format);
         if (img->resource->target != PIPE_BUFFER) {
            struct fdl_slice *slice = fd_resource_slice(rsc, img->u.tex.level);
            /* note for 2d/cube/etc images, even if re-interpreted
             * as a different color format, the pixel size should
             * be the same, so use original dimensions for y and z
             * stride:
             */
            dims[off + 1] = fd_resource_pitch(rsc, img->u.tex.level);
            /* see corresponding logic in fd_resource_offset(): */
            if (rsc->layout.layer_first) {
               dims[off + 2] = rsc->layout.layer_size;
            } else {
               dims[off + 2] = slice->size0;
            }
         } else {
            /* For buffer-backed images, the log2 of the format's
             * bytes-per-pixel is placed on the 2nd slot. This is useful
             * when emitting image_size instructions, for which we need
             * to divide by bpp for image buffers. Since the bpp
             * can only be power-of-two, the division is implemented
             * as a SHR, and for that it is handy to have the log2 of
             * bpp as a constant. (log2 = first-set-bit - 1)
             */
            dims[off + 1] = ffs(dims[off + 0]) - 1;
         }
      }
      uint32_t size = MIN2(ARRAY_SIZE(dims), v->constlen * 4 - offset * 4);

      emit_const_user(ring, v, offset * 4, size, dims);
   }
}
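
/* Example of the buffer-image case above (format chosen purely for
 * illustration): for a PIPE_FORMAT_R32G32B32A32_FLOAT buffer image the
 * blocksize is 16 bytes, so dims[off + 1] = ffs(16) - 1 = 4, and the shader
 * can turn a byte count into a texel count with `bytes >> 4` instead of a
 * divide.
 */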

static inline void
ir3_emit_immediates(struct fd_screen *screen,
                    const struct ir3_shader_variant *v,
                    struct fd_ringbuffer *ring)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t base = const_state->offsets.immediate;
   int size = DIV_ROUND_UP(const_state->immediates_count, 4);

   /* truncate size to avoid writing constants that shader
    * does not use:
    */
   size = MIN2(size + base, v->constlen) - base;

   /* convert out of vec4: */
   base *= 4;
   size *= 4;

   if (size > 0)
      emit_const_user(ring, v, base, size, const_state->immediates);

   /* NIR constant data has the same lifetime as immediates, so upload it
    * now, too.
    */
   ir3_emit_constant_data(screen, v, ring);
}

static inline void
ir3_emit_link_map(struct fd_screen *screen,
                  const struct ir3_shader_variant *producer,
                  const struct ir3_shader_variant *v,
                  struct fd_ringbuffer *ring)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t base = const_state->offsets.primitive_map;
   int size = DIV_ROUND_UP(v->input_size, 4);

   /* truncate size to avoid writing constants that shader
    * does not use:
    */
   size = MIN2(size + base, v->constlen) - base;

   /* convert out of vec4: */
   base *= 4;
   size *= 4;

   if (size > 0)
      emit_const_user(ring, v, base, size, producer->output_loc);
}

/* emit stream-out buffers: */
static inline void
emit_tfbos(struct fd_context *ctx, const struct ir3_shader_variant *v,
           struct fd_ringbuffer *ring)
{
   /* streamout addresses after driver-params: */
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset = const_state->offsets.tfbo;
   if (v->constlen > offset) {
      struct fd_streamout_stateobj *so = &ctx->streamout;
      const struct ir3_stream_output_info *info = &v->stream_output;
      uint32_t params = 4;
      uint32_t offsets[params];
      struct fd_bo *bos[params];

      for (uint32_t i = 0; i < params; i++) {
         struct pipe_stream_output_target *target = so->targets[i];

         if (target) {
            offsets[i] =
               (so->offsets[i] * info->stride[i] * 4) + target->buffer_offset;
            bos[i] = fd_resource(target->buffer)->bo;
         } else {
            offsets[i] = 0;
            bos[i] = NULL;
         }
      }

      assert(offset * 4 + params <= v->constlen * 4);

      emit_const_ptrs(ring, v, offset * 4, params, bos, offsets);
   }
}

static inline void
emit_common_consts(const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, struct fd_context *ctx,
                   enum pipe_shader_type t) assert_dt
{
   enum fd_dirty_shader_state dirty = ctx->dirty_shader[t];

   /* When we use CP_SET_DRAW_STATE objects to emit constant state,
    * if we emit any of it we need to emit all.  This is because
    * we are using the same state-group-id each time for uniform
    * state, and if previous update is never evaluated (due to no
    * visible primitives in the current tile) then the new stateobj
    * completely replaces the old one.
    *
    * Possibly if we split up different parts of the const state to
    * different state-objects we could avoid this.
    */
   if (dirty && is_stateobj(ring))
      dirty = ~0;

   if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST)) {
      struct fd_constbuf_stateobj *constbuf;
      bool shader_dirty;

      constbuf = &ctx->constbuf[t];
      shader_dirty = !!(dirty & FD_DIRTY_SHADER_PROG);

      ring_wfi(ctx->batch, ring);

      ir3_emit_user_consts(ctx->screen, v, ring, constbuf);
      ir3_emit_ubos(ctx, v, ring, constbuf);
      if (shader_dirty)
         ir3_emit_immediates(ctx->screen, v, ring);
   }

   if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_IMAGE)) {
      struct fd_shaderimg_stateobj *si = &ctx->shaderimg[t];
      ring_wfi(ctx->batch, ring);
      ir3_emit_image_dims(ctx->screen, v, ring, si);
   }
}

/* emit kernel params */
static inline void
emit_kernel_params(struct fd_context *ctx, const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, const struct pipe_grid_info *info)
   assert_dt
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset = const_state->offsets.kernel_params;
   if (v->constlen > offset) {
      ring_wfi(ctx->batch, ring);
      emit_const_user(ring, v, offset * 4,
                      align(v->cs.req_input_mem, 4),
                      info->input);
   }
}

static inline void
ir3_emit_driver_params(const struct ir3_shader_variant *v,
                       struct fd_ringbuffer *ring, struct fd_context *ctx,
                       const struct pipe_draw_info *info,
                       const struct pipe_draw_indirect_info *indirect,
                       const struct pipe_draw_start_count_bias *draw) assert_dt
{
   assert(v->need_driver_params);

   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset = const_state->offsets.driver_param;
   uint32_t vertex_params[IR3_DP_VS_COUNT] = {
      [IR3_DP_DRAWID] = 0, /* filled by hw (CP_DRAW_INDIRECT_MULTI) */
      [IR3_DP_VTXID_BASE] = info->index_size ? draw->index_bias : draw->start,
      [IR3_DP_INSTID_BASE] = info->start_instance,
      [IR3_DP_VTXCNT_MAX] = ctx->streamout.max_tf_vtx,
   };
   if (v->key.ucp_enables) {
      struct pipe_clip_state *ucp = &ctx->ucp;
      unsigned pos = IR3_DP_UCP0_X;
      for (unsigned i = 0; pos <= IR3_DP_UCP7_W; i++) {
         for (unsigned j = 0; j < 4; j++) {
            vertex_params[pos] = fui(ucp->ucp[i][j]);
            pos++;
         }
      }
   }

   /* Only emit as many params as needed, i.e. up to the highest enabled UCP
    * plane. However a binning pass may drop even some of these, so limit to
    * program max.
    */
   const uint32_t vertex_params_size =
      MIN2(const_state->num_driver_params, (v->constlen - offset) * 4);
   assert(vertex_params_size <= IR3_DP_VS_COUNT);

   bool needs_vtxid_base =
      ir3_find_sysval_regid(v, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) !=
      regid(63, 0);

   /* for indirect draw, we need to copy VTXID_BASE from
    * indirect-draw parameters buffer.. which is annoying
    * and means we can't easily emit these consts in cmd
    * stream so need to copy them to bo.
    */
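   /* The src_off adjustment below assumes the standard GL/Gallium indirect
    * draw command layouts, sketched here for reference:
    *
    *    indexed:     { count, instance_count, first_index, index_bias, start_instance }
    *    non-indexed: { count, instance_count, start, start_instance }
    *
    * so index_bias is the 4th dword and start is the 3rd.
    */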
   if (indirect && needs_vtxid_base) {
      uint32_t vertex_params_area = align(vertex_params_size, 16);
      struct pipe_resource *vertex_params_rsc =
         pipe_buffer_create(&ctx->screen->base, PIPE_BIND_CONSTANT_BUFFER,
                            PIPE_USAGE_STREAM, vertex_params_area * 4);
      unsigned src_off = indirect->offset;
      void *ptr;

      ptr = fd_bo_map(fd_resource(vertex_params_rsc)->bo);
      memcpy(ptr, vertex_params, vertex_params_size * 4);

      if (info->index_size) {
         /* indexed draw, index_bias is 4th field: */
         src_off += 3 * 4;
      } else {
         /* non-indexed draw, start is 3rd field: */
         src_off += 2 * 4;
      }

      /* copy index_bias or start from draw params: */
      ctx->screen->mem_to_mem(ring, vertex_params_rsc, 0, indirect->buffer,
                              src_off, 1);

      emit_const_prsc(ring, v, offset * 4, 0, vertex_params_area,
                      vertex_params_rsc);

      pipe_resource_reference(&vertex_params_rsc, NULL);
   } else {
      emit_const_user(ring, v, offset * 4, vertex_params_size, vertex_params);
   }

   /* if needed, emit stream-out buffer addresses: */
   if (vertex_params[IR3_DP_VTXCNT_MAX] > 0) {
      emit_tfbos(ctx, v, ring);
   }
}

static inline void
ir3_emit_vs_consts(const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, struct fd_context *ctx,
                   const struct pipe_draw_info *info,
                   const struct pipe_draw_indirect_info *indirect,
                   const struct pipe_draw_start_count_bias *draw) assert_dt
{
   assert(v->type == MESA_SHADER_VERTEX);

   emit_common_consts(v, ring, ctx, PIPE_SHADER_VERTEX);

   /* emit driver params every time: */
   if (info && v->need_driver_params) {
      ring_wfi(ctx->batch, ring);
      ir3_emit_driver_params(v, ring, ctx, info, indirect, draw);
   }
}

static inline void
ir3_emit_fs_consts(const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, struct fd_context *ctx) assert_dt
{
   assert(v->type == MESA_SHADER_FRAGMENT);

   emit_common_consts(v, ring, ctx, PIPE_SHADER_FRAGMENT);
}

/* emit compute-shader consts: */
static inline void
ir3_emit_cs_consts(const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, struct fd_context *ctx,
                   const struct pipe_grid_info *info) assert_dt
{
   assert(gl_shader_stage_is_compute(v->type));

   emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE);
   emit_kernel_params(ctx, v, ring, info);

   /* a3xx/a4xx can inject these directly */
   if (ctx->screen->gen <= 4)
      return;

   /* emit compute-shader driver-params: */
   const struct ir3_const_state *const_state = ir3_const_state(v);
   uint32_t offset = const_state->offsets.driver_param;
   if (v->constlen > offset) {
      ring_wfi(ctx->batch, ring);

      if (info->indirect) {
         struct pipe_resource *indirect = NULL;
         unsigned indirect_offset;

         /* This is a bit awkward, but CP_LOAD_STATE.EXT_SRC_ADDR needs
          * to be aligned more strongly than 4 bytes.  So in this case
          * we need a temporary buffer to copy NumWorkGroups.xyz to.
          *
          * TODO if previous compute job is writing to info->indirect,
          * we might need a WFI.. but since we currently flush for each
          * compute job, we are probably ok for now.
          */
         if (info->indirect_offset & 0xf) {
            indirect = pipe_buffer_create(&ctx->screen->base,
                                          PIPE_BIND_COMMAND_ARGS_BUFFER,
                                          PIPE_USAGE_STREAM, 0x1000);
            indirect_offset = 0;

            ctx->screen->mem_to_mem(ring, indirect, 0, info->indirect,
                                    info->indirect_offset, 3);
         } else {
            pipe_resource_reference(&indirect, info->indirect);
            indirect_offset = info->indirect_offset;
         }

         emit_const_prsc(ring, v, offset * 4, indirect_offset, 16, indirect);

         pipe_resource_reference(&indirect, NULL);
      } else {
         uint32_t compute_params[IR3_DP_CS_COUNT] = {
            [IR3_DP_NUM_WORK_GROUPS_X] = info->grid[0],
            [IR3_DP_NUM_WORK_GROUPS_Y] = info->grid[1],
            [IR3_DP_NUM_WORK_GROUPS_Z] = info->grid[2],
            [IR3_DP_WORK_DIM] = info->work_dim,
            [IR3_DP_LOCAL_GROUP_SIZE_X] = info->block[0],
            [IR3_DP_LOCAL_GROUP_SIZE_Y] = info->block[1],
            [IR3_DP_LOCAL_GROUP_SIZE_Z] = info->block[2],
         };
         uint32_t size =
            MIN2(const_state->num_driver_params, v->constlen * 4 - offset * 4);

         emit_const_user(ring, v, offset * 4, size, compute_params);
      }
   }
}
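
/* Putting it together, a rough (hypothetical) per-gen call sequence: the draw
 * path emits ir3_emit_vs_consts() and ir3_emit_fs_consts() into either the
 * draw ring or a CP_SET_DRAW_STATE stateobj, and the compute path emits
 * ir3_emit_cs_consts() with the pipe_grid_info for the launch, e.g.:
 *
 *    ir3_emit_vs_consts(vs, constobj, ctx, info, indirect, draw);
 *    ir3_emit_fs_consts(fs, constobj, ctx);
 *    ...
 *    ir3_emit_cs_consts(cs, ring, ctx, grid_info);
 *
 * (variable names above are illustrative, not an actual call site)
 */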