1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2021 Collabora, Ltd. 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "compiler.h" 25bf215546Sopenharmony_ci#include "bi_builder.h" 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci/* Bifrost v7 can preload up to two messages of the form: 28bf215546Sopenharmony_ci * 29bf215546Sopenharmony_ci * 1. +LD_VAR_IMM, register_format f32/f16, sample mode 30bf215546Sopenharmony_ci * 2. +VAR_TEX, register format f32/f16, sample mode (TODO) 31bf215546Sopenharmony_ci * 32bf215546Sopenharmony_ci * Analyze the shader for these instructions and push accordingly. 33bf215546Sopenharmony_ci */ 34bf215546Sopenharmony_ci 35bf215546Sopenharmony_cistatic bool 36bf215546Sopenharmony_cibi_is_regfmt_float(enum bi_register_format regfmt) 37bf215546Sopenharmony_ci{ 38bf215546Sopenharmony_ci return (regfmt == BI_REGISTER_FORMAT_F32) || 39bf215546Sopenharmony_ci (regfmt == BI_REGISTER_FORMAT_F16); 40bf215546Sopenharmony_ci} 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_ci/* 43bf215546Sopenharmony_ci * Preloaded varyings are interpolated at the sample location. Check if an 44bf215546Sopenharmony_ci * instruction can use this interpolation mode. 45bf215546Sopenharmony_ci */ 46bf215546Sopenharmony_cistatic bool 47bf215546Sopenharmony_cibi_can_interp_at_sample(bi_instr *I) 48bf215546Sopenharmony_ci{ 49bf215546Sopenharmony_ci /* .sample mode with r61 corresponds to per-sample interpolation */ 50bf215546Sopenharmony_ci if (I->sample == BI_SAMPLE_SAMPLE) 51bf215546Sopenharmony_ci return bi_is_value_equiv(I->src[0], bi_register(61)); 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_ci /* If the shader runs with pixel-frequency shading, .sample is 54bf215546Sopenharmony_ci * equivalent to .center, so allow .center 55bf215546Sopenharmony_ci * 56bf215546Sopenharmony_ci * If the shader runs with sample-frequency shading, .sample and .center 57bf215546Sopenharmony_ci * are not equivalent. However, the ESSL 3.20 specification 58bf215546Sopenharmony_ci * stipulates in section 4.5 ("Interpolation Qualifiers"): 59bf215546Sopenharmony_ci * 60bf215546Sopenharmony_ci * for fragment shader input variables qualified with neither 61bf215546Sopenharmony_ci * centroid nor sample, the value of the assigned variable may be 62bf215546Sopenharmony_ci * interpolated anywhere within the pixel and a single value may be 63bf215546Sopenharmony_ci * assigned to each sample within the pixel, to the extent permitted 64bf215546Sopenharmony_ci * by the OpenGL ES Specification. 65bf215546Sopenharmony_ci * 66bf215546Sopenharmony_ci * We only produce .center for variables qualified with neither centroid 67bf215546Sopenharmony_ci * nor sample, so if .center is specified this section applies. This 68bf215546Sopenharmony_ci * suggests that, although per-pixel interpolation is allowed, it is not 69bf215546Sopenharmony_ci * mandated ("may" rather than "must" or "should"). Therefore it appears 70bf215546Sopenharmony_ci * safe to substitute sample. 71bf215546Sopenharmony_ci */ 72bf215546Sopenharmony_ci return (I->sample == BI_SAMPLE_CENTER); 73bf215546Sopenharmony_ci} 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_cistatic bool 76bf215546Sopenharmony_cibi_can_preload_ld_var(bi_instr *I) 77bf215546Sopenharmony_ci{ 78bf215546Sopenharmony_ci return (I->op == BI_OPCODE_LD_VAR_IMM) && 79bf215546Sopenharmony_ci bi_can_interp_at_sample(I) && 80bf215546Sopenharmony_ci bi_is_regfmt_float(I->register_format); 81bf215546Sopenharmony_ci} 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_cistatic bool 84bf215546Sopenharmony_cibi_is_var_tex(enum bi_opcode op) 85bf215546Sopenharmony_ci{ 86bf215546Sopenharmony_ci return (op == BI_OPCODE_VAR_TEX_F32) || (op == BI_OPCODE_VAR_TEX_F16); 87bf215546Sopenharmony_ci} 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_civoid 90bf215546Sopenharmony_cibi_opt_message_preload(bi_context *ctx) 91bf215546Sopenharmony_ci{ 92bf215546Sopenharmony_ci unsigned nr_preload = 0; 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_ci /* We only preload from the first block */ 95bf215546Sopenharmony_ci bi_block *block = bi_start_block(&ctx->blocks); 96bf215546Sopenharmony_ci bi_builder b = bi_init_builder(ctx, bi_before_nonempty_block(block)); 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_ci bi_foreach_instr_in_block_safe(block, I) { 99bf215546Sopenharmony_ci if (!bi_is_ssa(I->dest[0])) continue; 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_ci struct bifrost_message_preload msg; 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ci if (bi_can_preload_ld_var(I)) { 104bf215546Sopenharmony_ci msg = (struct bifrost_message_preload) { 105bf215546Sopenharmony_ci .enabled = true, 106bf215546Sopenharmony_ci .varying_index = I->varying_index, 107bf215546Sopenharmony_ci .fp16 = (I->register_format == BI_REGISTER_FORMAT_F16), 108bf215546Sopenharmony_ci .num_components = I->vecsize + 1 109bf215546Sopenharmony_ci }; 110bf215546Sopenharmony_ci } else if (bi_is_var_tex(I->op)) { 111bf215546Sopenharmony_ci msg = (struct bifrost_message_preload) { 112bf215546Sopenharmony_ci .enabled = true, 113bf215546Sopenharmony_ci .texture = true, 114bf215546Sopenharmony_ci .varying_index = I->varying_index, 115bf215546Sopenharmony_ci .texture_index = I->texture_index, 116bf215546Sopenharmony_ci .fp16 = (I->op == BI_OPCODE_VAR_TEX_F16), 117bf215546Sopenharmony_ci .skip = I->skip, 118bf215546Sopenharmony_ci .zero_lod = I->lod_mode 119bf215546Sopenharmony_ci }; 120bf215546Sopenharmony_ci } else { 121bf215546Sopenharmony_ci continue; 122bf215546Sopenharmony_ci } 123bf215546Sopenharmony_ci 124bf215546Sopenharmony_ci /* Report the preloading */ 125bf215546Sopenharmony_ci ctx->info.bifrost->messages[nr_preload] = msg; 126bf215546Sopenharmony_ci 127bf215546Sopenharmony_ci /* Replace with a collect of preloaded registers. The collect 128bf215546Sopenharmony_ci * kills the moves, so the collect is free (it is coalesced). 129bf215546Sopenharmony_ci */ 130bf215546Sopenharmony_ci b.cursor = bi_before_instr(I); 131bf215546Sopenharmony_ci 132bf215546Sopenharmony_ci bi_instr *collect = bi_collect_i32_to(&b, I->dest[0]); 133bf215546Sopenharmony_ci collect->nr_srcs = bi_count_write_registers(I, 0); 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_ci /* The registers themselves must be preloaded at the start of 136bf215546Sopenharmony_ci * the program. Preloaded registers are coalesced, so these 137bf215546Sopenharmony_ci * moves are free. 138bf215546Sopenharmony_ci */ 139bf215546Sopenharmony_ci b.cursor = bi_before_block(block); 140bf215546Sopenharmony_ci for (unsigned i = 0; i < collect->nr_srcs; ++i) { 141bf215546Sopenharmony_ci unsigned reg = (nr_preload * 4) + i; 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci collect->src[i] = bi_mov_i32(&b, bi_register(reg)); 144bf215546Sopenharmony_ci } 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci bi_remove_instruction(I); 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_ci /* Maximum number of preloaded messages */ 149bf215546Sopenharmony_ci if ((++nr_preload) == 2) 150bf215546Sopenharmony_ci break; 151bf215546Sopenharmony_ci } 152bf215546Sopenharmony_ci} 153