/**************************************************************************
 *
 * Copyright 2010 Christian König
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci#include <assert.h>
29bf215546Sopenharmony_ci
30bf215546Sopenharmony_ci#include "pipe/p_context.h"
31bf215546Sopenharmony_ci#include "pipe/p_screen.h"
32bf215546Sopenharmony_ci
33bf215546Sopenharmony_ci#include "util/u_draw.h"
34bf215546Sopenharmony_ci#include "util/u_sampler.h"
35bf215546Sopenharmony_ci#include "util/u_memory.h"
36bf215546Sopenharmony_ci
37bf215546Sopenharmony_ci#include "tgsi/tgsi_ureg.h"
38bf215546Sopenharmony_ci
39bf215546Sopenharmony_ci#include "vl_defines.h"
40bf215546Sopenharmony_ci#include "vl_types.h"
41bf215546Sopenharmony_ci#include "vl_vertex_buffers.h"
42bf215546Sopenharmony_ci#include "vl_idct.h"
43bf215546Sopenharmony_ci
/**
 * Semantic indices used for the vertex shader outputs in this file.
 *
 * VS_O_VPOS is declared with TGSI_SEMANTIC_POSITION while the address
 * outputs are declared with TGSI_SEMANTIC_GENERIC, which is why VS_O_VPOS
 * and VS_O_L_ADDR0 can both be 0.
 */
enum VS_OUTPUT
{
   VS_O_VPOS    = 0,
   VS_O_L_ADDR0 = 0,
   VS_O_L_ADDR1 = 1,
   VS_O_R_ADDR0 = 2,
   VS_O_R_ADDR1 = 3
};
52bf215546Sopenharmony_ci
/**
 * The 8x8 DCT matrix. Each entry holds the bit pattern of a float, written
 * as a hexadecimal 32-bit word. The values are equal to the result of:
 *
 *    for (i = 0; i < 8; ++i)
 *       for (j = 0; j < 8; ++j)
 *          if (i == 0) const_matrix[i][j] = 1.0f / sqrtf(8.0f);
 *          else const_matrix[i][j] = sqrtf(2.0f / 8.0f) * cosf((2 * j + 1) * i * M_PI / (2.0f * 8.0f));
 */
static const uint32_t const_matrix[8][8] = {
   /* i = 0 */
   { 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3,
     0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3 },
   /* i = 1 */
   { 0x3efb14be, 0x3ed4db31, 0x3e8e39da, 0x3dc7c5c4,
     0xbdc7c5c2, 0xbe8e39d9, 0xbed4db32, 0xbefb14bf },
   /* i = 2 */
   { 0x3eec835f, 0x3e43ef15, 0xbe43ef14, 0xbeec835e,
     0xbeec835f, 0xbe43ef1a, 0x3e43ef1b, 0x3eec835f },
   /* i = 3 */
   { 0x3ed4db31, 0xbdc7c5c2, 0xbefb14bf, 0xbe8e39dd,
     0x3e8e39d7, 0x3efb14bf, 0x3dc7c5d0, 0xbed4db34 },
   /* i = 4 */
   { 0x3eb504f3, 0xbeb504f3, 0xbeb504f4, 0x3eb504f1,
     0x3eb504f3, 0xbeb504f0, 0xbeb504ef, 0x3eb504f4 },
   /* i = 5 */
   { 0x3e8e39da, 0xbefb14bf, 0x3dc7c5c8, 0x3ed4db32,
     0xbed4db34, 0xbdc7c5bb, 0x3efb14bf, 0xbe8e39d7 },
   /* i = 6 */
   { 0x3e43ef15, 0xbeec835f, 0x3eec835f, 0xbe43ef07,
     0xbe43ef23, 0x3eec8361, 0xbeec835c, 0x3e43ef25 },
   /* i = 7 */
   { 0x3dc7c5c4, 0xbe8e39dd, 0x3ed4db32, 0xbefb14c0,
     0x3efb14be, 0xbed4db31, 0x3e8e39ce, 0xbdc7c596 },
};
70bf215546Sopenharmony_ci
71bf215546Sopenharmony_cistatic void
72bf215546Sopenharmony_cicalc_addr(struct ureg_program *shader, struct ureg_dst addr[2],
73bf215546Sopenharmony_ci          struct ureg_src tc, struct ureg_src start, bool right_side,
74bf215546Sopenharmony_ci          bool transposed, float size)
75bf215546Sopenharmony_ci{
76bf215546Sopenharmony_ci   unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
77bf215546Sopenharmony_ci   unsigned sw_start = right_side ? TGSI_SWIZZLE_Y : TGSI_SWIZZLE_X;
78bf215546Sopenharmony_ci
79bf215546Sopenharmony_ci   unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
80bf215546Sopenharmony_ci   unsigned sw_tc = right_side ? TGSI_SWIZZLE_X : TGSI_SWIZZLE_Y;
81bf215546Sopenharmony_ci
82bf215546Sopenharmony_ci   /*
83bf215546Sopenharmony_ci    * addr[0..1].(start) = right_side ? start.x : tc.x
84bf215546Sopenharmony_ci    * addr[0..1].(tc) = right_side ? tc.y : start.y
85bf215546Sopenharmony_ci    * addr[0..1].z = tc.z
86bf215546Sopenharmony_ci    * addr[1].(start) += 1.0f / scale
87bf215546Sopenharmony_ci    */
88bf215546Sopenharmony_ci   ureg_MOV(shader, ureg_writemask(addr[0], wm_start), ureg_scalar(start, sw_start));
89bf215546Sopenharmony_ci   ureg_MOV(shader, ureg_writemask(addr[0], wm_tc), ureg_scalar(tc, sw_tc));
90bf215546Sopenharmony_ci
91bf215546Sopenharmony_ci   ureg_ADD(shader, ureg_writemask(addr[1], wm_start), ureg_scalar(start, sw_start), ureg_imm1f(shader, 1.0f / size));
92bf215546Sopenharmony_ci   ureg_MOV(shader, ureg_writemask(addr[1], wm_tc), ureg_scalar(tc, sw_tc));
93bf215546Sopenharmony_ci}
94bf215546Sopenharmony_ci
95bf215546Sopenharmony_cistatic void
96bf215546Sopenharmony_ciincrement_addr(struct ureg_program *shader, struct ureg_dst daddr[2],
97bf215546Sopenharmony_ci               struct ureg_src saddr[2], bool right_side, bool transposed,
98bf215546Sopenharmony_ci               int pos, float size)
99bf215546Sopenharmony_ci{
100bf215546Sopenharmony_ci   unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
101bf215546Sopenharmony_ci   unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
102bf215546Sopenharmony_ci
103bf215546Sopenharmony_ci   /*
104bf215546Sopenharmony_ci    * daddr[0..1].(start) = saddr[0..1].(start)
105bf215546Sopenharmony_ci    * daddr[0..1].(tc) = saddr[0..1].(tc)
106bf215546Sopenharmony_ci    */
107bf215546Sopenharmony_ci
108bf215546Sopenharmony_ci   ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]);
109bf215546Sopenharmony_ci   ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size));
110bf215546Sopenharmony_ci   ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]);
111bf215546Sopenharmony_ci   ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size));
112bf215546Sopenharmony_ci}
113bf215546Sopenharmony_ci
114bf215546Sopenharmony_cistatic void
115bf215546Sopenharmony_cifetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2],
116bf215546Sopenharmony_ci           struct ureg_src sampler, bool resource3d)
117bf215546Sopenharmony_ci{
118bf215546Sopenharmony_ci   ureg_TEX(shader, m[0], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[0], sampler);
119bf215546Sopenharmony_ci   ureg_TEX(shader, m[1], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[1], sampler);
120bf215546Sopenharmony_ci}
121bf215546Sopenharmony_ci
122bf215546Sopenharmony_cistatic void
123bf215546Sopenharmony_cimatrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2])
124bf215546Sopenharmony_ci{
125bf215546Sopenharmony_ci   struct ureg_dst tmp;
126bf215546Sopenharmony_ci
127bf215546Sopenharmony_ci   tmp = ureg_DECL_temporary(shader);
128bf215546Sopenharmony_ci
129bf215546Sopenharmony_ci   /*
130bf215546Sopenharmony_ci    * tmp.xy = dot4(m[0][0..1], m[1][0..1])
131bf215546Sopenharmony_ci    * dst = tmp.x + tmp.y
132bf215546Sopenharmony_ci    */
133bf215546Sopenharmony_ci   ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0]));
134bf215546Sopenharmony_ci   ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(l[1]), ureg_src(r[1]));
135bf215546Sopenharmony_ci   ureg_ADD(shader, dst,
136bf215546Sopenharmony_ci      ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X),
137bf215546Sopenharmony_ci      ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
138bf215546Sopenharmony_ci
139bf215546Sopenharmony_ci   ureg_release_temporary(shader, tmp);
140bf215546Sopenharmony_ci}
141bf215546Sopenharmony_ci
142bf215546Sopenharmony_cistatic void *
143bf215546Sopenharmony_cicreate_mismatch_vert_shader(struct vl_idct *idct)
144bf215546Sopenharmony_ci{
145bf215546Sopenharmony_ci   struct ureg_program *shader;
146bf215546Sopenharmony_ci   struct ureg_src vpos;
147bf215546Sopenharmony_ci   struct ureg_src scale;
148bf215546Sopenharmony_ci   struct ureg_dst t_tex;
149bf215546Sopenharmony_ci   struct ureg_dst o_vpos, o_addr[2];
150bf215546Sopenharmony_ci
151bf215546Sopenharmony_ci   shader = ureg_create(PIPE_SHADER_VERTEX);
152bf215546Sopenharmony_ci   if (!shader)
153bf215546Sopenharmony_ci      return NULL;
154bf215546Sopenharmony_ci
155bf215546Sopenharmony_ci   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
156bf215546Sopenharmony_ci
157bf215546Sopenharmony_ci   t_tex = ureg_DECL_temporary(shader);
158bf215546Sopenharmony_ci
159bf215546Sopenharmony_ci   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
160bf215546Sopenharmony_ci
161bf215546Sopenharmony_ci   o_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
162bf215546Sopenharmony_ci   o_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);
163bf215546Sopenharmony_ci
164bf215546Sopenharmony_ci   /*
165bf215546Sopenharmony_ci    * scale = (VL_BLOCK_WIDTH, VL_BLOCK_HEIGHT) / (dst.width, dst.height)
166bf215546Sopenharmony_ci    *
167bf215546Sopenharmony_ci    * t_vpos = vpos + 7 / VL_BLOCK_WIDTH
168bf215546Sopenharmony_ci    * o_vpos.xy = t_vpos * scale
169bf215546Sopenharmony_ci    *
170bf215546Sopenharmony_ci    * o_addr = calc_addr(...)
171bf215546Sopenharmony_ci    *
172bf215546Sopenharmony_ci    */
173bf215546Sopenharmony_ci
174bf215546Sopenharmony_ci   scale = ureg_imm2f(shader,
175bf215546Sopenharmony_ci      (float)VL_BLOCK_WIDTH / idct->buffer_width,
176bf215546Sopenharmony_ci      (float)VL_BLOCK_HEIGHT / idct->buffer_height);
177bf215546Sopenharmony_ci
178bf215546Sopenharmony_ci   ureg_MAD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), vpos, scale, scale);
179bf215546Sopenharmony_ci   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
180bf215546Sopenharmony_ci
181bf215546Sopenharmony_ci   ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, scale);
182bf215546Sopenharmony_ci   calc_addr(shader, o_addr, ureg_src(t_tex), ureg_src(t_tex), false, false, idct->buffer_width / 4);
183bf215546Sopenharmony_ci
184bf215546Sopenharmony_ci   ureg_release_temporary(shader, t_tex);
185bf215546Sopenharmony_ci
186bf215546Sopenharmony_ci   ureg_END(shader);
187bf215546Sopenharmony_ci
188bf215546Sopenharmony_ci   return ureg_create_shader_and_destroy(shader, idct->pipe);
189bf215546Sopenharmony_ci}
190bf215546Sopenharmony_ci
191bf215546Sopenharmony_cistatic void *
192bf215546Sopenharmony_cicreate_mismatch_frag_shader(struct vl_idct *idct)
193bf215546Sopenharmony_ci{
194bf215546Sopenharmony_ci   struct ureg_program *shader;
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_ci   struct ureg_src addr[2];
197bf215546Sopenharmony_ci
198bf215546Sopenharmony_ci   struct ureg_dst m[8][2];
199bf215546Sopenharmony_ci   struct ureg_dst fragment;
200bf215546Sopenharmony_ci
201bf215546Sopenharmony_ci   unsigned i;
202bf215546Sopenharmony_ci
203bf215546Sopenharmony_ci   shader = ureg_create(PIPE_SHADER_FRAGMENT);
204bf215546Sopenharmony_ci   if (!shader)
205bf215546Sopenharmony_ci      return NULL;
206bf215546Sopenharmony_ci
207bf215546Sopenharmony_ci   addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
208bf215546Sopenharmony_ci   addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
209bf215546Sopenharmony_ci
210bf215546Sopenharmony_ci   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
211bf215546Sopenharmony_ci
212bf215546Sopenharmony_ci   for (i = 0; i < 8; ++i) {
213bf215546Sopenharmony_ci      m[i][0] = ureg_DECL_temporary(shader);
214bf215546Sopenharmony_ci      m[i][1] = ureg_DECL_temporary(shader);
215bf215546Sopenharmony_ci   }
216bf215546Sopenharmony_ci
217bf215546Sopenharmony_ci   for (i = 0; i < 8; ++i) {
218bf215546Sopenharmony_ci      increment_addr(shader, m[i], addr, false, false, i, idct->buffer_height);
219bf215546Sopenharmony_ci   }
220bf215546Sopenharmony_ci
221bf215546Sopenharmony_ci   for (i = 0; i < 8; ++i) {
222bf215546Sopenharmony_ci      struct ureg_src s_addr[2];
223bf215546Sopenharmony_ci      s_addr[0] = ureg_src(m[i][0]);
224bf215546Sopenharmony_ci      s_addr[1] = ureg_src(m[i][1]);
225bf215546Sopenharmony_ci      fetch_four(shader, m[i], s_addr, ureg_DECL_sampler(shader, 0), false);
226bf215546Sopenharmony_ci   }
227bf215546Sopenharmony_ci
228bf215546Sopenharmony_ci   for (i = 1; i < 8; ++i) {
229bf215546Sopenharmony_ci      ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[i][0]));
230bf215546Sopenharmony_ci      ureg_ADD(shader, m[0][1], ureg_src(m[0][1]), ureg_src(m[i][1]));
231bf215546Sopenharmony_ci   }
232bf215546Sopenharmony_ci
233bf215546Sopenharmony_ci   ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[0][1]));
234bf215546Sopenharmony_ci   ureg_DP4(shader, m[0][0], ureg_abs(ureg_src(m[0][0])), ureg_imm1f(shader, 1 << 14));
235bf215546Sopenharmony_ci
236bf215546Sopenharmony_ci   ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_abs(ureg_src(m[7][1])), ureg_imm1f(shader, 1 << 14));
237bf215546Sopenharmony_ci   ureg_FRC(shader, m[0][0], ureg_src(m[0][0]));
238bf215546Sopenharmony_ci   ureg_SGT(shader, m[0][0], ureg_imm1f(shader, 0.5f), ureg_abs(ureg_src(m[0][0])));
239bf215546Sopenharmony_ci
240bf215546Sopenharmony_ci   ureg_CMP(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_negate(ureg_src(m[0][0])),
241bf215546Sopenharmony_ci            ureg_imm1f(shader, 1.0f / (1 << 15)), ureg_imm1f(shader, -1.0f / (1 << 15)));
242bf215546Sopenharmony_ci   ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_src(m[0][0]),
243bf215546Sopenharmony_ci            ureg_scalar(ureg_src(m[0][0]), TGSI_SWIZZLE_X));
244bf215546Sopenharmony_ci
245bf215546Sopenharmony_ci   ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(m[7][1]));
246bf215546Sopenharmony_ci   ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(m[0][0]), ureg_src(m[7][1]));
247bf215546Sopenharmony_ci
248bf215546Sopenharmony_ci   for (i = 0; i < 8; ++i) {
249bf215546Sopenharmony_ci      ureg_release_temporary(shader, m[i][0]);
250bf215546Sopenharmony_ci      ureg_release_temporary(shader, m[i][1]);
251bf215546Sopenharmony_ci   }
252bf215546Sopenharmony_ci
253bf215546Sopenharmony_ci   ureg_END(shader);
254bf215546Sopenharmony_ci
255bf215546Sopenharmony_ci   return ureg_create_shader_and_destroy(shader, idct->pipe);
256bf215546Sopenharmony_ci}
257bf215546Sopenharmony_ci
258bf215546Sopenharmony_cistatic void *
259bf215546Sopenharmony_cicreate_stage1_vert_shader(struct vl_idct *idct)
260bf215546Sopenharmony_ci{
261bf215546Sopenharmony_ci   struct ureg_program *shader;
262bf215546Sopenharmony_ci   struct ureg_src vrect, vpos;
263bf215546Sopenharmony_ci   struct ureg_src scale;
264bf215546Sopenharmony_ci   struct ureg_dst t_tex, t_start;
265bf215546Sopenharmony_ci   struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2];
266bf215546Sopenharmony_ci
267bf215546Sopenharmony_ci   shader = ureg_create(PIPE_SHADER_VERTEX);
268bf215546Sopenharmony_ci   if (!shader)
269bf215546Sopenharmony_ci      return NULL;
270bf215546Sopenharmony_ci
271bf215546Sopenharmony_ci   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
272bf215546Sopenharmony_ci   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
273bf215546Sopenharmony_ci
274bf215546Sopenharmony_ci   t_tex = ureg_DECL_temporary(shader);
275bf215546Sopenharmony_ci   t_start = ureg_DECL_temporary(shader);
276bf215546Sopenharmony_ci
277bf215546Sopenharmony_ci   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
278bf215546Sopenharmony_ci
279bf215546Sopenharmony_ci   o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
280bf215546Sopenharmony_ci   o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);
281bf215546Sopenharmony_ci
282bf215546Sopenharmony_ci   o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0);
283bf215546Sopenharmony_ci   o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1);
284bf215546Sopenharmony_ci
285bf215546Sopenharmony_ci   /*
286bf215546Sopenharmony_ci    * scale = (VL_BLOCK_WIDTH, VL_BLOCK_HEIGHT) / (dst.width, dst.height)
287bf215546Sopenharmony_ci    *
288bf215546Sopenharmony_ci    * t_vpos = vpos + vrect
289bf215546Sopenharmony_ci    * o_vpos.xy = t_vpos * scale
290bf215546Sopenharmony_ci    * o_vpos.zw = vpos
291bf215546Sopenharmony_ci    *
292bf215546Sopenharmony_ci    * o_l_addr = calc_addr(...)
293bf215546Sopenharmony_ci    * o_r_addr = calc_addr(...)
294bf215546Sopenharmony_ci    *
295bf215546Sopenharmony_ci    */
296bf215546Sopenharmony_ci
297bf215546Sopenharmony_ci   scale = ureg_imm2f(shader,
298bf215546Sopenharmony_ci      (float)VL_BLOCK_WIDTH / idct->buffer_width,
299bf215546Sopenharmony_ci      (float)VL_BLOCK_HEIGHT / idct->buffer_height);
300bf215546Sopenharmony_ci
301bf215546Sopenharmony_ci   ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, vrect);
302bf215546Sopenharmony_ci   ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);
303bf215546Sopenharmony_ci
304bf215546Sopenharmony_ci   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex));
305bf215546Sopenharmony_ci   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
306bf215546Sopenharmony_ci
307bf215546Sopenharmony_ci   ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale);
308bf215546Sopenharmony_ci
309bf215546Sopenharmony_ci   calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4);
310bf215546Sopenharmony_ci   calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, VL_BLOCK_WIDTH / 4);
311bf215546Sopenharmony_ci
312bf215546Sopenharmony_ci   ureg_release_temporary(shader, t_tex);
313bf215546Sopenharmony_ci   ureg_release_temporary(shader, t_start);
314bf215546Sopenharmony_ci
315bf215546Sopenharmony_ci   ureg_END(shader);
316bf215546Sopenharmony_ci
317bf215546Sopenharmony_ci   return ureg_create_shader_and_destroy(shader, idct->pipe);
318bf215546Sopenharmony_ci}
319bf215546Sopenharmony_ci
320bf215546Sopenharmony_cistatic void *
321bf215546Sopenharmony_cicreate_stage1_frag_shader(struct vl_idct *idct)
322bf215546Sopenharmony_ci{
323bf215546Sopenharmony_ci   struct ureg_program *shader;
324bf215546Sopenharmony_ci   struct ureg_src l_addr[2], r_addr[2];
325bf215546Sopenharmony_ci   struct ureg_dst l[4][2], r[2];
326bf215546Sopenharmony_ci   struct ureg_dst *fragment;
327bf215546Sopenharmony_ci   unsigned i;
328bf215546Sopenharmony_ci   int j;
329bf215546Sopenharmony_ci
330bf215546Sopenharmony_ci   shader = ureg_create(PIPE_SHADER_FRAGMENT);
331bf215546Sopenharmony_ci   if (!shader)
332bf215546Sopenharmony_ci      return NULL;
333bf215546Sopenharmony_ci
334bf215546Sopenharmony_ci   fragment = MALLOC(idct->nr_of_render_targets * sizeof(struct ureg_dst));
335bf215546Sopenharmony_ci
336bf215546Sopenharmony_ci   l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
337bf215546Sopenharmony_ci   l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
338bf215546Sopenharmony_ci
339bf215546Sopenharmony_ci   r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
340bf215546Sopenharmony_ci   r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);
341bf215546Sopenharmony_ci
342bf215546Sopenharmony_ci   for (i = 0; i < idct->nr_of_render_targets; ++i)
343bf215546Sopenharmony_ci       fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);
344bf215546Sopenharmony_ci
345bf215546Sopenharmony_ci   for (i = 0; i < 4; ++i) {
346bf215546Sopenharmony_ci      l[i][0] = ureg_DECL_temporary(shader);
347bf215546Sopenharmony_ci      l[i][1] = ureg_DECL_temporary(shader);
348bf215546Sopenharmony_ci   }
349bf215546Sopenharmony_ci
350bf215546Sopenharmony_ci   r[0] = ureg_DECL_temporary(shader);
351bf215546Sopenharmony_ci   r[1] = ureg_DECL_temporary(shader);
352bf215546Sopenharmony_ci
353bf215546Sopenharmony_ci   for (i = 0; i < 4; ++i) {
354bf215546Sopenharmony_ci      increment_addr(shader, l[i], l_addr, false, false, i - 2, idct->buffer_height);
355bf215546Sopenharmony_ci   }
356bf215546Sopenharmony_ci
357bf215546Sopenharmony_ci   for (i = 0; i < 4; ++i) {
358bf215546Sopenharmony_ci      struct ureg_src s_addr[2];
359bf215546Sopenharmony_ci      s_addr[0] = ureg_src(l[i][0]);
360bf215546Sopenharmony_ci      s_addr[1] = ureg_src(l[i][1]);
361bf215546Sopenharmony_ci      fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 0), false);
362bf215546Sopenharmony_ci   }
363bf215546Sopenharmony_ci
364bf215546Sopenharmony_ci   for (i = 0; i < idct->nr_of_render_targets; ++i) {
365bf215546Sopenharmony_ci      struct ureg_src s_addr[2];
366bf215546Sopenharmony_ci
367bf215546Sopenharmony_ci      increment_addr(shader, r, r_addr, true, true, i - (signed)idct->nr_of_render_targets / 2, VL_BLOCK_HEIGHT);
368bf215546Sopenharmony_ci
369bf215546Sopenharmony_ci      s_addr[0] = ureg_src(r[0]);
370bf215546Sopenharmony_ci      s_addr[1] = ureg_src(r[1]);
371bf215546Sopenharmony_ci      fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 1), false);
372bf215546Sopenharmony_ci
373bf215546Sopenharmony_ci      for (j = 0; j < 4; ++j) {
374bf215546Sopenharmony_ci         matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r);
375bf215546Sopenharmony_ci      }
376bf215546Sopenharmony_ci   }
377bf215546Sopenharmony_ci
378bf215546Sopenharmony_ci   for (i = 0; i < 4; ++i) {
379bf215546Sopenharmony_ci      ureg_release_temporary(shader, l[i][0]);
380bf215546Sopenharmony_ci      ureg_release_temporary(shader, l[i][1]);
381bf215546Sopenharmony_ci   }
382bf215546Sopenharmony_ci   ureg_release_temporary(shader, r[0]);
383bf215546Sopenharmony_ci   ureg_release_temporary(shader, r[1]);
384bf215546Sopenharmony_ci
385bf215546Sopenharmony_ci   ureg_END(shader);
386bf215546Sopenharmony_ci
387bf215546Sopenharmony_ci   FREE(fragment);
388bf215546Sopenharmony_ci
389bf215546Sopenharmony_ci   return ureg_create_shader_and_destroy(shader, idct->pipe);
390bf215546Sopenharmony_ci}
391bf215546Sopenharmony_ci
392bf215546Sopenharmony_civoid
393bf215546Sopenharmony_civl_idct_stage2_vert_shader(struct vl_idct *idct, struct ureg_program *shader,
394bf215546Sopenharmony_ci                           unsigned first_output, struct ureg_dst tex)
395bf215546Sopenharmony_ci{
396bf215546Sopenharmony_ci   struct ureg_src vrect, vpos;
397bf215546Sopenharmony_ci   struct ureg_src scale;
398bf215546Sopenharmony_ci   struct ureg_dst t_start;
399bf215546Sopenharmony_ci   struct ureg_dst o_l_addr[2], o_r_addr[2];
400bf215546Sopenharmony_ci
401bf215546Sopenharmony_ci   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
402bf215546Sopenharmony_ci   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
403bf215546Sopenharmony_ci
404bf215546Sopenharmony_ci   t_start = ureg_DECL_temporary(shader);
405bf215546Sopenharmony_ci
406bf215546Sopenharmony_ci   --first_output;
407bf215546Sopenharmony_ci
408bf215546Sopenharmony_ci   o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR0);
409bf215546Sopenharmony_ci   o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR1);
410bf215546Sopenharmony_ci
411bf215546Sopenharmony_ci   o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR0);
412bf215546Sopenharmony_ci   o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR1);
413bf215546Sopenharmony_ci
414bf215546Sopenharmony_ci   scale = ureg_imm2f(shader,
415bf215546Sopenharmony_ci      (float)VL_BLOCK_WIDTH / idct->buffer_width,
416bf215546Sopenharmony_ci      (float)VL_BLOCK_HEIGHT / idct->buffer_height);
417bf215546Sopenharmony_ci
418bf215546Sopenharmony_ci   ureg_MUL(shader, ureg_writemask(tex, TGSI_WRITEMASK_Z),
419bf215546Sopenharmony_ci      ureg_scalar(vrect, TGSI_SWIZZLE_X),
420bf215546Sopenharmony_ci      ureg_imm1f(shader, VL_BLOCK_WIDTH / idct->nr_of_render_targets));
421bf215546Sopenharmony_ci   ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale);
422bf215546Sopenharmony_ci
423bf215546Sopenharmony_ci   calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, VL_BLOCK_WIDTH / 4);
424bf215546Sopenharmony_ci   calc_addr(shader, o_r_addr, ureg_src(tex), ureg_src(t_start), true, false, idct->buffer_height / 4);
425bf215546Sopenharmony_ci
426bf215546Sopenharmony_ci   ureg_MOV(shader, ureg_writemask(o_r_addr[0], TGSI_WRITEMASK_Z), ureg_src(tex));
427bf215546Sopenharmony_ci   ureg_MOV(shader, ureg_writemask(o_r_addr[1], TGSI_WRITEMASK_Z), ureg_src(tex));
428bf215546Sopenharmony_ci}
429bf215546Sopenharmony_ci
430bf215546Sopenharmony_civoid
431bf215546Sopenharmony_civl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader,
432bf215546Sopenharmony_ci                           unsigned first_input, struct ureg_dst fragment)
433bf215546Sopenharmony_ci{
434bf215546Sopenharmony_ci   struct ureg_src l_addr[2], r_addr[2];
435bf215546Sopenharmony_ci
436bf215546Sopenharmony_ci   struct ureg_dst l[2], r[2];
437bf215546Sopenharmony_ci
438bf215546Sopenharmony_ci   --first_input;
439bf215546Sopenharmony_ci
440bf215546Sopenharmony_ci   l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
441bf215546Sopenharmony_ci   l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
442bf215546Sopenharmony_ci
443bf215546Sopenharmony_ci   r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
444bf215546Sopenharmony_ci   r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);
445bf215546Sopenharmony_ci
446bf215546Sopenharmony_ci   l[0] = ureg_DECL_temporary(shader);
447bf215546Sopenharmony_ci   l[1] = ureg_DECL_temporary(shader);
448bf215546Sopenharmony_ci   r[0] = ureg_DECL_temporary(shader);
449bf215546Sopenharmony_ci   r[1] = ureg_DECL_temporary(shader);
450bf215546Sopenharmony_ci
451bf215546Sopenharmony_ci   fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 1), false);
452bf215546Sopenharmony_ci   fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 0), true);
453bf215546Sopenharmony_ci
454bf215546Sopenharmony_ci   matrix_mul(shader, fragment, l, r);
455bf215546Sopenharmony_ci
456bf215546Sopenharmony_ci   ureg_release_temporary(shader, l[0]);
457bf215546Sopenharmony_ci   ureg_release_temporary(shader, l[1]);
458bf215546Sopenharmony_ci   ureg_release_temporary(shader, r[0]);
459bf215546Sopenharmony_ci   ureg_release_temporary(shader, r[1]);
460bf215546Sopenharmony_ci}
461bf215546Sopenharmony_ci
462bf215546Sopenharmony_cistatic bool
463bf215546Sopenharmony_ciinit_shaders(struct vl_idct *idct)
464bf215546Sopenharmony_ci{
465bf215546Sopenharmony_ci   idct->vs_mismatch = create_mismatch_vert_shader(idct);
466bf215546Sopenharmony_ci   if (!idct->vs_mismatch)
467bf215546Sopenharmony_ci      goto error_vs_mismatch;
468bf215546Sopenharmony_ci
469bf215546Sopenharmony_ci   idct->fs_mismatch = create_mismatch_frag_shader(idct);
470bf215546Sopenharmony_ci   if (!idct->fs_mismatch)
471bf215546Sopenharmony_ci      goto error_fs_mismatch;
472bf215546Sopenharmony_ci
473bf215546Sopenharmony_ci   idct->vs = create_stage1_vert_shader(idct);
474bf215546Sopenharmony_ci   if (!idct->vs)
475bf215546Sopenharmony_ci      goto error_vs;
476bf215546Sopenharmony_ci
477bf215546Sopenharmony_ci   idct->fs = create_stage1_frag_shader(idct);
478bf215546Sopenharmony_ci   if (!idct->fs)
479bf215546Sopenharmony_ci      goto error_fs;
480bf215546Sopenharmony_ci
481bf215546Sopenharmony_ci   return true;
482bf215546Sopenharmony_ci
483bf215546Sopenharmony_cierror_fs:
484bf215546Sopenharmony_ci   idct->pipe->delete_vs_state(idct->pipe, idct->vs);
485bf215546Sopenharmony_ci
486bf215546Sopenharmony_cierror_vs:
487bf215546Sopenharmony_ci   idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch);
488bf215546Sopenharmony_ci
489bf215546Sopenharmony_cierror_fs_mismatch:
490bf215546Sopenharmony_ci   idct->pipe->delete_vs_state(idct->pipe, idct->fs);
491bf215546Sopenharmony_ci
492bf215546Sopenharmony_cierror_vs_mismatch:
493bf215546Sopenharmony_ci   return false;
494bf215546Sopenharmony_ci}
495bf215546Sopenharmony_ci
496bf215546Sopenharmony_cistatic void
497bf215546Sopenharmony_cicleanup_shaders(struct vl_idct *idct)
498bf215546Sopenharmony_ci{
499bf215546Sopenharmony_ci   idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch);
500bf215546Sopenharmony_ci   idct->pipe->delete_fs_state(idct->pipe, idct->fs_mismatch);
501bf215546Sopenharmony_ci   idct->pipe->delete_vs_state(idct->pipe, idct->vs);
502bf215546Sopenharmony_ci   idct->pipe->delete_fs_state(idct->pipe, idct->fs);
503bf215546Sopenharmony_ci}
504bf215546Sopenharmony_ci
505bf215546Sopenharmony_cistatic bool
506bf215546Sopenharmony_ciinit_state(struct vl_idct *idct)
507bf215546Sopenharmony_ci{
508bf215546Sopenharmony_ci   struct pipe_blend_state blend;
509bf215546Sopenharmony_ci   struct pipe_rasterizer_state rs_state;
510bf215546Sopenharmony_ci   struct pipe_sampler_state sampler;
511bf215546Sopenharmony_ci   unsigned i;
512bf215546Sopenharmony_ci
513bf215546Sopenharmony_ci   assert(idct);
514bf215546Sopenharmony_ci
515bf215546Sopenharmony_ci   memset(&rs_state, 0, sizeof(rs_state));
516bf215546Sopenharmony_ci   rs_state.point_size = 1;
517bf215546Sopenharmony_ci   rs_state.half_pixel_center = true;
518bf215546Sopenharmony_ci   rs_state.bottom_edge_rule = true;
519bf215546Sopenharmony_ci   rs_state.depth_clip_near = 1;
520bf215546Sopenharmony_ci   rs_state.depth_clip_far = 1;
521bf215546Sopenharmony_ci
522bf215546Sopenharmony_ci   idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state);
523bf215546Sopenharmony_ci   if (!idct->rs_state)
524bf215546Sopenharmony_ci      goto error_rs_state;
525bf215546Sopenharmony_ci
526bf215546Sopenharmony_ci   memset(&blend, 0, sizeof blend);
527bf215546Sopenharmony_ci
528bf215546Sopenharmony_ci   blend.independent_blend_enable = 0;
529bf215546Sopenharmony_ci   blend.rt[0].blend_enable = 0;
530bf215546Sopenharmony_ci   blend.rt[0].rgb_func = PIPE_BLEND_ADD;
531bf215546Sopenharmony_ci   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
532bf215546Sopenharmony_ci   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
533bf215546Sopenharmony_ci   blend.rt[0].alpha_func = PIPE_BLEND_ADD;
534bf215546Sopenharmony_ci   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
535bf215546Sopenharmony_ci   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
536bf215546Sopenharmony_ci   blend.logicop_enable = 0;
537bf215546Sopenharmony_ci   blend.logicop_func = PIPE_LOGICOP_CLEAR;
538bf215546Sopenharmony_ci   /* Needed to allow color writes to FB, even if blending disabled */
539bf215546Sopenharmony_ci   blend.rt[0].colormask = PIPE_MASK_RGBA;
540bf215546Sopenharmony_ci   blend.dither = 0;
541bf215546Sopenharmony_ci   idct->blend = idct->pipe->create_blend_state(idct->pipe, &blend);
542bf215546Sopenharmony_ci   if (!idct->blend)
543bf215546Sopenharmony_ci      goto error_blend;
544bf215546Sopenharmony_ci
545bf215546Sopenharmony_ci   for (i = 0; i < 2; ++i) {
546bf215546Sopenharmony_ci      memset(&sampler, 0, sizeof(sampler));
547bf215546Sopenharmony_ci      sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
548bf215546Sopenharmony_ci      sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
549bf215546Sopenharmony_ci      sampler.wrap_r = PIPE_TEX_WRAP_REPEAT;
550bf215546Sopenharmony_ci      sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
551bf215546Sopenharmony_ci      sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
552bf215546Sopenharmony_ci      sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
553bf215546Sopenharmony_ci      sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
554bf215546Sopenharmony_ci      sampler.compare_func = PIPE_FUNC_ALWAYS;
555bf215546Sopenharmony_ci      sampler.normalized_coords = 1;
556bf215546Sopenharmony_ci      idct->samplers[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler);
557bf215546Sopenharmony_ci      if (!idct->samplers[i])
558bf215546Sopenharmony_ci         goto error_samplers;
559bf215546Sopenharmony_ci   }
560bf215546Sopenharmony_ci
561bf215546Sopenharmony_ci   return true;
562bf215546Sopenharmony_ci
563bf215546Sopenharmony_cierror_samplers:
564bf215546Sopenharmony_ci   for (i = 0; i < 2; ++i)
565bf215546Sopenharmony_ci      if (idct->samplers[i])
566bf215546Sopenharmony_ci         idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]);
567bf215546Sopenharmony_ci
568bf215546Sopenharmony_ci   idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);
569bf215546Sopenharmony_ci
570bf215546Sopenharmony_cierror_blend:
571bf215546Sopenharmony_ci   idct->pipe->delete_blend_state(idct->pipe, idct->blend);
572bf215546Sopenharmony_ci
573bf215546Sopenharmony_cierror_rs_state:
574bf215546Sopenharmony_ci   return false;
575bf215546Sopenharmony_ci}
576bf215546Sopenharmony_ci
577bf215546Sopenharmony_cistatic void
578bf215546Sopenharmony_cicleanup_state(struct vl_idct *idct)
579bf215546Sopenharmony_ci{
580bf215546Sopenharmony_ci   unsigned i;
581bf215546Sopenharmony_ci
582bf215546Sopenharmony_ci   for (i = 0; i < 2; ++i)
583bf215546Sopenharmony_ci      idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]);
584bf215546Sopenharmony_ci
585bf215546Sopenharmony_ci   idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);
586bf215546Sopenharmony_ci   idct->pipe->delete_blend_state(idct->pipe, idct->blend);
587bf215546Sopenharmony_ci}
588bf215546Sopenharmony_ci
589bf215546Sopenharmony_cistatic bool
590bf215546Sopenharmony_ciinit_source(struct vl_idct *idct, struct vl_idct_buffer *buffer)
591bf215546Sopenharmony_ci{
592bf215546Sopenharmony_ci   struct pipe_resource *tex;
593bf215546Sopenharmony_ci   struct pipe_surface surf_templ;
594bf215546Sopenharmony_ci
595bf215546Sopenharmony_ci   assert(idct && buffer);
596bf215546Sopenharmony_ci
597bf215546Sopenharmony_ci   tex = buffer->sampler_views.individual.source->texture;
598bf215546Sopenharmony_ci
599bf215546Sopenharmony_ci   buffer->fb_state_mismatch.width = tex->width0;
600bf215546Sopenharmony_ci   buffer->fb_state_mismatch.height = tex->height0;
601bf215546Sopenharmony_ci   buffer->fb_state_mismatch.nr_cbufs = 1;
602bf215546Sopenharmony_ci
603bf215546Sopenharmony_ci   memset(&surf_templ, 0, sizeof(surf_templ));
604bf215546Sopenharmony_ci   surf_templ.format = tex->format;
605bf215546Sopenharmony_ci   surf_templ.u.tex.first_layer = 0;
606bf215546Sopenharmony_ci   surf_templ.u.tex.last_layer = 0;
607bf215546Sopenharmony_ci   buffer->fb_state_mismatch.cbufs[0] = idct->pipe->create_surface(idct->pipe, tex, &surf_templ);
608bf215546Sopenharmony_ci
609bf215546Sopenharmony_ci   buffer->viewport_mismatch.scale[0] = tex->width0;
610bf215546Sopenharmony_ci   buffer->viewport_mismatch.scale[1] = tex->height0;
611bf215546Sopenharmony_ci   buffer->viewport_mismatch.scale[2] = 1;
612bf215546Sopenharmony_ci   buffer->viewport_mismatch.swizzle_x = PIPE_VIEWPORT_SWIZZLE_POSITIVE_X;
613bf215546Sopenharmony_ci   buffer->viewport_mismatch.swizzle_y = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Y;
614bf215546Sopenharmony_ci   buffer->viewport_mismatch.swizzle_z = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Z;
615bf215546Sopenharmony_ci   buffer->viewport_mismatch.swizzle_w = PIPE_VIEWPORT_SWIZZLE_POSITIVE_W;
616bf215546Sopenharmony_ci
617bf215546Sopenharmony_ci   return true;
618bf215546Sopenharmony_ci}
619bf215546Sopenharmony_ci
620bf215546Sopenharmony_cistatic void
621bf215546Sopenharmony_cicleanup_source(struct vl_idct_buffer *buffer)
622bf215546Sopenharmony_ci{
623bf215546Sopenharmony_ci   assert(buffer);
624bf215546Sopenharmony_ci
625bf215546Sopenharmony_ci   pipe_surface_reference(&buffer->fb_state_mismatch.cbufs[0], NULL);
626bf215546Sopenharmony_ci
627bf215546Sopenharmony_ci   pipe_sampler_view_reference(&buffer->sampler_views.individual.source, NULL);
628bf215546Sopenharmony_ci}
629bf215546Sopenharmony_ci
630bf215546Sopenharmony_cistatic bool
631bf215546Sopenharmony_ciinit_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
632bf215546Sopenharmony_ci{
633bf215546Sopenharmony_ci   struct pipe_resource *tex;
634bf215546Sopenharmony_ci   struct pipe_surface surf_templ;
635bf215546Sopenharmony_ci   unsigned i;
636bf215546Sopenharmony_ci
637bf215546Sopenharmony_ci   assert(idct && buffer);
638bf215546Sopenharmony_ci
639bf215546Sopenharmony_ci   tex = buffer->sampler_views.individual.intermediate->texture;
640bf215546Sopenharmony_ci
641bf215546Sopenharmony_ci   buffer->fb_state.width = tex->width0;
642bf215546Sopenharmony_ci   buffer->fb_state.height = tex->height0;
643bf215546Sopenharmony_ci   buffer->fb_state.nr_cbufs = idct->nr_of_render_targets;
644bf215546Sopenharmony_ci   for(i = 0; i < idct->nr_of_render_targets; ++i) {
645bf215546Sopenharmony_ci      memset(&surf_templ, 0, sizeof(surf_templ));
646bf215546Sopenharmony_ci      surf_templ.format = tex->format;
647bf215546Sopenharmony_ci      surf_templ.u.tex.first_layer = i;
648bf215546Sopenharmony_ci      surf_templ.u.tex.last_layer = i;
649bf215546Sopenharmony_ci      buffer->fb_state.cbufs[i] = idct->pipe->create_surface(
650bf215546Sopenharmony_ci         idct->pipe, tex, &surf_templ);
651bf215546Sopenharmony_ci
652bf215546Sopenharmony_ci      if (!buffer->fb_state.cbufs[i])
653bf215546Sopenharmony_ci         goto error_surfaces;
654bf215546Sopenharmony_ci   }
655bf215546Sopenharmony_ci
656bf215546Sopenharmony_ci   buffer->viewport.scale[0] = tex->width0;
657bf215546Sopenharmony_ci   buffer->viewport.scale[1] = tex->height0;
658bf215546Sopenharmony_ci   buffer->viewport.scale[2] = 1;
659bf215546Sopenharmony_ci   buffer->viewport.swizzle_x = PIPE_VIEWPORT_SWIZZLE_POSITIVE_X;
660bf215546Sopenharmony_ci   buffer->viewport.swizzle_y = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Y;
661bf215546Sopenharmony_ci   buffer->viewport.swizzle_z = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Z;
662bf215546Sopenharmony_ci   buffer->viewport.swizzle_w = PIPE_VIEWPORT_SWIZZLE_POSITIVE_W;
663bf215546Sopenharmony_ci
664bf215546Sopenharmony_ci   return true;
665bf215546Sopenharmony_ci
666bf215546Sopenharmony_cierror_surfaces:
667bf215546Sopenharmony_ci   for(i = 0; i < idct->nr_of_render_targets; ++i)
668bf215546Sopenharmony_ci      pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL);
669bf215546Sopenharmony_ci
670bf215546Sopenharmony_ci   return false;
671bf215546Sopenharmony_ci}
672bf215546Sopenharmony_ci
673bf215546Sopenharmony_cistatic void
674bf215546Sopenharmony_cicleanup_intermediate(struct vl_idct_buffer *buffer)
675bf215546Sopenharmony_ci{
676bf215546Sopenharmony_ci   unsigned i;
677bf215546Sopenharmony_ci
678bf215546Sopenharmony_ci   assert(buffer);
679bf215546Sopenharmony_ci
680bf215546Sopenharmony_ci   for(i = 0; i < PIPE_MAX_COLOR_BUFS; ++i)
681bf215546Sopenharmony_ci      pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL);
682bf215546Sopenharmony_ci
683bf215546Sopenharmony_ci   pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL);
684bf215546Sopenharmony_ci}
685bf215546Sopenharmony_ci
686bf215546Sopenharmony_cistruct pipe_sampler_view *
687bf215546Sopenharmony_civl_idct_upload_matrix(struct pipe_context *pipe, float scale)
688bf215546Sopenharmony_ci{
689bf215546Sopenharmony_ci   struct pipe_resource tex_templ, *matrix;
690bf215546Sopenharmony_ci   struct pipe_sampler_view sv_templ, *sv;
691bf215546Sopenharmony_ci   struct pipe_transfer *buf_transfer;
692bf215546Sopenharmony_ci   unsigned i, j, pitch;
693bf215546Sopenharmony_ci   float *f;
694bf215546Sopenharmony_ci
695bf215546Sopenharmony_ci   struct pipe_box rect =
696bf215546Sopenharmony_ci   {
697bf215546Sopenharmony_ci      0, 0, 0,
698bf215546Sopenharmony_ci      VL_BLOCK_WIDTH / 4,
699bf215546Sopenharmony_ci      VL_BLOCK_HEIGHT,
700bf215546Sopenharmony_ci      1
701bf215546Sopenharmony_ci   };
702bf215546Sopenharmony_ci
703bf215546Sopenharmony_ci   assert(pipe);
704bf215546Sopenharmony_ci
705bf215546Sopenharmony_ci   memset(&tex_templ, 0, sizeof(tex_templ));
706bf215546Sopenharmony_ci   tex_templ.target = PIPE_TEXTURE_2D;
707bf215546Sopenharmony_ci   tex_templ.format = PIPE_FORMAT_R32G32B32A32_FLOAT;
708bf215546Sopenharmony_ci   tex_templ.last_level = 0;
709bf215546Sopenharmony_ci   tex_templ.width0 = 2;
710bf215546Sopenharmony_ci   tex_templ.height0 = 8;
711bf215546Sopenharmony_ci   tex_templ.depth0 = 1;
712bf215546Sopenharmony_ci   tex_templ.array_size = 1;
713bf215546Sopenharmony_ci   tex_templ.usage = PIPE_USAGE_IMMUTABLE;
714bf215546Sopenharmony_ci   tex_templ.bind = PIPE_BIND_SAMPLER_VIEW;
715bf215546Sopenharmony_ci   tex_templ.flags = 0;
716bf215546Sopenharmony_ci
717bf215546Sopenharmony_ci   matrix = pipe->screen->resource_create(pipe->screen, &tex_templ);
718bf215546Sopenharmony_ci   if (!matrix)
719bf215546Sopenharmony_ci      goto error_matrix;
720bf215546Sopenharmony_ci
721bf215546Sopenharmony_ci   f = pipe->texture_map(pipe, matrix, 0,
722bf215546Sopenharmony_ci                                     PIPE_MAP_WRITE |
723bf215546Sopenharmony_ci                                     PIPE_MAP_DISCARD_RANGE,
724bf215546Sopenharmony_ci                                     &rect, &buf_transfer);
725bf215546Sopenharmony_ci   if (!f)
726bf215546Sopenharmony_ci      goto error_map;
727bf215546Sopenharmony_ci
728bf215546Sopenharmony_ci   pitch = buf_transfer->stride / sizeof(float);
729bf215546Sopenharmony_ci
730bf215546Sopenharmony_ci   for(i = 0; i < VL_BLOCK_HEIGHT; ++i)
731bf215546Sopenharmony_ci      for(j = 0; j < VL_BLOCK_WIDTH; ++j)
732bf215546Sopenharmony_ci         // transpose and scale
733bf215546Sopenharmony_ci         f[i * pitch + j] = ((const float (*)[8])const_matrix)[j][i] * scale;
734bf215546Sopenharmony_ci
735bf215546Sopenharmony_ci   pipe->texture_unmap(pipe, buf_transfer);
736bf215546Sopenharmony_ci
737bf215546Sopenharmony_ci   memset(&sv_templ, 0, sizeof(sv_templ));
738bf215546Sopenharmony_ci   u_sampler_view_default_template(&sv_templ, matrix, matrix->format);
739bf215546Sopenharmony_ci   sv = pipe->create_sampler_view(pipe, matrix, &sv_templ);
740bf215546Sopenharmony_ci   pipe_resource_reference(&matrix, NULL);
741bf215546Sopenharmony_ci   if (!sv)
742bf215546Sopenharmony_ci      goto error_map;
743bf215546Sopenharmony_ci
744bf215546Sopenharmony_ci   return sv;
745bf215546Sopenharmony_ci
746bf215546Sopenharmony_cierror_map:
747bf215546Sopenharmony_ci   pipe_resource_reference(&matrix, NULL);
748bf215546Sopenharmony_ci
749bf215546Sopenharmony_cierror_matrix:
750bf215546Sopenharmony_ci   return NULL;
751bf215546Sopenharmony_ci}
752bf215546Sopenharmony_ci
753bf215546Sopenharmony_cibool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
754bf215546Sopenharmony_ci                  unsigned buffer_width, unsigned buffer_height,
755bf215546Sopenharmony_ci                  unsigned nr_of_render_targets,
756bf215546Sopenharmony_ci                  struct pipe_sampler_view *matrix,
757bf215546Sopenharmony_ci                  struct pipe_sampler_view *transpose)
758bf215546Sopenharmony_ci{
759bf215546Sopenharmony_ci   assert(idct && pipe);
760bf215546Sopenharmony_ci   assert(matrix && transpose);
761bf215546Sopenharmony_ci
762bf215546Sopenharmony_ci   idct->pipe = pipe;
763bf215546Sopenharmony_ci   idct->buffer_width = buffer_width;
764bf215546Sopenharmony_ci   idct->buffer_height = buffer_height;
765bf215546Sopenharmony_ci   idct->nr_of_render_targets = nr_of_render_targets;
766bf215546Sopenharmony_ci
767bf215546Sopenharmony_ci   pipe_sampler_view_reference(&idct->matrix, matrix);
768bf215546Sopenharmony_ci   pipe_sampler_view_reference(&idct->transpose, transpose);
769bf215546Sopenharmony_ci
770bf215546Sopenharmony_ci   if(!init_shaders(idct))
771bf215546Sopenharmony_ci      return false;
772bf215546Sopenharmony_ci
773bf215546Sopenharmony_ci   if(!init_state(idct)) {
774bf215546Sopenharmony_ci      cleanup_shaders(idct);
775bf215546Sopenharmony_ci      return false;
776bf215546Sopenharmony_ci   }
777bf215546Sopenharmony_ci
778bf215546Sopenharmony_ci   return true;
779bf215546Sopenharmony_ci}
780bf215546Sopenharmony_ci
781bf215546Sopenharmony_civoid
782bf215546Sopenharmony_civl_idct_cleanup(struct vl_idct *idct)
783bf215546Sopenharmony_ci{
784bf215546Sopenharmony_ci   cleanup_shaders(idct);
785bf215546Sopenharmony_ci   cleanup_state(idct);
786bf215546Sopenharmony_ci
787bf215546Sopenharmony_ci   pipe_sampler_view_reference(&idct->matrix, NULL);
788bf215546Sopenharmony_ci   pipe_sampler_view_reference(&idct->transpose, NULL);
789bf215546Sopenharmony_ci}
790bf215546Sopenharmony_ci
791bf215546Sopenharmony_cibool
792bf215546Sopenharmony_civl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
793bf215546Sopenharmony_ci                    struct pipe_sampler_view *source,
794bf215546Sopenharmony_ci                    struct pipe_sampler_view *intermediate)
795bf215546Sopenharmony_ci{
796bf215546Sopenharmony_ci   assert(buffer && idct);
797bf215546Sopenharmony_ci   assert(source && intermediate);
798bf215546Sopenharmony_ci
799bf215546Sopenharmony_ci   memset(buffer, 0, sizeof(struct vl_idct_buffer));
800bf215546Sopenharmony_ci
801bf215546Sopenharmony_ci   pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, idct->matrix);
802bf215546Sopenharmony_ci   pipe_sampler_view_reference(&buffer->sampler_views.individual.source, source);
803bf215546Sopenharmony_ci   pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->transpose);
804bf215546Sopenharmony_ci   pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, intermediate);
805bf215546Sopenharmony_ci
806bf215546Sopenharmony_ci   if (!init_source(idct, buffer))
807bf215546Sopenharmony_ci      return false;
808bf215546Sopenharmony_ci
809bf215546Sopenharmony_ci   if (!init_intermediate(idct, buffer))
810bf215546Sopenharmony_ci      return false;
811bf215546Sopenharmony_ci
812bf215546Sopenharmony_ci   return true;
813bf215546Sopenharmony_ci}
814bf215546Sopenharmony_ci
815bf215546Sopenharmony_civoid
816bf215546Sopenharmony_civl_idct_cleanup_buffer(struct vl_idct_buffer *buffer)
817bf215546Sopenharmony_ci{
818bf215546Sopenharmony_ci   assert(buffer);
819bf215546Sopenharmony_ci
820bf215546Sopenharmony_ci   cleanup_source(buffer);
821bf215546Sopenharmony_ci   cleanup_intermediate(buffer);
822bf215546Sopenharmony_ci
823bf215546Sopenharmony_ci   pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, NULL);
824bf215546Sopenharmony_ci   pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, NULL);
825bf215546Sopenharmony_ci}
826bf215546Sopenharmony_ci
827bf215546Sopenharmony_civoid
828bf215546Sopenharmony_civl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_instances)
829bf215546Sopenharmony_ci{
830bf215546Sopenharmony_ci   assert(buffer);
831bf215546Sopenharmony_ci
832bf215546Sopenharmony_ci   idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
833bf215546Sopenharmony_ci   idct->pipe->bind_blend_state(idct->pipe, idct->blend);
834bf215546Sopenharmony_ci
835bf215546Sopenharmony_ci   idct->pipe->bind_sampler_states(idct->pipe, PIPE_SHADER_FRAGMENT,
836bf215546Sopenharmony_ci                                   0, 2, idct->samplers);
837bf215546Sopenharmony_ci
838bf215546Sopenharmony_ci   idct->pipe->set_sampler_views(idct->pipe, PIPE_SHADER_FRAGMENT, 0, 2, 0,
839bf215546Sopenharmony_ci                                 false, buffer->sampler_views.stage[0]);
840bf215546Sopenharmony_ci
841bf215546Sopenharmony_ci   /* mismatch control */
842bf215546Sopenharmony_ci   idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state_mismatch);
843bf215546Sopenharmony_ci   idct->pipe->set_viewport_states(idct->pipe, 0, 1, &buffer->viewport_mismatch);
844bf215546Sopenharmony_ci   idct->pipe->bind_vs_state(idct->pipe, idct->vs_mismatch);
845bf215546Sopenharmony_ci   idct->pipe->bind_fs_state(idct->pipe, idct->fs_mismatch);
846bf215546Sopenharmony_ci   util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_POINTS, 0, 1, 0, num_instances);
847bf215546Sopenharmony_ci
848bf215546Sopenharmony_ci   /* first stage */
849bf215546Sopenharmony_ci   idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state);
850bf215546Sopenharmony_ci   idct->pipe->set_viewport_states(idct->pipe, 0, 1, &buffer->viewport);
851bf215546Sopenharmony_ci   idct->pipe->bind_vs_state(idct->pipe, idct->vs);
852bf215546Sopenharmony_ci   idct->pipe->bind_fs_state(idct->pipe, idct->fs);
853bf215546Sopenharmony_ci   util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
854bf215546Sopenharmony_ci}
855bf215546Sopenharmony_ci
856bf215546Sopenharmony_civoid
857bf215546Sopenharmony_civl_idct_prepare_stage2(struct vl_idct *idct, struct vl_idct_buffer *buffer)
858bf215546Sopenharmony_ci{
859bf215546Sopenharmony_ci   assert(buffer);
860bf215546Sopenharmony_ci
861bf215546Sopenharmony_ci   /* second stage */
862bf215546Sopenharmony_ci   idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
863bf215546Sopenharmony_ci   idct->pipe->bind_sampler_states(idct->pipe, PIPE_SHADER_FRAGMENT,
864bf215546Sopenharmony_ci                                   0, 2, idct->samplers);
865bf215546Sopenharmony_ci   idct->pipe->set_sampler_views(idct->pipe, PIPE_SHADER_FRAGMENT,
866bf215546Sopenharmony_ci                                 0, 2, 0, false, buffer->sampler_views.stage[1]);
867bf215546Sopenharmony_ci}
868bf215546Sopenharmony_ci
869