1/**************************************************************************
2 *
3 * Copyright 2003 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#include "i915_batch.h"
29#include "i915_context.h"
30#include "i915_debug.h"
31#include "i915_fpc.h"
32#include "i915_reg.h"
33#include "i915_resource.h"
34
35#include "pipe/p_context.h"
36#include "pipe/p_defines.h"
37#include "pipe/p_format.h"
38
39#include "util/format/u_format.h"
40#include "util/u_math.h"
41#include "util/u_memory.h"
42
/**
 * Descriptor for one piece of tracked hardware state ("atom"): a name plus
 * a validate hook and an emit hook.
 *
 * NOTE(review): nothing in the visible part of this file instantiates this
 * struct; confirm whether it is still used elsewhere.
 */
struct i915_tracked_hw_state {
   /* Identifier string for the atom. */
   const char *name;

   /* Validation hook; receives an out-parameter for the batch space. */
   void (*validate)(struct i915_context *, unsigned *batch_space);

   /* Emission hook. */
   void (*emit)(struct i915_context *);

   /* Dirty bits associated with this atom. */
   unsigned dirty;

   /* Batch space associated with this atom. */
   unsigned batch_space;
};
49
50static void
51validate_flush(struct i915_context *i915, unsigned *batch_space)
52{
53   *batch_space = i915->flush_dirty ? 1 : 0;
54}
55
56static void
57emit_flush(struct i915_context *i915)
58{
59   /* Cache handling is very cheap atm. State handling can request to flushes:
60    * - I915_FLUSH_CACHE which is a flush everything request and
61    * - I915_PIPELINE_FLUSH which is specifically for the draw_offset flush.
62    * Because the cache handling is so dumb, no explicit "invalidate map cache".
63    * Also, the first is a strict superset of the latter, so the following logic
64    * works. */
65   if (i915->flush_dirty & I915_FLUSH_CACHE)
66      OUT_BATCH(MI_FLUSH | FLUSH_MAP_CACHE);
67   else if (i915->flush_dirty & I915_PIPELINE_FLUSH)
68      OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE);
69}
70
71uint32_t invariant_state[] = {
72   _3DSTATE_AA_CMD | AA_LINE_ECAAR_WIDTH_ENABLE | AA_LINE_ECAAR_WIDTH_1_0 |
73      AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0,
74
75   _3DSTATE_DFLT_DIFFUSE_CMD, 0,
76
77   _3DSTATE_DFLT_SPEC_CMD, 0,
78
79   _3DSTATE_DFLT_Z_CMD, 0,
80
81   _3DSTATE_COORD_SET_BINDINGS | CSB_TCB(0, 0) | CSB_TCB(1, 1) | CSB_TCB(2, 2) |
82      CSB_TCB(3, 3) | CSB_TCB(4, 4) | CSB_TCB(5, 5) | CSB_TCB(6, 6) |
83      CSB_TCB(7, 7),
84
85   _3DSTATE_RASTER_RULES_CMD | ENABLE_POINT_RASTER_RULE |
86      OGL_POINT_RASTER_RULE | ENABLE_LINE_STRIP_PROVOKE_VRTX |
87      ENABLE_TRI_FAN_PROVOKE_VRTX | LINE_STRIP_PROVOKE_VRTX(1) |
88      TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D,
89
90   _3DSTATE_DEPTH_SUBRECT_DISABLE,
91
92   /* disable indirect state for now
93    */
94   _3DSTATE_LOAD_INDIRECT | 0, 0};
95
96static void
97emit_invariant(struct i915_context *i915)
98{
99   i915_winsys_batchbuffer_write(
100      i915->batch, invariant_state,
101      ARRAY_SIZE(invariant_state) * sizeof(uint32_t));
102}
103
104static void
105validate_immediate(struct i915_context *i915, unsigned *batch_space)
106{
107   unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 |
108                     1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 |
109                     1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 |
110                     1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) &
111                    i915->immediate_dirty;
112
113   if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0) && i915->vbo)
114      i915->validation_buffers[i915->num_validation_buffers++] = i915->vbo;
115
116   *batch_space = 1 + util_bitcount(dirty);
117}
118
119static void
120emit_immediate_s5(struct i915_context *i915, uint32_t imm)
121{
122   struct i915_surface *surf = i915_surface(i915->framebuffer.cbufs[0]);
123
124   if (surf) {
125      uint32_t writemask = imm & S5_WRITEDISABLE_MASK;
126      imm &= ~S5_WRITEDISABLE_MASK;
127
128      /* The register bits are not in order. */
129      static const uint32_t writedisables[4] = {
130         S5_WRITEDISABLE_RED,
131         S5_WRITEDISABLE_GREEN,
132         S5_WRITEDISABLE_BLUE,
133         S5_WRITEDISABLE_ALPHA,
134      };
135
136      for (int i = 0; i < 4; i++) {
137         if (writemask & writedisables[surf->color_swizzle[i]])
138            imm |= writedisables[i];
139      }
140   }
141
142   OUT_BATCH(imm);
143}
144
145static void
146emit_immediate(struct i915_context *i915)
147{
148   /* remove unwanted bits and S7 */
149   unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 |
150                     1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 |
151                     1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 |
152                     1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) &
153                    i915->immediate_dirty;
154   int i, num = util_bitcount(dirty);
155   assert(num && num <= I915_MAX_IMMEDIATE);
156
157   OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | dirty << 4 | (num - 1));
158
159   if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) {
160      if (i915->vbo)
161         OUT_RELOC(i915->vbo, I915_USAGE_VERTEX,
162                   i915->current.immediate[I915_IMMEDIATE_S0]);
163      else
164         OUT_BATCH(0);
165   }
166
167   for (i = 1; i < I915_MAX_IMMEDIATE; i++) {
168      if (dirty & (1 << i)) {
169         if (i == I915_IMMEDIATE_S5)
170            emit_immediate_s5(i915, i915->current.immediate[i]);
171         else
172            OUT_BATCH(i915->current.immediate[i]);
173      }
174   }
175}
176
177static void
178validate_dynamic(struct i915_context *i915, unsigned *batch_space)
179{
180   *batch_space =
181      util_bitcount(i915->dynamic_dirty & ((1 << I915_MAX_DYNAMIC) - 1));
182}
183
184static void
185emit_dynamic(struct i915_context *i915)
186{
187   int i;
188   for (i = 0; i < I915_MAX_DYNAMIC; i++) {
189      if (i915->dynamic_dirty & (1 << i))
190         OUT_BATCH(i915->current.dynamic[i]);
191   }
192}
193
194static void
195validate_static(struct i915_context *i915, unsigned *batch_space)
196{
197   *batch_space = 0;
198
199   if (i915->current.cbuf_bo && (i915->static_dirty & I915_DST_BUF_COLOR)) {
200      i915->validation_buffers[i915->num_validation_buffers++] =
201         i915->current.cbuf_bo;
202      *batch_space += 3;
203   }
204
205   if (i915->current.depth_bo && (i915->static_dirty & I915_DST_BUF_DEPTH)) {
206      i915->validation_buffers[i915->num_validation_buffers++] =
207         i915->current.depth_bo;
208      *batch_space += 3;
209   }
210
211   if (i915->static_dirty & I915_DST_VARS)
212      *batch_space += 2;
213
214   if (i915->static_dirty & I915_DST_RECT)
215      *batch_space += 5;
216}
217
218static void
219emit_static(struct i915_context *i915)
220{
221   if (i915->current.cbuf_bo && (i915->static_dirty & I915_DST_BUF_COLOR)) {
222      OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
223      OUT_BATCH(i915->current.cbuf_flags);
224      OUT_RELOC(i915->current.cbuf_bo, I915_USAGE_RENDER, 0);
225   }
226
227   /* What happens if no zbuf??
228    */
229   if (i915->current.depth_bo && (i915->static_dirty & I915_DST_BUF_DEPTH)) {
230      OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
231      OUT_BATCH(i915->current.depth_flags);
232      OUT_RELOC(i915->current.depth_bo, I915_USAGE_RENDER, 0);
233   }
234
235   if (i915->static_dirty & I915_DST_VARS) {
236      OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
237      OUT_BATCH(i915->current.dst_buf_vars);
238   }
239}
240
241static void
242validate_map(struct i915_context *i915, unsigned *batch_space)
243{
244   const uint32_t enabled = i915->current.sampler_enable_flags;
245   uint32_t unit;
246   struct i915_texture *tex;
247
248   *batch_space = i915->current.sampler_enable_nr
249                     ? 2 + 3 * i915->current.sampler_enable_nr
250                     : 0;
251
252   for (unit = 0; unit < I915_TEX_UNITS; unit++) {
253      if (enabled & (1 << unit)) {
254         tex = i915_texture(i915->fragment_sampler_views[unit]->texture);
255         i915->validation_buffers[i915->num_validation_buffers++] = tex->buffer;
256      }
257   }
258}
259
260static void
261emit_map(struct i915_context *i915)
262{
263   const uint32_t nr = i915->current.sampler_enable_nr;
264   if (nr) {
265      const uint32_t enabled = i915->current.sampler_enable_flags;
266      uint32_t unit;
267      uint32_t count = 0;
268      OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr));
269      OUT_BATCH(enabled);
270      for (unit = 0; unit < I915_TEX_UNITS; unit++) {
271         if (enabled & (1 << unit)) {
272            struct i915_texture *texture =
273               i915_texture(i915->fragment_sampler_views[unit]->texture);
274            struct i915_winsys_buffer *buf = texture->buffer;
275            unsigned offset = i915->current.texbuffer[unit][2];
276
277            assert(buf);
278
279            count++;
280
281            OUT_RELOC(buf, I915_USAGE_SAMPLER, offset);
282            OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */
283            OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */
284         }
285      }
286      assert(count == nr);
287   }
288}
289
290static void
291validate_sampler(struct i915_context *i915, unsigned *batch_space)
292{
293   *batch_space = i915->current.sampler_enable_nr
294                     ? 2 + 3 * i915->current.sampler_enable_nr
295                     : 0;
296}
297
298static void
299emit_sampler(struct i915_context *i915)
300{
301   if (i915->current.sampler_enable_nr) {
302      int i;
303
304      OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * i915->current.sampler_enable_nr));
305
306      OUT_BATCH(i915->current.sampler_enable_flags);
307
308      for (i = 0; i < I915_TEX_UNITS; i++) {
309         if (i915->current.sampler_enable_flags & (1 << i)) {
310            OUT_BATCH(i915->current.sampler[i][0]);
311            OUT_BATCH(i915->current.sampler[i][1]);
312            OUT_BATCH(i915->current.sampler[i][2]);
313         }
314      }
315   }
316}
317
318static void
319validate_constants(struct i915_context *i915, unsigned *batch_space)
320{
321   int nr = i915->fs->num_constants ? 2 + 4 * i915->fs->num_constants : 0;
322
323   *batch_space = nr;
324}
325
326static void
327emit_constants(struct i915_context *i915)
328{
329   /* Collate the user-defined constants with the fragment shader's
330    * immediates according to the constant_flags[] array.
331    */
332   const uint32_t nr = i915->fs->num_constants;
333
334   assert(nr <= I915_MAX_CONSTANT);
335   if (nr) {
336      uint32_t i;
337
338      OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4));
339      OUT_BATCH((1 << nr) - 1);
340
341      for (i = 0; i < nr; i++) {
342         const uint32_t *c;
343         if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) {
344            /* grab user-defined constant */
345            c = (uint32_t *)i915_buffer(i915->constants[PIPE_SHADER_FRAGMENT])
346                   ->data;
347            c += 4 * i;
348         } else {
349            /* emit program constant */
350            c = (uint32_t *)i915->fs->constants[i];
351         }
352#if 0 /* debug */
353         {
354            float *f = (float *) c;
355            printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3],
356                   (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER
357                    ? "user" : "immediate"));
358         }
359#endif
360         OUT_BATCH(*c++);
361         OUT_BATCH(*c++);
362         OUT_BATCH(*c++);
363         OUT_BATCH(*c++);
364      }
365   }
366}
367
368static void
369validate_program(struct i915_context *i915, unsigned *batch_space)
370{
371   /* we need more batch space if we want to emulate rgba framebuffers */
372   *batch_space = i915->fs->program_len + (i915->current.fixup_swizzle ? 3 : 0);
373}
374
375static void
376emit_program(struct i915_context *i915)
377{
378   /* we should always have, at least, a pass-through program */
379   assert(i915->fs->program_len > 0);
380
381   /* If we're doing a fixup swizzle, that's 3 more dwords to add. */
382   uint32_t additional_size = 0;
383   if (i915->current.fixup_swizzle)
384      additional_size = 3;
385
386   /* output the program: 1 dword of header, then 3 dwords per decl/instruction */
387   assert(i915->fs->program_len % 3 == 1);
388
389   /* first word has the size, adjust it for fixup swizzle */
390   OUT_BATCH(i915->fs->program[0] + additional_size);
391
392   for (int i = 1; i < i915->fs->program_len; i++)
393      OUT_BATCH(i915->fs->program[i]);
394
395   /* we emit an additional mov with swizzle to fake RGBA framebuffers */
396   if (i915->current.fixup_swizzle) {
397      /* mov out_color, out_color.zyxw */
398      OUT_BATCH(A0_MOV | (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) |
399                A0_DEST_CHANNEL_ALL | (REG_TYPE_OC << A0_SRC0_TYPE_SHIFT) |
400                (T_DIFFUSE << A0_SRC0_NR_SHIFT));
401      OUT_BATCH(i915->current.fixup_swizzle);
402      OUT_BATCH(0);
403   }
404}
405
406static void
407emit_draw_rect(struct i915_context *i915)
408{
409   if (i915->static_dirty & I915_DST_RECT) {
410      OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
411      OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS);
412      OUT_BATCH(i915->current.draw_offset);
413      OUT_BATCH(i915->current.draw_size);
414      OUT_BATCH(i915->current.draw_offset);
415   }
416}
417
418static bool
419i915_validate_state(struct i915_context *i915, unsigned *batch_space)
420{
421   unsigned tmp;
422
423   i915->num_validation_buffers = 0;
424   if (i915->hardware_dirty & I915_HW_INVARIANT)
425      *batch_space = ARRAY_SIZE(invariant_state);
426   else
427      *batch_space = 0;
428
429#if 0
430static int counter_total = 0;
431#define VALIDATE_ATOM(atom, hw_dirty)                                          \
432   if (i915->hardware_dirty & hw_dirty) {                                      \
433      static int counter_##atom = 0;                                           \
434      validate_##atom(i915, &tmp);                                             \
435      *batch_space += tmp;                                                     \
436      counter_##atom += tmp;                                                   \
437      counter_total += tmp;                                                    \
438      printf("%s: \t%d/%d \t%2.2f\n", #atom, counter_##atom, counter_total,    \
439             counter_##atom * 100.f / counter_total);                          \
440   }
441#else
442#define VALIDATE_ATOM(atom, hw_dirty)                                          \
443   if (i915->hardware_dirty & hw_dirty) {                                      \
444      validate_##atom(i915, &tmp);                                             \
445      *batch_space += tmp;                                                     \
446   }
447#endif
448   VALIDATE_ATOM(flush, I915_HW_FLUSH);
449   VALIDATE_ATOM(immediate, I915_HW_IMMEDIATE);
450   VALIDATE_ATOM(dynamic, I915_HW_DYNAMIC);
451   VALIDATE_ATOM(static, I915_HW_STATIC);
452   VALIDATE_ATOM(map, I915_HW_MAP);
453   VALIDATE_ATOM(sampler, I915_HW_SAMPLER);
454   VALIDATE_ATOM(constants, I915_HW_CONSTANTS);
455   VALIDATE_ATOM(program, I915_HW_PROGRAM);
456#undef VALIDATE_ATOM
457
458   if (i915->num_validation_buffers == 0)
459      return true;
460
461   if (!i915_winsys_validate_buffers(i915->batch, i915->validation_buffers,
462                                     i915->num_validation_buffers))
463      return false;
464
465   return true;
466}
467
468/* Push the state into the sarea and/or texture memory.
469 */
470void
471i915_emit_hardware_state(struct i915_context *i915)
472{
473   unsigned batch_space;
474   uintptr_t save_ptr;
475
476   assert(i915->dirty == 0);
477
478   if (I915_DBG_ON(DBG_ATOMS))
479      i915_dump_hardware_dirty(i915, __FUNCTION__);
480
481   if (!i915_validate_state(i915, &batch_space)) {
482      FLUSH_BATCH(NULL, I915_FLUSH_ASYNC);
483      assert(i915_validate_state(i915, &batch_space));
484   }
485
486   if (!BEGIN_BATCH(batch_space)) {
487      FLUSH_BATCH(NULL, I915_FLUSH_ASYNC);
488      assert(i915_validate_state(i915, &batch_space));
489      assert(BEGIN_BATCH(batch_space));
490   }
491
492   save_ptr = (uintptr_t)i915->batch->ptr;
493
494#define EMIT_ATOM(atom, hw_dirty)                                              \
495   if (i915->hardware_dirty & hw_dirty)                                        \
496      emit_##atom(i915);
497   EMIT_ATOM(flush, I915_HW_FLUSH);
498   EMIT_ATOM(invariant, I915_HW_INVARIANT);
499   EMIT_ATOM(immediate, I915_HW_IMMEDIATE);
500   EMIT_ATOM(dynamic, I915_HW_DYNAMIC);
501   EMIT_ATOM(static, I915_HW_STATIC);
502   EMIT_ATOM(map, I915_HW_MAP);
503   EMIT_ATOM(sampler, I915_HW_SAMPLER);
504   EMIT_ATOM(constants, I915_HW_CONSTANTS);
505   EMIT_ATOM(program, I915_HW_PROGRAM);
506   EMIT_ATOM(draw_rect, I915_HW_STATIC);
507#undef EMIT_ATOM
508
509   I915_DBG(DBG_EMIT, "%s: used %d dwords, %d dwords reserved\n", __FUNCTION__,
510            ((uintptr_t)i915->batch->ptr - save_ptr) / 4, batch_space);
511   assert(((uintptr_t)i915->batch->ptr - save_ptr) / 4 == batch_space);
512
513   i915->hardware_dirty = 0;
514   i915->immediate_dirty = 0;
515   i915->dynamic_dirty = 0;
516   i915->static_dirty = 0;
517   i915->flush_dirty = 0;
518}
519