1/*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23/* blt command encoding for gen4/5 */
24#include "crocus_context.h"
25
26#include "crocus_genx_macros.h"
27#include "crocus_genx_protos.h"
28#include "crocus_resource.h"
29
30#define FILE_DEBUG_FLAG DEBUG_BLIT
31
32#if GFX_VER <= 5
33
34static uint32_t
35color_depth_for_cpp(int cpp)
36{
37   switch (cpp) {
38   case 4: return COLOR_DEPTH_32bit;
39   case 2: return COLOR_DEPTH_565;
40   case 1: return COLOR_DEPTH_8bit;
41   default:
42      unreachable("not reached");
43   }
44}
45
46static void
47blt_set_alpha_to_one(struct crocus_batch *batch,
48		     struct crocus_resource *dst,
49		     int x, int y, int width, int height)
50{
51   const struct isl_format_layout *fmtl = isl_format_get_layout(dst->surf.format);
52   unsigned cpp = fmtl->bpb / 8;
53   uint32_t pitch = dst->surf.row_pitch_B;
54
55   if (dst->surf.tiling != ISL_TILING_LINEAR)
56      pitch /= 4;
57   /* We need to split the blit into chunks that each fit within the blitter's
58    * restrictions.  We can't use a chunk size of 32768 because we need to
59    * ensure that src_tile_x + chunk_size fits.  We choose 16384 because it's
60    * a nice round power of two, big enough that performance won't suffer, and
61    * small enough to guarantee everything fits.
62    */
63   const uint32_t max_chunk_size = 16384;
64
65   for (uint32_t chunk_x = 0; chunk_x < width; chunk_x += max_chunk_size) {
66      for (uint32_t chunk_y = 0; chunk_y < height; chunk_y += max_chunk_size) {
67         const uint32_t chunk_w = MIN2(max_chunk_size, width - chunk_x);
68         const uint32_t chunk_h = MIN2(max_chunk_size, height - chunk_y);
69         uint32_t tile_x, tile_y;
70         uint64_t offset_B;
71         ASSERTED uint32_t z_offset_el, array_offset;
72         isl_tiling_get_intratile_offset_el(dst->surf.tiling, dst->surf.dim,
73                                            dst->surf.msaa_layout,
74                                            cpp * 8, dst->surf.samples,
75                                            dst->surf.row_pitch_B,
76                                            dst->surf.array_pitch_el_rows,
77                                            chunk_x, chunk_y, 0, 0,
78                                            &offset_B,
79                                            &tile_x, &tile_y,
80                                            &z_offset_el, &array_offset);
81         assert(z_offset_el == 0);
82         assert(array_offset == 0);
83	 crocus_emit_cmd(batch, GENX(XY_COLOR_BLT), xyblt) {
84            xyblt.TilingEnable = dst->surf.tiling != ISL_TILING_LINEAR;
85            xyblt.ColorDepth = color_depth_for_cpp(cpp);
86            xyblt.RasterOperation = 0xF0;
87            xyblt.DestinationPitch = pitch;
88            xyblt._32bppByteMask = 2;
89            xyblt.DestinationBaseAddress = rw_bo(dst->bo, offset_B);
90            xyblt.DestinationX1Coordinate = tile_x;
91            xyblt.DestinationY1Coordinate = tile_y;
92            xyblt.DestinationX2Coordinate = tile_x + chunk_w;
93            xyblt.DestinationY2Coordinate = tile_y + chunk_h;
94            xyblt.SolidPatternColor = 0xffffffff;
95	 }
96      }
97   }
98}
99
100static bool validate_blit_for_blt(struct crocus_batch *batch,
101                                  const struct pipe_blit_info *info)
102{
103   /* If the source and destination are the same size with no mirroring,
104    * the rectangles are within the size of the texture and there is no
105    * scissor, then we can probably use the blit engine.
106    */
107   if (info->dst.box.width != info->src.box.width ||
108       info->dst.box.height != info->src.box.height)
109      return false;
110
111   if (info->scissor_enable)
112      return false;
113
114   if (info->dst.box.height < 0 || info->src.box.height < 0)
115      return false;
116
117   if (info->dst.box.depth > 1 || info->src.box.depth > 1)
118      return false;
119
120   const struct util_format_description *desc =
121      util_format_description(info->src.format);
122   int i = util_format_get_first_non_void_channel(info->src.format);
123   if (i == -1)
124      return false;
125
126   /* can't do the alpha to 1 setting for these. */
127   if ((util_format_has_alpha1(info->src.format) &&
128        util_format_has_alpha(info->dst.format) &&
129        desc->channel[i].size > 8))
130      return false;
131   return true;
132}
133
134static inline int crocus_resource_blt_pitch(struct crocus_resource *res)
135{
136   int pitch = res->surf.row_pitch_B;
137   if (res->surf.tiling != ISL_TILING_LINEAR)
138      pitch /= 4;
139   return pitch;
140}
141
142
143static bool emit_copy_blt(struct crocus_batch *batch,
144                          struct crocus_resource *src,
145                          struct crocus_resource *dst,
146                          unsigned cpp,
147                          int32_t src_pitch,
148                          unsigned src_offset,
149                          int32_t dst_pitch,
150                          unsigned dst_offset,
151                          uint16_t src_x, uint16_t src_y,
152                          uint16_t dst_x, uint16_t dst_y,
153                          uint16_t w, uint16_t h)
154
155{
156   uint32_t src_tile_w, src_tile_h;
157   uint32_t dst_tile_w, dst_tile_h;
158   int dst_y2 = dst_y + h;
159   int dst_x2 = dst_x + w;
160
161   DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
162       __func__,
163       src, src_pitch, src_offset, src_x, src_y,
164       dst, dst_pitch, dst_offset, dst_x, dst_y, w, h);
165
166   isl_get_tile_dims(src->surf.tiling, cpp, &src_tile_w, &src_tile_h);
167   isl_get_tile_dims(dst->surf.tiling, cpp, &dst_tile_w, &dst_tile_h);
168
169   /* For Tiled surfaces, the pitch has to be a multiple of the Tile width
170    * (X direction width of the Tile). This is ensured while allocating the
171    * buffer object.
172    */
173   assert(src->surf.tiling == ISL_TILING_LINEAR || (src_pitch % src_tile_w) == 0);
174   assert(dst->surf.tiling == ISL_TILING_LINEAR || (dst_pitch % dst_tile_w) == 0);
175
176   /* For big formats (such as floating point), do the copy using 16 or
177    * 32bpp and multiply the coordinates.
178    */
179   if (cpp > 4) {
180      if (cpp % 4 == 2) {
181         dst_x *= cpp / 2;
182         dst_x2 *= cpp / 2;
183         src_x *= cpp / 2;
184         cpp = 2;
185      } else {
186         assert(cpp % 4 == 0);
187         dst_x *= cpp / 4;
188         dst_x2 *= cpp / 4;
189         src_x *= cpp / 4;
190         cpp = 4;
191      }
192   }
193
194   /* Blit pitch must be dword-aligned.  Otherwise, the hardware appears to drop
195    * the low bits.  Offsets must be naturally aligned.
196    */
197   if (src_pitch % 4 != 0 || src_offset % cpp != 0 ||
198       dst_pitch % 4 != 0 || dst_offset % cpp != 0)
199     return false;
200
201   /* For tiled source and destination, pitch value should be specified
202    * as a number of Dwords.
203    */
204   if (dst->surf.tiling != ISL_TILING_LINEAR)
205      dst_pitch /= 4;
206
207   if (src->surf.tiling != ISL_TILING_LINEAR)
208      src_pitch /= 4;
209
210   assert(cpp <= 4);
211   crocus_emit_cmd(batch, GENX(XY_SRC_COPY_BLT), xyblt) {
212      xyblt.RasterOperation = 0xCC;
213      xyblt.DestinationTilingEnable = dst->surf.tiling != ISL_TILING_LINEAR;
214      xyblt.SourceTilingEnable = src->surf.tiling != ISL_TILING_LINEAR;
215      xyblt.SourceBaseAddress = ro_bo(src->bo, src_offset);
216      xyblt.DestinationBaseAddress = rw_bo(dst->bo, dst_offset);
217      xyblt.ColorDepth = color_depth_for_cpp(cpp);
218      xyblt._32bppByteMask = cpp == 4 ? 0x3 : 0x1;
219      xyblt.DestinationX1Coordinate = dst_x;
220      xyblt.DestinationY1Coordinate = dst_y;
221      xyblt.DestinationX2Coordinate = dst_x2;
222      xyblt.DestinationY2Coordinate = dst_y2;
223      xyblt.DestinationPitch = dst_pitch;
224      xyblt.SourceX1Coordinate = src_x;
225      xyblt.SourceY1Coordinate = src_y;
226      xyblt.SourcePitch = src_pitch;
227   };
228
229   crocus_emit_mi_flush(batch);
230   return true;
231}
232
233static bool crocus_emit_blt(struct crocus_batch *batch,
234                            struct crocus_resource *src,
235                            struct crocus_resource *dst,
236                            unsigned dst_level,
237                            unsigned dst_x, unsigned dst_y,
238                            unsigned dst_z,
239                            unsigned src_level,
240                            const struct pipe_box *src_box)
241{
242   const struct isl_format_layout *src_fmtl = isl_format_get_layout(src->surf.format);
243   unsigned src_cpp = src_fmtl->bpb / 8;
244   const struct isl_format_layout *dst_fmtl = isl_format_get_layout(dst->surf.format);
245   const unsigned dst_cpp = dst_fmtl->bpb / 8;
246   uint16_t src_x, src_y;
247   uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y;
248   uint32_t src_width = src_box->width, src_height = src_box->height;
249
250   /* gen4/5 can't handle Y tiled blits. */
251   if (src->surf.tiling == ISL_TILING_Y0 || dst->surf.tiling == ISL_TILING_Y0)
252      return false;
253
254   if (src->surf.format != dst->surf.format)
255      return false;
256
257   if (src_cpp != dst_cpp)
258      return false;
259
260   src_x = src_box->x;
261   src_y = src_box->y;
262
263   assert(src_cpp == dst_cpp);
264
265   crocus_resource_get_image_offset(src, src_level, src_box->z, &src_image_x,
266                                    &src_image_y);
267   if (util_format_is_compressed(src->base.b.format)) {
268      int bw = util_format_get_blockwidth(src->base.b.format);
269      int bh = util_format_get_blockheight(src->base.b.format);
270      assert(src_x % bw == 0);
271      assert(src_y % bh == 0);
272      src_x /= (int)bw;
273      src_y /= (int)bh;
274      src_width = DIV_ROUND_UP(src_width, (int)bw);
275      src_height = DIV_ROUND_UP(src_height, (int)bh);
276   }
277
278   crocus_resource_get_image_offset(dst, dst_level, dst_z, &dst_image_x,
279                                    &dst_image_y);
280   if (util_format_is_compressed(dst->base.b.format)) {
281      int bw = util_format_get_blockwidth(dst->base.b.format);
282      int bh = util_format_get_blockheight(dst->base.b.format);
283      assert(dst_x % bw == 0);
284      assert(dst_y % bh == 0);
285      dst_x /= (int)bw;
286      dst_y /= (int)bh;
287   }
288   src_x += src_image_x;
289   src_y += src_image_y;
290   dst_x += dst_image_x;
291   dst_y += dst_image_y;
292
293   /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics
294    * Data Size Limitations):
295    *
296    *    The BLT engine is capable of transferring very large quantities of
297    *    graphics data. Any graphics data read from and written to the
298    *    destination is permitted to represent a number of pixels that
299    *    occupies up to 65,536 scan lines and up to 32,768 bytes per scan line
300    *    at the destination. The maximum number of pixels that may be
301    *    represented per scan line’s worth of graphics data depends on the
302    *    color depth.
303    *
304    * The blitter's pitch is a signed 16-bit integer, but measured in bytes
305    * for linear surfaces and DWords for tiled surfaces.  So the maximum
306    * pitch is 32k linear and 128k tiled.
307    */
308   if (crocus_resource_blt_pitch(src) >= 32768 ||
309       crocus_resource_blt_pitch(dst) >= 32768) {
310      return false;
311   }
312
313   /* We need to split the blit into chunks that each fit within the blitter's
314    * restrictions.  We can't use a chunk size of 32768 because we need to
315    * ensure that src_tile_x + chunk_size fits.  We choose 16384 because it's
316    * a nice round power of two, big enough that performance won't suffer, and
317    * small enough to guarantee everything fits.
318    */
319   const uint32_t max_chunk_size = 16384;
320
321   for (uint32_t chunk_x = 0; chunk_x < src_width; chunk_x += max_chunk_size) {
322      for (uint32_t chunk_y = 0; chunk_y < src_height; chunk_y += max_chunk_size) {
323         const uint32_t chunk_w = MIN2(max_chunk_size, src_width - chunk_x);
324         const uint32_t chunk_h = MIN2(max_chunk_size, src_height - chunk_y);
325
326         uint64_t src_offset;
327         uint32_t src_tile_x, src_tile_y;
328         ASSERTED uint32_t z_offset_el, array_offset;
329         isl_tiling_get_intratile_offset_el(src->surf.tiling, src->surf.dim,
330                                            src->surf.msaa_layout,
331                                            src_cpp * 8, src->surf.samples,
332                                            src->surf.row_pitch_B,
333                                            src->surf.array_pitch_el_rows,
334                                            src_x + chunk_x, src_y + chunk_y, 0, 0,
335                                            &src_offset,
336                                            &src_tile_x, &src_tile_y,
337                                            &z_offset_el, &array_offset);
338         assert(z_offset_el == 0);
339         assert(array_offset == 0);
340
341         uint64_t dst_offset;
342         uint32_t dst_tile_x, dst_tile_y;
343         isl_tiling_get_intratile_offset_el(dst->surf.tiling, dst->surf.dim,
344                                            dst->surf.msaa_layout,
345                                            dst_cpp * 8, dst->surf.samples,
346                                            dst->surf.row_pitch_B,
347                                            dst->surf.array_pitch_el_rows,
348                                            dst_x + chunk_x, dst_y + chunk_y, 0, 0,
349                                            &dst_offset,
350                                            &dst_tile_x, &dst_tile_y,
351                                            &z_offset_el, &array_offset);
352         assert(z_offset_el == 0);
353         assert(array_offset == 0);
354         if (!emit_copy_blt(batch, src, dst,
355                            src_cpp, src->surf.row_pitch_B,
356                            src_offset,
357                            dst->surf.row_pitch_B, dst_offset,
358                            src_tile_x, src_tile_y,
359                            dst_tile_x, dst_tile_y,
360                            chunk_w, chunk_h)) {
361            return false;
362         }
363      }
364   }
365
366   if (util_format_has_alpha1(src->base.b.format) &&
367       util_format_has_alpha(dst->base.b.format))
368      blt_set_alpha_to_one(batch, dst, 0, 0, src_width, src_height);
369   return true;
370}
371
372static bool crocus_blit_blt(struct crocus_batch *batch,
373                            const struct pipe_blit_info *info)
374{
375   if (!validate_blit_for_blt(batch, info))
376      return false;
377
378   return crocus_emit_blt(batch,
379                          (struct crocus_resource *)info->src.resource,
380                          (struct crocus_resource *)info->dst.resource,
381                          info->dst.level,
382                          info->dst.box.x,
383                          info->dst.box.y,
384                          info->dst.box.z,
385                          info->src.level,
386                          &info->src.box);
387}
388
389
390static bool crocus_copy_region_blt(struct crocus_batch *batch,
391                                   struct crocus_resource *dst,
392                                   unsigned dst_level,
393                                   unsigned dstx, unsigned dsty, unsigned dstz,
394                                   struct crocus_resource *src,
395                                   unsigned src_level,
396                                   const struct pipe_box *src_box)
397{
398   if (dst->base.b.target == PIPE_BUFFER || src->base.b.target == PIPE_BUFFER)
399      return false;
400   return crocus_emit_blt(batch,
401                          src,
402                          dst,
403                          dst_level,
404                          dstx, dsty, dstz,
405                          src_level,
406                          src_box);
407}
408#endif
409
410void
411genX(crocus_init_blt)(struct crocus_screen *screen)
412{
413#if GFX_VER <= 5
414   screen->vtbl.blit_blt = crocus_blit_blt;
415   screen->vtbl.copy_region_blt = crocus_copy_region_blt;
416#else
417   screen->vtbl.blit_blt = NULL;
418   screen->vtbl.copy_region_blt = NULL;
419#endif
420}
421