1/* 2 * Copyright © 2018 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included 12 * in all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 * DEALINGS IN THE SOFTWARE. 21 */ 22 23/* blt command encoding for gen4/5 */ 24#include "crocus_context.h" 25 26#include "crocus_genx_macros.h" 27#include "crocus_genx_protos.h" 28#include "crocus_resource.h" 29 30#define FILE_DEBUG_FLAG DEBUG_BLIT 31 32#if GFX_VER <= 5 33 34static uint32_t 35color_depth_for_cpp(int cpp) 36{ 37 switch (cpp) { 38 case 4: return COLOR_DEPTH_32bit; 39 case 2: return COLOR_DEPTH_565; 40 case 1: return COLOR_DEPTH_8bit; 41 default: 42 unreachable("not reached"); 43 } 44} 45 46static void 47blt_set_alpha_to_one(struct crocus_batch *batch, 48 struct crocus_resource *dst, 49 int x, int y, int width, int height) 50{ 51 const struct isl_format_layout *fmtl = isl_format_get_layout(dst->surf.format); 52 unsigned cpp = fmtl->bpb / 8; 53 uint32_t pitch = dst->surf.row_pitch_B; 54 55 if (dst->surf.tiling != ISL_TILING_LINEAR) 56 pitch /= 4; 57 /* We need to split the blit into chunks that each fit within the blitter's 58 * restrictions. We can't use a chunk size of 32768 because we need to 59 * ensure that src_tile_x + chunk_size fits. We choose 16384 because it's 60 * a nice round power of two, big enough that performance won't suffer, and 61 * small enough to guarantee everything fits. 62 */ 63 const uint32_t max_chunk_size = 16384; 64 65 for (uint32_t chunk_x = 0; chunk_x < width; chunk_x += max_chunk_size) { 66 for (uint32_t chunk_y = 0; chunk_y < height; chunk_y += max_chunk_size) { 67 const uint32_t chunk_w = MIN2(max_chunk_size, width - chunk_x); 68 const uint32_t chunk_h = MIN2(max_chunk_size, height - chunk_y); 69 uint32_t tile_x, tile_y; 70 uint64_t offset_B; 71 ASSERTED uint32_t z_offset_el, array_offset; 72 isl_tiling_get_intratile_offset_el(dst->surf.tiling, dst->surf.dim, 73 dst->surf.msaa_layout, 74 cpp * 8, dst->surf.samples, 75 dst->surf.row_pitch_B, 76 dst->surf.array_pitch_el_rows, 77 chunk_x, chunk_y, 0, 0, 78 &offset_B, 79 &tile_x, &tile_y, 80 &z_offset_el, &array_offset); 81 assert(z_offset_el == 0); 82 assert(array_offset == 0); 83 crocus_emit_cmd(batch, GENX(XY_COLOR_BLT), xyblt) { 84 xyblt.TilingEnable = dst->surf.tiling != ISL_TILING_LINEAR; 85 xyblt.ColorDepth = color_depth_for_cpp(cpp); 86 xyblt.RasterOperation = 0xF0; 87 xyblt.DestinationPitch = pitch; 88 xyblt._32bppByteMask = 2; 89 xyblt.DestinationBaseAddress = rw_bo(dst->bo, offset_B); 90 xyblt.DestinationX1Coordinate = tile_x; 91 xyblt.DestinationY1Coordinate = tile_y; 92 xyblt.DestinationX2Coordinate = tile_x + chunk_w; 93 xyblt.DestinationY2Coordinate = tile_y + chunk_h; 94 xyblt.SolidPatternColor = 0xffffffff; 95 } 96 } 97 } 98} 99 100static bool validate_blit_for_blt(struct crocus_batch *batch, 101 const struct pipe_blit_info *info) 102{ 103 /* If the source and destination are the same size with no mirroring, 104 * the rectangles are within the size of the texture and there is no 105 * scissor, then we can probably use the blit engine. 106 */ 107 if (info->dst.box.width != info->src.box.width || 108 info->dst.box.height != info->src.box.height) 109 return false; 110 111 if (info->scissor_enable) 112 return false; 113 114 if (info->dst.box.height < 0 || info->src.box.height < 0) 115 return false; 116 117 if (info->dst.box.depth > 1 || info->src.box.depth > 1) 118 return false; 119 120 const struct util_format_description *desc = 121 util_format_description(info->src.format); 122 int i = util_format_get_first_non_void_channel(info->src.format); 123 if (i == -1) 124 return false; 125 126 /* can't do the alpha to 1 setting for these. */ 127 if ((util_format_has_alpha1(info->src.format) && 128 util_format_has_alpha(info->dst.format) && 129 desc->channel[i].size > 8)) 130 return false; 131 return true; 132} 133 134static inline int crocus_resource_blt_pitch(struct crocus_resource *res) 135{ 136 int pitch = res->surf.row_pitch_B; 137 if (res->surf.tiling != ISL_TILING_LINEAR) 138 pitch /= 4; 139 return pitch; 140} 141 142 143static bool emit_copy_blt(struct crocus_batch *batch, 144 struct crocus_resource *src, 145 struct crocus_resource *dst, 146 unsigned cpp, 147 int32_t src_pitch, 148 unsigned src_offset, 149 int32_t dst_pitch, 150 unsigned dst_offset, 151 uint16_t src_x, uint16_t src_y, 152 uint16_t dst_x, uint16_t dst_y, 153 uint16_t w, uint16_t h) 154 155{ 156 uint32_t src_tile_w, src_tile_h; 157 uint32_t dst_tile_w, dst_tile_h; 158 int dst_y2 = dst_y + h; 159 int dst_x2 = dst_x + w; 160 161 DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", 162 __func__, 163 src, src_pitch, src_offset, src_x, src_y, 164 dst, dst_pitch, dst_offset, dst_x, dst_y, w, h); 165 166 isl_get_tile_dims(src->surf.tiling, cpp, &src_tile_w, &src_tile_h); 167 isl_get_tile_dims(dst->surf.tiling, cpp, &dst_tile_w, &dst_tile_h); 168 169 /* For Tiled surfaces, the pitch has to be a multiple of the Tile width 170 * (X direction width of the Tile). This is ensured while allocating the 171 * buffer object. 172 */ 173 assert(src->surf.tiling == ISL_TILING_LINEAR || (src_pitch % src_tile_w) == 0); 174 assert(dst->surf.tiling == ISL_TILING_LINEAR || (dst_pitch % dst_tile_w) == 0); 175 176 /* For big formats (such as floating point), do the copy using 16 or 177 * 32bpp and multiply the coordinates. 178 */ 179 if (cpp > 4) { 180 if (cpp % 4 == 2) { 181 dst_x *= cpp / 2; 182 dst_x2 *= cpp / 2; 183 src_x *= cpp / 2; 184 cpp = 2; 185 } else { 186 assert(cpp % 4 == 0); 187 dst_x *= cpp / 4; 188 dst_x2 *= cpp / 4; 189 src_x *= cpp / 4; 190 cpp = 4; 191 } 192 } 193 194 /* Blit pitch must be dword-aligned. Otherwise, the hardware appears to drop 195 * the low bits. Offsets must be naturally aligned. 196 */ 197 if (src_pitch % 4 != 0 || src_offset % cpp != 0 || 198 dst_pitch % 4 != 0 || dst_offset % cpp != 0) 199 return false; 200 201 /* For tiled source and destination, pitch value should be specified 202 * as a number of Dwords. 203 */ 204 if (dst->surf.tiling != ISL_TILING_LINEAR) 205 dst_pitch /= 4; 206 207 if (src->surf.tiling != ISL_TILING_LINEAR) 208 src_pitch /= 4; 209 210 assert(cpp <= 4); 211 crocus_emit_cmd(batch, GENX(XY_SRC_COPY_BLT), xyblt) { 212 xyblt.RasterOperation = 0xCC; 213 xyblt.DestinationTilingEnable = dst->surf.tiling != ISL_TILING_LINEAR; 214 xyblt.SourceTilingEnable = src->surf.tiling != ISL_TILING_LINEAR; 215 xyblt.SourceBaseAddress = ro_bo(src->bo, src_offset); 216 xyblt.DestinationBaseAddress = rw_bo(dst->bo, dst_offset); 217 xyblt.ColorDepth = color_depth_for_cpp(cpp); 218 xyblt._32bppByteMask = cpp == 4 ? 0x3 : 0x1; 219 xyblt.DestinationX1Coordinate = dst_x; 220 xyblt.DestinationY1Coordinate = dst_y; 221 xyblt.DestinationX2Coordinate = dst_x2; 222 xyblt.DestinationY2Coordinate = dst_y2; 223 xyblt.DestinationPitch = dst_pitch; 224 xyblt.SourceX1Coordinate = src_x; 225 xyblt.SourceY1Coordinate = src_y; 226 xyblt.SourcePitch = src_pitch; 227 }; 228 229 crocus_emit_mi_flush(batch); 230 return true; 231} 232 233static bool crocus_emit_blt(struct crocus_batch *batch, 234 struct crocus_resource *src, 235 struct crocus_resource *dst, 236 unsigned dst_level, 237 unsigned dst_x, unsigned dst_y, 238 unsigned dst_z, 239 unsigned src_level, 240 const struct pipe_box *src_box) 241{ 242 const struct isl_format_layout *src_fmtl = isl_format_get_layout(src->surf.format); 243 unsigned src_cpp = src_fmtl->bpb / 8; 244 const struct isl_format_layout *dst_fmtl = isl_format_get_layout(dst->surf.format); 245 const unsigned dst_cpp = dst_fmtl->bpb / 8; 246 uint16_t src_x, src_y; 247 uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y; 248 uint32_t src_width = src_box->width, src_height = src_box->height; 249 250 /* gen4/5 can't handle Y tiled blits. */ 251 if (src->surf.tiling == ISL_TILING_Y0 || dst->surf.tiling == ISL_TILING_Y0) 252 return false; 253 254 if (src->surf.format != dst->surf.format) 255 return false; 256 257 if (src_cpp != dst_cpp) 258 return false; 259 260 src_x = src_box->x; 261 src_y = src_box->y; 262 263 assert(src_cpp == dst_cpp); 264 265 crocus_resource_get_image_offset(src, src_level, src_box->z, &src_image_x, 266 &src_image_y); 267 if (util_format_is_compressed(src->base.b.format)) { 268 int bw = util_format_get_blockwidth(src->base.b.format); 269 int bh = util_format_get_blockheight(src->base.b.format); 270 assert(src_x % bw == 0); 271 assert(src_y % bh == 0); 272 src_x /= (int)bw; 273 src_y /= (int)bh; 274 src_width = DIV_ROUND_UP(src_width, (int)bw); 275 src_height = DIV_ROUND_UP(src_height, (int)bh); 276 } 277 278 crocus_resource_get_image_offset(dst, dst_level, dst_z, &dst_image_x, 279 &dst_image_y); 280 if (util_format_is_compressed(dst->base.b.format)) { 281 int bw = util_format_get_blockwidth(dst->base.b.format); 282 int bh = util_format_get_blockheight(dst->base.b.format); 283 assert(dst_x % bw == 0); 284 assert(dst_y % bh == 0); 285 dst_x /= (int)bw; 286 dst_y /= (int)bh; 287 } 288 src_x += src_image_x; 289 src_y += src_image_y; 290 dst_x += dst_image_x; 291 dst_y += dst_image_y; 292 293 /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics 294 * Data Size Limitations): 295 * 296 * The BLT engine is capable of transferring very large quantities of 297 * graphics data. Any graphics data read from and written to the 298 * destination is permitted to represent a number of pixels that 299 * occupies up to 65,536 scan lines and up to 32,768 bytes per scan line 300 * at the destination. The maximum number of pixels that may be 301 * represented per scan line’s worth of graphics data depends on the 302 * color depth. 303 * 304 * The blitter's pitch is a signed 16-bit integer, but measured in bytes 305 * for linear surfaces and DWords for tiled surfaces. So the maximum 306 * pitch is 32k linear and 128k tiled. 307 */ 308 if (crocus_resource_blt_pitch(src) >= 32768 || 309 crocus_resource_blt_pitch(dst) >= 32768) { 310 return false; 311 } 312 313 /* We need to split the blit into chunks that each fit within the blitter's 314 * restrictions. We can't use a chunk size of 32768 because we need to 315 * ensure that src_tile_x + chunk_size fits. We choose 16384 because it's 316 * a nice round power of two, big enough that performance won't suffer, and 317 * small enough to guarantee everything fits. 318 */ 319 const uint32_t max_chunk_size = 16384; 320 321 for (uint32_t chunk_x = 0; chunk_x < src_width; chunk_x += max_chunk_size) { 322 for (uint32_t chunk_y = 0; chunk_y < src_height; chunk_y += max_chunk_size) { 323 const uint32_t chunk_w = MIN2(max_chunk_size, src_width - chunk_x); 324 const uint32_t chunk_h = MIN2(max_chunk_size, src_height - chunk_y); 325 326 uint64_t src_offset; 327 uint32_t src_tile_x, src_tile_y; 328 ASSERTED uint32_t z_offset_el, array_offset; 329 isl_tiling_get_intratile_offset_el(src->surf.tiling, src->surf.dim, 330 src->surf.msaa_layout, 331 src_cpp * 8, src->surf.samples, 332 src->surf.row_pitch_B, 333 src->surf.array_pitch_el_rows, 334 src_x + chunk_x, src_y + chunk_y, 0, 0, 335 &src_offset, 336 &src_tile_x, &src_tile_y, 337 &z_offset_el, &array_offset); 338 assert(z_offset_el == 0); 339 assert(array_offset == 0); 340 341 uint64_t dst_offset; 342 uint32_t dst_tile_x, dst_tile_y; 343 isl_tiling_get_intratile_offset_el(dst->surf.tiling, dst->surf.dim, 344 dst->surf.msaa_layout, 345 dst_cpp * 8, dst->surf.samples, 346 dst->surf.row_pitch_B, 347 dst->surf.array_pitch_el_rows, 348 dst_x + chunk_x, dst_y + chunk_y, 0, 0, 349 &dst_offset, 350 &dst_tile_x, &dst_tile_y, 351 &z_offset_el, &array_offset); 352 assert(z_offset_el == 0); 353 assert(array_offset == 0); 354 if (!emit_copy_blt(batch, src, dst, 355 src_cpp, src->surf.row_pitch_B, 356 src_offset, 357 dst->surf.row_pitch_B, dst_offset, 358 src_tile_x, src_tile_y, 359 dst_tile_x, dst_tile_y, 360 chunk_w, chunk_h)) { 361 return false; 362 } 363 } 364 } 365 366 if (util_format_has_alpha1(src->base.b.format) && 367 util_format_has_alpha(dst->base.b.format)) 368 blt_set_alpha_to_one(batch, dst, 0, 0, src_width, src_height); 369 return true; 370} 371 372static bool crocus_blit_blt(struct crocus_batch *batch, 373 const struct pipe_blit_info *info) 374{ 375 if (!validate_blit_for_blt(batch, info)) 376 return false; 377 378 return crocus_emit_blt(batch, 379 (struct crocus_resource *)info->src.resource, 380 (struct crocus_resource *)info->dst.resource, 381 info->dst.level, 382 info->dst.box.x, 383 info->dst.box.y, 384 info->dst.box.z, 385 info->src.level, 386 &info->src.box); 387} 388 389 390static bool crocus_copy_region_blt(struct crocus_batch *batch, 391 struct crocus_resource *dst, 392 unsigned dst_level, 393 unsigned dstx, unsigned dsty, unsigned dstz, 394 struct crocus_resource *src, 395 unsigned src_level, 396 const struct pipe_box *src_box) 397{ 398 if (dst->base.b.target == PIPE_BUFFER || src->base.b.target == PIPE_BUFFER) 399 return false; 400 return crocus_emit_blt(batch, 401 src, 402 dst, 403 dst_level, 404 dstx, dsty, dstz, 405 src_level, 406 src_box); 407} 408#endif 409 410void 411genX(crocus_init_blt)(struct crocus_screen *screen) 412{ 413#if GFX_VER <= 5 414 screen->vtbl.blit_blt = crocus_blit_blt; 415 screen->vtbl.copy_region_blt = crocus_copy_region_blt; 416#else 417 screen->vtbl.blit_blt = NULL; 418 screen->vtbl.copy_region_blt = NULL; 419#endif 420} 421