1/* 2 * Copyright 2015 Philip Taylor <philip@zaynar.co.uk> 3 * Copyright 2018 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 * DEALINGS IN THE SOFTWARE. 23 */ 24 25/** 26 * \file texcompress_astc.c 27 * 28 * Decompression code for GL_KHR_texture_compression_astc_ldr, which is just 29 * ASTC 2D LDR. 30 * 31 * The ASTC 2D LDR decoder (without the sRGB part) was copied from the OASTC 32 * library written by Philip Taylor. I added sRGB support and adjusted it for 33 * Mesa. 
- Marek
 */

#include "texcompress_astc.h"
#include "macros.h"
#include "util/half_float.h"
#include <stdio.h>
#include <cstdlib> // for abort() on windows

/* Debug switches, both off by default.  VERBOSE_DECODE guards the bit-level
 * trace printed while a block is being decoded; VERBOSE_WRITE guards the
 * trace printed by OutputBitVector whenever bits are appended.
 */
static bool VERBOSE_DECODE = false;
static bool VERBOSE_WRITE = false;

/* Wrapper class whose only content is the status enum returned by the
 * decoding functions.  decode_error::ok means success; every other value
 * names the reason a block was rejected.
 */
class decode_error
{
public:
   enum type {
      ok,
      unsupported_hdr_void_extent,
      reserved_block_mode_1,
      reserved_block_mode_2,
      dual_plane_and_too_many_partitions,
      invalid_range_in_void_extent,
      weight_grid_exceeds_block_size,
      invalid_colour_endpoints_size,
      invalid_colour_endpoints_count,
      invalid_weight_bits,
      invalid_num_weights,
   };
};


/* One quantisation range for colour endpoint values.
 *
 * max is the largest value of the range; t and q are the number of trits
 * and quints per value (0 or 1 in the table below) and b is the number of
 * plain bits per value.  Every row of cem_ranges satisfies
 * max + 1 == 3^t * 5^q * 2^b.
 */
struct cem_range {
   uint8_t max;
   uint8_t t, q, b;
};

/* Based on the Color Unquantization Parameters table,
 * plus the bit-only representations, sorted by increasing size
 */
static cem_range cem_ranges[] = {
   { 5, 1, 0, 1 },
   { 7, 0, 0, 3 },
   { 9, 0, 1, 1 },
   { 11, 1, 0, 2 },
   { 15, 0, 0, 4 },
   { 19, 0, 1, 2 },
   { 23, 1, 0, 3 },
   { 31, 0, 0, 5 },
   { 39, 0, 1, 3 },
   { 47, 1, 0, 4 },
   { 63, 0, 0, 6 },
   { 79, 0, 1, 4 },
   { 95, 1, 0, 5 },
   { 127, 0, 0, 7 },
   { 159, 0, 1, 5 },
   { 191, 1, 0, 6 },
   { 255, 0, 0, 8 },
};

/* Concatenate the arguments into one integer, first argument most
 * significant.  Each argument is shifted by its position only (no masking),
 * so callers pass single-bit values, or a wider value in the leading slot.
 */
#define CAT_BITS_2(a, b) ( ((a) << 1) | (b) )
#define CAT_BITS_3(a, b, c) ( ((a) << 2) | ((b) << 1) | (c) )
#define CAT_BITS_4(a, b, c, d) ( ((a) << 3) | ((b) << 2) | ((c) << 1) | (d) )
#define CAT_BITS_5(a, b, c, d, e) ( ((a) << 4) | ((b) << 3) | ((c) << 2) | ((d) << 1) | (e) )

/**
 * Unpack 5n+8 bits from 'in' into 5 output values.
 * If n <= 4 then T should be uint32_t, else it must be uint64_t.
100 */ 101template <typename T> 102static void unpack_trit_block(int n, T in, uint8_t *out) 103{ 104 assert(n <= 6); /* else output will overflow uint8_t */ 105 106 uint8_t T0 = (in >> (n)) & 0x1; 107 uint8_t T1 = (in >> (n+1)) & 0x1; 108 uint8_t T2 = (in >> (2*n+2)) & 0x1; 109 uint8_t T3 = (in >> (2*n+3)) & 0x1; 110 uint8_t T4 = (in >> (3*n+4)) & 0x1; 111 uint8_t T5 = (in >> (4*n+5)) & 0x1; 112 uint8_t T6 = (in >> (4*n+6)) & 0x1; 113 uint8_t T7 = (in >> (5*n+7)) & 0x1; 114 uint8_t mmask = (1 << n) - 1; 115 uint8_t m0 = (in >> (0)) & mmask; 116 uint8_t m1 = (in >> (n+2)) & mmask; 117 uint8_t m2 = (in >> (2*n+4)) & mmask; 118 uint8_t m3 = (in >> (3*n+5)) & mmask; 119 uint8_t m4 = (in >> (4*n+7)) & mmask; 120 121 uint8_t C; 122 uint8_t t4, t3, t2, t1, t0; 123 if (CAT_BITS_3(T4, T3, T2) == 0x7) { 124 C = CAT_BITS_5(T7, T6, T5, T1, T0); 125 t4 = t3 = 2; 126 } else { 127 C = CAT_BITS_5(T4, T3, T2, T1, T0); 128 if (CAT_BITS_2(T6, T5) == 0x3) { 129 t4 = 2; 130 t3 = T7; 131 } else { 132 t4 = T7; 133 t3 = CAT_BITS_2(T6, T5); 134 } 135 } 136 137 if ((C & 0x3) == 0x3) { 138 t2 = 2; 139 t1 = (C >> 4) & 0x1; 140 uint8_t C3 = (C >> 3) & 0x1; 141 uint8_t C2 = (C >> 2) & 0x1; 142 t0 = (C3 << 1) | (C2 & ~C3); 143 } else if (((C >> 2) & 0x3) == 0x3) { 144 t2 = 2; 145 t1 = 2; 146 t0 = C & 0x3; 147 } else { 148 t2 = (C >> 4) & 0x1; 149 t1 = (C >> 2) & 0x3; 150 uint8_t C1 = (C >> 1) & 0x1; 151 uint8_t C0 = (C >> 0) & 0x1; 152 t0 = (C1 << 1) | (C0 & ~C1); 153 } 154 155 out[0] = (t0 << n) | m0; 156 out[1] = (t1 << n) | m1; 157 out[2] = (t2 << n) | m2; 158 out[3] = (t3 << n) | m3; 159 out[4] = (t4 << n) | m4; 160} 161 162/** 163 * Unpack 3n+7 bits from 'in' into 3 output values 164 */ 165static void unpack_quint_block(int n, uint32_t in, uint8_t *out) 166{ 167 assert(n <= 5); /* else output will overflow uint8_t */ 168 169 uint8_t Q0 = (in >> (n)) & 0x1; 170 uint8_t Q1 = (in >> (n+1)) & 0x1; 171 uint8_t Q2 = (in >> (n+2)) & 0x1; 172 uint8_t Q3 = (in >> (2*n+3)) & 0x1; 173 uint8_t Q4 = (in 
>> (2*n+4)) & 0x1; 174 uint8_t Q5 = (in >> (3*n+5)) & 0x1; 175 uint8_t Q6 = (in >> (3*n+6)) & 0x1; 176 uint8_t mmask = (1 << n) - 1; 177 uint8_t m0 = (in >> (0)) & mmask; 178 uint8_t m1 = (in >> (n+3)) & mmask; 179 uint8_t m2 = (in >> (2*n+5)) & mmask; 180 181 uint8_t C; 182 uint8_t q2, q1, q0; 183 if (CAT_BITS_4(Q6, Q5, Q2, Q1) == 0x3) { 184 q2 = CAT_BITS_3(Q0, Q4 & ~Q0, Q3 & ~Q0); 185 q1 = 4; 186 q0 = 4; 187 } else { 188 if (CAT_BITS_2(Q2, Q1) == 0x3) { 189 q2 = 4; 190 C = CAT_BITS_5(Q4, Q3, 0x1 & ~Q6, 0x1 & ~Q5, Q0); 191 } else { 192 q2 = CAT_BITS_2(Q6, Q5); 193 C = CAT_BITS_5(Q4, Q3, Q2, Q1, Q0); 194 } 195 if ((C & 0x7) == 0x5) { 196 q1 = 4; 197 q0 = (C >> 3) & 0x3; 198 } else { 199 q1 = (C >> 3) & 0x3; 200 q0 = C & 0x7; 201 } 202 } 203 out[0] = (q0 << n) | m0; 204 out[1] = (q1 << n) | m1; 205 out[2] = (q2 << n) | m2; 206} 207 208 209struct uint8x4_t 210{ 211 uint8_t v[4]; 212 213 uint8x4_t() { } 214 215 uint8x4_t(int a, int b, int c, int d) 216 { 217 assert(0 <= a && a <= 255); 218 assert(0 <= b && b <= 255); 219 assert(0 <= c && c <= 255); 220 assert(0 <= d && d <= 255); 221 v[0] = a; 222 v[1] = b; 223 v[2] = c; 224 v[3] = d; 225 } 226 227 static uint8x4_t clamped(int a, int b, int c, int d) 228 { 229 uint8x4_t r; 230 r.v[0] = MAX2(0, MIN2(255, a)); 231 r.v[1] = MAX2(0, MIN2(255, b)); 232 r.v[2] = MAX2(0, MIN2(255, c)); 233 r.v[3] = MAX2(0, MIN2(255, d)); 234 return r; 235 } 236}; 237 238static uint8x4_t blue_contract(int r, int g, int b, int a) 239{ 240 return uint8x4_t((r+b) >> 1, (g+b) >> 1, b, a); 241} 242 243static uint8x4_t blue_contract_clamped(int r, int g, int b, int a) 244{ 245 return uint8x4_t::clamped((r+b) >> 1, (g+b) >> 1, b, a); 246} 247 248static void bit_transfer_signed(int &a, int &b) 249{ 250 b >>= 1; 251 b |= a & 0x80; 252 a >>= 1; 253 a &= 0x3f; 254 if (a & 0x20) 255 a -= 0x40; 256} 257 258static uint32_t hash52(uint32_t p) 259{ 260 p ^= p >> 15; 261 p -= p << 17; 262 p += p << 7; 263 p += p << 4; 264 p ^= p >> 5; 265 p += p << 16; 266 p 
^= p >> 7; 267 p ^= p >> 3; 268 p ^= p << 6; 269 p ^= p >> 17; 270 return p; 271} 272 273static int select_partition(int seed, int x, int y, int z, int partitioncount, 274 int small_block) 275{ 276 if (small_block) { 277 x <<= 1; 278 y <<= 1; 279 z <<= 1; 280 } 281 seed += (partitioncount - 1) * 1024; 282 uint32_t rnum = hash52(seed); 283 uint8_t seed1 = rnum & 0xF; 284 uint8_t seed2 = (rnum >> 4) & 0xF; 285 uint8_t seed3 = (rnum >> 8) & 0xF; 286 uint8_t seed4 = (rnum >> 12) & 0xF; 287 uint8_t seed5 = (rnum >> 16) & 0xF; 288 uint8_t seed6 = (rnum >> 20) & 0xF; 289 uint8_t seed7 = (rnum >> 24) & 0xF; 290 uint8_t seed8 = (rnum >> 28) & 0xF; 291 uint8_t seed9 = (rnum >> 18) & 0xF; 292 uint8_t seed10 = (rnum >> 22) & 0xF; 293 uint8_t seed11 = (rnum >> 26) & 0xF; 294 uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF; 295 296 seed1 *= seed1; 297 seed2 *= seed2; 298 seed3 *= seed3; 299 seed4 *= seed4; 300 seed5 *= seed5; 301 seed6 *= seed6; 302 seed7 *= seed7; 303 seed8 *= seed8; 304 seed9 *= seed9; 305 seed10 *= seed10; 306 seed11 *= seed11; 307 seed12 *= seed12; 308 309 int sh1, sh2, sh3; 310 if (seed & 1) { 311 sh1 = (seed & 2 ? 4 : 5); 312 sh2 = (partitioncount == 3 ? 6 : 5); 313 } else { 314 sh1 = (partitioncount == 3 ? 6 : 5); 315 sh2 = (seed & 2 ? 4 : 5); 316 } 317 sh3 = (seed & 0x10) ? 
sh1 : sh2; 318 319 seed1 >>= sh1; 320 seed2 >>= sh2; 321 seed3 >>= sh1; 322 seed4 >>= sh2; 323 seed5 >>= sh1; 324 seed6 >>= sh2; 325 seed7 >>= sh1; 326 seed8 >>= sh2; 327 seed9 >>= sh3; 328 seed10 >>= sh3; 329 seed11 >>= sh3; 330 seed12 >>= sh3; 331 332 int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); 333 int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); 334 int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); 335 int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); 336 337 a &= 0x3F; 338 b &= 0x3F; 339 c &= 0x3F; 340 d &= 0x3F; 341 342 if (partitioncount < 4) 343 d = 0; 344 if (partitioncount < 3) 345 c = 0; 346 347 if (a >= b && a >= c && a >= d) 348 return 0; 349 else if (b >= c && b >= d) 350 return 1; 351 else if (c >= d) 352 return 2; 353 else 354 return 3; 355} 356 357 358struct InputBitVector 359{ 360 uint32_t data[4]; 361 362 void printf_bits(int offset, int count, const char *fmt = "", ...) 363 { 364 char out[129]; 365 memset(out, '.', 128); 366 out[128] = '\0'; 367 int idx = offset; 368 for (int i = 0; i < count; ++i) { 369 out[127 - idx] = ((data[idx >> 5] >> (idx & 31)) & 1) ? 
'1' : '0'; 370 ++idx; 371 } 372 printf("%s ", out); 373 va_list ap; 374 va_start(ap, fmt); 375 vprintf(fmt, ap); 376 va_end(ap); 377 printf("\n"); 378 } 379 380 uint32_t get_bits(int offset, int count) 381 { 382 assert(count >= 0 && count < 32); 383 384 uint32_t out = 0; 385 if (offset < 32) 386 out |= data[0] >> offset; 387 388 if (0 < offset && offset <= 32) 389 out |= data[1] << (32 - offset); 390 if (32 < offset && offset < 64) 391 out |= data[1] >> (offset - 32); 392 393 if (32 < offset && offset <= 64) 394 out |= data[2] << (64 - offset); 395 if (64 < offset && offset < 96) 396 out |= data[2] >> (offset - 64); 397 398 if (64 < offset && offset <= 96) 399 out |= data[3] << (96 - offset); 400 if (96 < offset && offset < 128) 401 out |= data[3] >> (offset - 96); 402 403 out &= (1 << count) - 1; 404 return out; 405 } 406 407 uint64_t get_bits64(int offset, int count) 408 { 409 assert(count >= 0 && count < 64); 410 411 uint64_t out = 0; 412 if (offset < 32) 413 out |= data[0] >> offset; 414 415 if (offset <= 32) 416 out |= (uint64_t)data[1] << (32 - offset); 417 if (32 < offset && offset < 64) 418 out |= data[1] >> (offset - 32); 419 420 if (0 < offset && offset <= 64) 421 out |= (uint64_t)data[2] << (64 - offset); 422 if (64 < offset && offset < 96) 423 out |= data[2] >> (offset - 64); 424 425 if (32 < offset && offset <= 96) 426 out |= (uint64_t)data[3] << (96 - offset); 427 if (96 < offset && offset < 128) 428 out |= data[3] >> (offset - 96); 429 430 out &= ((uint64_t)1 << count) - 1; 431 return out; 432 } 433 434 uint32_t get_bits_rev(int offset, int count) 435 { 436 assert(offset >= count); 437 uint32_t tmp = get_bits(offset - count, count); 438 uint32_t out = 0; 439 for (int i = 0; i < count; ++i) 440 out |= ((tmp >> i) & 1) << (count - 1 - i); 441 return out; 442 } 443}; 444 445struct OutputBitVector 446{ 447 uint32_t data[4]; 448 int offset; 449 450 OutputBitVector() 451 : offset(0) 452 { 453 memset(data, 0, sizeof(data)); 454 } 455 456 void 
append(uint32_t value, int size) 457 { 458 if (VERBOSE_WRITE) 459 printf("append offset=%d size=%d values=0x%x\n", offset, size, value); 460 461 assert(offset + size <= 128); 462 463 assert(size <= 32); 464 if (size < 32) 465 assert((value >> size) == 0); 466 467 while (size) { 468 int c = MIN2(size, 32 - (offset & 31)); 469 data[offset >> 5] |= (value << (offset & 31)); 470 offset += c; 471 size -= c; 472 value >>= c; 473 } 474 } 475 476 void append64(uint64_t value, int size) 477 { 478 if (VERBOSE_WRITE) 479 printf("append offset=%d size=%d values=0x%llx\n", offset, size, (unsigned long long)value); 480 481 assert(offset + size <= 128); 482 483 assert(size <= 64); 484 if (size < 64) 485 assert((value >> size) == 0); 486 487 while (size) { 488 int c = MIN2(size, 32 - (offset & 31)); 489 data[offset >> 5] |= (value << (offset & 31)); 490 offset += c; 491 size -= c; 492 value >>= c; 493 } 494 } 495 496 void append(OutputBitVector &v, int size) 497 { 498 if (VERBOSE_WRITE) 499 printf("append vector offset=%d size=%d\n", offset, size); 500 501 assert(offset + size <= 128); 502 int i = 0; 503 while (size >= 32) { 504 append(v.data[i++], 32); 505 size -= 32; 506 } 507 if (size > 0) 508 append(v.data[i] & ((1 << size) - 1), size); 509 } 510 511 void append_end(OutputBitVector &v, int size) 512 { 513 for (int i = 0; i < size; ++i) 514 data[(127 - i) >> 5] |= ((v.data[i >> 5] >> (i & 31)) & 1) << ((127 - i) & 31); 515 } 516 517 /* Insert the given number of '1' bits. (We could use 0s instead, but 1s are 518 * more likely to flush out bugs where we accidentally read undefined bits.) 
519 */ 520 void skip(int size) 521 { 522 if (VERBOSE_WRITE) 523 printf("skip offset=%d size=%d\n", offset, size); 524 525 assert(offset + size <= 128); 526 while (size >= 32) { 527 append(0xffffffff, 32); 528 size -= 32; 529 } 530 if (size > 0) 531 append(0xffffffff >> (32 - size), size); 532 } 533}; 534 535 536class Decoder 537{ 538public: 539 Decoder(int block_w, int block_h, int block_d, bool srgb, bool output_unorm8) 540 : block_w(block_w), block_h(block_h), block_d(block_d), srgb(srgb), 541 output_unorm8(output_unorm8) {} 542 543 decode_error::type decode(const uint8_t *in, uint16_t *output) const; 544 545 int block_w, block_h, block_d; 546 bool srgb, output_unorm8; 547}; 548 549struct Block 550{ 551 bool is_error; 552 bool bogus_colour_endpoints; 553 bool bogus_weights; 554 555 int high_prec; 556 int dual_plane; 557 int colour_component_selector; 558 int wt_range; 559 int wt_w, wt_h, wt_d; 560 int num_parts; 561 int partition_index; 562 563 bool is_void_extent; 564 int void_extent_d; 565 int void_extent_min_s; 566 int void_extent_max_s; 567 int void_extent_min_t; 568 int void_extent_max_t; 569 uint16_t void_extent_colour_r; 570 uint16_t void_extent_colour_g; 571 uint16_t void_extent_colour_b; 572 uint16_t void_extent_colour_a; 573 574 bool is_multi_cem; 575 int num_extra_cem_bits; 576 int colour_endpoint_data_offset; 577 int extra_cem_bits; 578 int cem_base_class; 579 int cems[4]; 580 581 int num_cem_values; 582 583 /* Calculated by unpack_weights(): */ 584 uint8_t weights_quant[64 + 4]; /* max 64 values, plus padding for overflows in trit parsing */ 585 586 /* Calculated by unquantise_weights(): */ 587 uint8_t weights[64 + 18]; /* max 64 values, plus padding for the infill interpolation */ 588 589 /* Calculated by unpack_colour_endpoints(): */ 590 uint8_t colour_endpoints_quant[18 + 4]; /* max 18 values, plus padding for overflows in trit parsing */ 591 592 /* Calculated by unquantise_colour_endpoints(): */ 593 uint8_t colour_endpoints[18]; 594 595 /* 
Calculated by calculate_from_weights(): */
   int wt_trits;
   int wt_quints;
   int wt_bits;
   int wt_max;
   int num_weights;
   int weight_bits;

   /* Calculated by calculate_remaining_bits(): */
   int remaining_bits;

   /* Calculated by calculate_colour_endpoints_size(): */
   int colour_endpoint_bits;
   int ce_max;
   int ce_trits;
   int ce_quints;
   int ce_bits;

   /* Calculated by compute_infill_weights(); */
   uint8_t infill_weights[2][216]; /* large enough for 6x6x6 */

   /* Calculated by decode_colour_endpoints(); */
   uint8x4_t endpoints_decoded[2][4];

   void calculate_from_weights();
   void calculate_remaining_bits();
   decode_error::type calculate_colour_endpoints_size();

   void unquantise_weights();
   void unquantise_colour_endpoints();

   decode_error::type decode(const Decoder &decoder, InputBitVector in);

   decode_error::type decode_block_mode(InputBitVector in);
   decode_error::type decode_void_extent(InputBitVector in);
   void decode_cem(InputBitVector in);
   void unpack_colour_endpoints(InputBitVector in);
   void decode_colour_endpoints();
   void unpack_weights(InputBitVector in);
   void compute_infill_weights(int block_w, int block_h, int block_d);

   void write_decoded(const Decoder &decoder, uint16_t *output);
};


/**
 * Decode one 16-byte compressed block from 'in' into 'output'.
 *
 * 'output' receives four uint16_t components per texel for
 * block_w * block_h * block_d texels.  If the block cannot be decoded,
 * every texel is set to the error colour (1, 0, 1, 1), written either as
 * 8-bit values or as FP16 constants depending on output_unorm8.
 *
 * \return decode_error::ok, or the reason the block was rejected.
 */
decode_error::type Decoder::decode(const uint8_t *in, uint16_t *output) const
{
   Block blk;
   InputBitVector in_vec;
   /* NOTE(review): a raw byte copy into 32-bit words; looks like this
    * assumes a little-endian host - confirm.
    */
   memcpy(&in_vec.data, in, 16);
   decode_error::type err = blk.decode(*this, in_vec);
   if (err == decode_error::ok) {
      blk.write_decoded(*this, output);
   } else {
      /* Fill output with the error colour */
      for (int i = 0; i < block_w * block_h * block_d; ++i) {
         if (output_unorm8) {
            output[i*4+0] = 0xff;
            output[i*4+1] = 0;
            output[i*4+2] = 0xff;
            output[i*4+3] = 0xff;
         } else {
            assert(!srgb); /* srgb must use unorm8 */

            output[i*4+0] = FP16_ONE;
            output[i*4+1] = FP16_ZERO;
            output[i*4+2] = FP16_ONE;
            output[i*4+3] = FP16_ONE;
         }
      }
   }
   return err;
}


/**
 * Parse a void-extent block: a flag at bit 9, four 13-bit extent
 * coordinates starting at bit 12, and four 16-bit colour components
 * starting at bit 64.
 *
 * \return unsupported_hdr_void_extent if the bit-9 flag is set,
 *         invalid_range_in_void_extent if the extents are neither all-ones
 *         nor strictly increasing (min < max) in both s and t, else ok.
 */
decode_error::type Block::decode_void_extent(InputBitVector block)
{
   /* TODO: 3D */

   is_void_extent = true;
   void_extent_d = block.get_bits(9, 1);
   void_extent_min_s = block.get_bits(12, 13);
   void_extent_max_s = block.get_bits(25, 13);
   void_extent_min_t = block.get_bits(38, 13);
   void_extent_max_t = block.get_bits(51, 13);
   void_extent_colour_r = block.get_bits(64, 16);
   void_extent_colour_g = block.get_bits(80, 16);
   void_extent_colour_b = block.get_bits(96, 16);
   void_extent_colour_a = block.get_bits(112, 16);

   /* TODO: maybe we should do something useful with the extent coordinates? */

   if (void_extent_d) {
      return decode_error::unsupported_hdr_void_extent;
   }

   if (void_extent_min_s == 0x1fff && void_extent_max_s == 0x1fff
       && void_extent_min_t == 0x1fff && void_extent_max_t == 0x1fff) {

      /* No extents */

   } else {

      /* Check for illegal encoding */
      if (void_extent_min_s >= void_extent_max_s || void_extent_min_t >= void_extent_max_t) {
         return decode_error::invalid_range_in_void_extent;
      }
   }

   return decode_error::ok;
}

/**
 * Parse the 11-bit block mode at the bottom of the block.
 *
 * Sets dual_plane, high_prec, wt_range and the weight grid size
 * (wt_w, wt_h).  The strings passed to printf_bits() spell out the bit
 * layout of each mode, most significant bit first.  A void-extent block
 * is handed to decode_void_extent().
 *
 * \return ok, a reserved_block_mode_* error, or whatever
 *         decode_void_extent() returns.
 */
decode_error::type Block::decode_block_mode(InputBitVector in)
{
   dual_plane = in.get_bits(10, 1);
   high_prec = in.get_bits(9, 1);

   if (in.get_bits(0, 2) != 0x0) {
      /* Layouts whose two lowest bits are non-zero: those bits are the
       * upper two bits of the weight range and bit 4 is the lowest one.
       */
      wt_range = (in.get_bits(0, 2) << 1) | in.get_bits(4, 1);
      int a = in.get_bits(5, 2);
      int b = in.get_bits(7, 2);
      switch (in.get_bits(2, 2)) {
      case 0x0:
         if (VERBOSE_DECODE)
            in.printf_bits(0, 11, "DHBBAAR00RR");
         wt_w = b + 4;
         wt_h = a + 2;
         break;
      case 0x1:
         if (VERBOSE_DECODE)
            in.printf_bits(0, 11, "DHBBAAR01RR");
         wt_w = b + 8;
         wt_h = a + 2;
         break;
      case 0x2:
         if (VERBOSE_DECODE)
            in.printf_bits(0, 11, "DHBBAAR10RR");
         wt_w = a + 2;
         wt_h = b + 8;
         break;
      case 0x3:
         /* the upper bit of b selects between two sub-layouts */
         if ((b & 0x2) == 0) {
            if (VERBOSE_DECODE)
               in.printf_bits(0, 11, "DH0BAAR11RR");
            wt_w = a + 2;
            wt_h = b + 6;
         } else {
            if (VERBOSE_DECODE)
               in.printf_bits(0, 11, "DH1BAAR11RR");
            wt_w = (b & 0x1) + 2;
            wt_h = a + 2;
         }
         break;
      }
   } else {
      /* Layouts whose two lowest bits are zero. */
      if (in.get_bits(6, 3) == 0x7) {
         if (in.get_bits(0, 9) == 0x1fc) {
            if (VERBOSE_DECODE)
               in.printf_bits(0, 11, "xx111111100 (void extent)");
            return decode_void_extent(in);
         } else {
            if (VERBOSE_DECODE)
               in.printf_bits(0, 11, "xx111xxxx00");
            return decode_error::reserved_block_mode_1;
         }
      }
      if (in.get_bits(0, 4) == 0x0) {
         if (VERBOSE_DECODE)
            in.printf_bits(0, 11, "xxxxxxx0000");
         return decode_error::reserved_block_mode_2;
      }

      /* Bit 1 is known to be zero here, so get_bits(1, 3) is bits 3:2
       * shifted up by one; bit 4 then fills in the lowest bit.
       */
      wt_range = in.get_bits(1, 3) | in.get_bits(4, 1);
      int a = in.get_bits(5, 2);
      int b;

      switch (in.get_bits(7, 2)) {
      case 0x0:
         if (VERBOSE_DECODE)
            in.printf_bits(0, 11, "DH00AARRR00");
         wt_w = 12;
         wt_h = a + 2;
         break;
      case 0x1:
         if (VERBOSE_DECODE)
            in.printf_bits(0, 11, "DH01AARRR00");
         wt_w = a + 2;
         wt_h = 12;
         break;
      case 0x3:
         if (in.get_bits(5, 1) == 0) {
            if (VERBOSE_DECODE)
               in.printf_bits(0, 11, "DH1100RRR00");
            wt_w = 6;
            wt_h = 10;
         } else {
            if (VERBOSE_DECODE)
               in.printf_bits(0, 11, "DH1101RRR00");
            wt_w = 10;
            wt_h = 6;
         }
         break;
      case 0x2:
         if (VERBOSE_DECODE)
            in.printf_bits(0, 11, "BB10AARRR00");
         /* bits 10:9 hold B in this layout, so the D and H values read
          * at the top of the function are overridden with 0
          */
         b = in.get_bits(9, 2);
         wt_w = a + 6;
         wt_h = b + 6;
         dual_plane = 0;
         high_prec = 0;
         break;
      }
   }
   return decode_error::ok;
}

/**
 * Parse the partition index and the colour endpoint mode (CEM) of each
 * partition.
 *
 * Reads num_parts and weight_bits, which must already be set.  Fills in
 * cems[] (unused entries stay -1), cem_base_class, is_multi_cem,
 * partition_index (-1 for a single partition), extra_cem_bits,
 * num_extra_cem_bits and colour_endpoint_data_offset.
 */
void Block::decode_cem(InputBitVector in)
{
   cems[0] = cems[1] = cems[2] = cems[3] = -1;

   num_extra_cem_bits = 0;
   extra_cem_bits = 0;

   if (num_parts > 1) {

      partition_index = in.get_bits(13, 10);
      if (VERBOSE_DECODE)
         in.printf_bits(13, 10, "partition ID (%d)", partition_index);

      uint32_t cem = in.get_bits(23, 6);

      if ((cem & 0x3) == 0x0) {
         /* Low two bits zero: the remaining four bits are one mode
          * shared by all partitions.
          */
         cem >>= 2;
         cem_base_class = cem >> 2;
         is_multi_cem = false;

         for (int i = 0; i < num_parts; ++i)
            cems[i] = cem;

         if (VERBOSE_DECODE)
            in.printf_bits(23, 6, "CEM (single, %d)", cem);
      } else {

         /* Otherwise the low two bits give (base class + 1).  Each
          * partition has a class bit C (added to the base class) and two
          * mode bits M.
          */
         cem_base_class = (cem & 0x3) - 1;
         is_multi_cem = true;

         if (VERBOSE_DECODE)
            in.printf_bits(23, 6, "CEM (multi, base class %d)", cem_base_class);

         /* Bits that do not fit in 25..28 are read from just below this
          * offset.
          */
         int offset = 128 - weight_bits;

         if (num_parts == 2) {
            if (VERBOSE_DECODE) {
               in.printf_bits(25, 4, "M0M0 C1 C0");
               in.printf_bits(offset - 2, 2, "M1M1");
            }

            uint32_t c0 = in.get_bits(25, 1);
            uint32_t c1 = in.get_bits(26, 1);

            extra_cem_bits = c0 + c1;

            num_extra_cem_bits = 2;

            uint32_t m0 = in.get_bits(27, 2);
            uint32_t m1 = in.get_bits(offset - 2, 2);

            cems[0] = ((cem_base_class + c0) << 2) | m0;
            cems[1] = ((cem_base_class + c1) << 2) | m1;

         } else if (num_parts == 3) {
            if (VERBOSE_DECODE) {
               in.printf_bits(25, 4, "M0 C2 C1 C0");
               in.printf_bits(offset - 5, 5, "M2M2 M1M1 M0");
            }

            uint32_t c0 = in.get_bits(25, 1);
            uint32_t c1 = in.get_bits(26, 1);
            uint32_t c2 = in.get_bits(27, 1);

            extra_cem_bits = c0 + c1 + c2;

            num_extra_cem_bits = 5;

            /* M0 is split: low bit at bit 28, high bit at (offset - 5);
             * 128 - weight_bits is the same value as 'offset'
             */
            uint32_t m0 = in.get_bits(28, 1) | (in.get_bits(128 - weight_bits - 5, 1) << 1);
            uint32_t m1 = in.get_bits(offset - 4, 2);
            uint32_t m2 = in.get_bits(offset - 2, 2);

            cems[0] = ((cem_base_class + c0) << 2) | m0;
            cems[1] = ((cem_base_class + c1) << 2) | m1;
            cems[2] = ((cem_base_class + c2) << 2) | m2;

         } else if (num_parts == 4) {
            if (VERBOSE_DECODE) {
               in.printf_bits(25, 4, "C3 C2 C1 C0");
               in.printf_bits(offset - 8, 8, "M3M3 M2M2 M1M1 M0M0");
            }

            uint32_t c0 = in.get_bits(25, 1);
            uint32_t c1 = in.get_bits(26, 1);
            uint32_t c2 = in.get_bits(27, 1);
            uint32_t c3 = in.get_bits(28, 1);

            extra_cem_bits = c0 + c1 + c2 + c3;

            num_extra_cem_bits = 8;

            uint32_t m0 = in.get_bits(offset - 8, 2);
            uint32_t m1 = in.get_bits(offset - 6, 2);
            uint32_t m2 = in.get_bits(offset - 4, 2);
            uint32_t m3 = in.get_bits(offset - 2, 2);

            cems[0] = ((cem_base_class + c0) << 2) | m0;
            cems[1] = ((cem_base_class + c1) << 2) | m1;
            cems[2] = ((cem_base_class + c2) << 2) | m2;
            cems[3] = ((cem_base_class + c3) << 2) | m3;
         } else {
            unreachable("");
         }
      }

      colour_endpoint_data_offset = 29;

   } else {
      /* Single partition: a 4-bit mode at bit 13, no partition index. */
      uint32_t cem = in.get_bits(13, 4);

      cem_base_class = cem >> 2;
      is_multi_cem = false;

      cems[0] = cem;

      partition_index = -1;

      if (VERBOSE_DECODE)
         in.printf_bits(13, 4, "CEM = %d (class %d)", cem, cem_base_class);

      colour_endpoint_data_offset = 17;
   }
}

/**
 * Extract the num_cem_values quantised colour endpoint values into
 * colour_endpoints_quant[], starting at bit colour_endpoint_data_offset.
 *
 * Values are stored in groups of five (trits), groups of three (quints)
 * or individually (plain bits), depending on ce_trits / ce_quints.  The
 * last group may be cut short; only the bits that are left are read.
 */
void Block::unpack_colour_endpoints(InputBitVector in)
{
   if (ce_trits) {
      int offset = colour_endpoint_data_offset;
      int bits_left = colour_endpoint_bits;
      for (int i = 0; i < num_cem_values; i += 5) {
         int bits_to_read = MIN2(bits_left, 8 + ce_bits * 5);
         /* If ce_trits then ce_bits <= 6, so bits_to_read <= 38 and we have to use uint64_t */
         uint64_t raw = in.get_bits64(offset, bits_to_read);
         unpack_trit_block(ce_bits, raw, &colour_endpoints_quant[i]);

         if (VERBOSE_DECODE)
            in.printf_bits(offset, bits_to_read,
                           "trits [%d,%d,%d,%d,%d]",
                           colour_endpoints_quant[i+0], colour_endpoints_quant[i+1],
                           colour_endpoints_quant[i+2], colour_endpoints_quant[i+3],
                           colour_endpoints_quant[i+4]);

         offset += 8 + ce_bits * 5;
         bits_left -= 8 + ce_bits * 5;
      }
   } else if (ce_quints) {
      int offset = colour_endpoint_data_offset;
      int bits_left = colour_endpoint_bits;
      for (int i = 0; i < num_cem_values; i += 3) {
         int bits_to_read = MIN2(bits_left, 7 + ce_bits * 3);
         /* If ce_quints then ce_bits <= 5, so bits_to_read
<= 22 and we can use uint32_t */ 962 uint32_t raw = in.get_bits(offset, bits_to_read); 963 unpack_quint_block(ce_bits, raw, &colour_endpoints_quant[i]); 964 965 if (VERBOSE_DECODE) 966 in.printf_bits(offset, bits_to_read, 967 "quints [%d,%d,%d]", 968 colour_endpoints_quant[i], colour_endpoints_quant[i+1], colour_endpoints_quant[i+2]); 969 970 offset += 7 + ce_bits * 3; 971 bits_left -= 7 + ce_bits * 3; 972 } 973 } else { 974 assert((colour_endpoint_bits % ce_bits) == 0); 975 int offset = colour_endpoint_data_offset; 976 for (int i = 0; i < num_cem_values; i++) { 977 colour_endpoints_quant[i] = in.get_bits(offset, ce_bits); 978 979 if (VERBOSE_DECODE) 980 in.printf_bits(offset, ce_bits, "bits [%d]", colour_endpoints_quant[i]); 981 982 offset += ce_bits; 983 } 984 } 985} 986 987void Block::decode_colour_endpoints() 988{ 989 int cem_values_idx = 0; 990 for (int part = 0; part < num_parts; ++part) { 991 uint8_t *v = &colour_endpoints[cem_values_idx]; 992 int v0 = v[0]; 993 int v1 = v[1]; 994 int v2 = v[2]; 995 int v3 = v[3]; 996 int v4 = v[4]; 997 int v5 = v[5]; 998 int v6 = v[6]; 999 int v7 = v[7]; 1000 cem_values_idx += ((cems[part] >> 2) + 1) * 2; 1001 1002 uint8x4_t e0, e1; 1003 int s0, s1, L0, L1; 1004 1005 switch (cems[part]) 1006 { 1007 case 0: 1008 e0 = uint8x4_t(v0, v0, v0, 0xff); 1009 e1 = uint8x4_t(v1, v1, v1, 0xff); 1010 break; 1011 case 1: 1012 L0 = (v0 >> 2) | (v1 & 0xc0); 1013 L1 = L0 + (v1 & 0x3f); 1014 if (L1 > 0xff) 1015 L1 = 0xff; 1016 e0 = uint8x4_t(L0, L0, L0, 0xff); 1017 e1 = uint8x4_t(L1, L1, L1, 0xff); 1018 break; 1019 case 4: 1020 e0 = uint8x4_t(v0, v0, v0, v2); 1021 e1 = uint8x4_t(v1, v1, v1, v3); 1022 break; 1023 case 5: 1024 bit_transfer_signed(v1, v0); 1025 bit_transfer_signed(v3, v2); 1026 e0 = uint8x4_t(v0, v0, v0, v2); 1027 e1 = uint8x4_t::clamped(v0+v1, v0+v1, v0+v1, v2+v3); 1028 break; 1029 case 6: 1030 e0 = uint8x4_t(v0*v3 >> 8, v1*v3 >> 8, v2*v3 >> 8, 0xff); 1031 e1 = uint8x4_t(v0, v1, v2, 0xff); 1032 break; 1033 case 8: 1034 s0 = v0 
+ v2 + v4; 1035 s1 = v1 + v3 + v5; 1036 if (s1 >= s0) { 1037 e0 = uint8x4_t(v0, v2, v4, 0xff); 1038 e1 = uint8x4_t(v1, v3, v5, 0xff); 1039 } else { 1040 e0 = blue_contract(v1, v3, v5, 0xff); 1041 e1 = blue_contract(v0, v2, v4, 0xff); 1042 } 1043 break; 1044 case 9: 1045 bit_transfer_signed(v1, v0); 1046 bit_transfer_signed(v3, v2); 1047 bit_transfer_signed(v5, v4); 1048 if (v1 + v3 + v5 >= 0) { 1049 e0 = uint8x4_t(v0, v2, v4, 0xff); 1050 e1 = uint8x4_t::clamped(v0+v1, v2+v3, v4+v5, 0xff); 1051 } else { 1052 e0 = blue_contract_clamped(v0+v1, v2+v3, v4+v5, 0xff); 1053 e1 = blue_contract(v0, v2, v4, 0xff); 1054 } 1055 break; 1056 case 10: 1057 e0 = uint8x4_t(v0*v3 >> 8, v1*v3 >> 8, v2*v3 >> 8, v4); 1058 e1 = uint8x4_t(v0, v1, v2, v5); 1059 break; 1060 case 12: 1061 s0 = v0 + v2 + v4; 1062 s1 = v1 + v3 + v5; 1063 if (s1 >= s0) { 1064 e0 = uint8x4_t(v0, v2, v4, v6); 1065 e1 = uint8x4_t(v1, v3, v5, v7); 1066 } else { 1067 e0 = blue_contract(v1, v3, v5, v7); 1068 e1 = blue_contract(v0, v2, v4, v6); 1069 } 1070 break; 1071 case 13: 1072 bit_transfer_signed(v1, v0); 1073 bit_transfer_signed(v3, v2); 1074 bit_transfer_signed(v5, v4); 1075 bit_transfer_signed(v7, v6); 1076 if (v1 + v3 + v5 >= 0) { 1077 e0 = uint8x4_t(v0, v2, v4, v6); 1078 e1 = uint8x4_t::clamped(v0+v1, v2+v3, v4+v5, v6+v7); 1079 } else { 1080 e0 = blue_contract_clamped(v0+v1, v2+v3, v4+v5, v6+v7); 1081 e1 = blue_contract(v0, v2, v4, v6); 1082 } 1083 break; 1084 default: 1085 /* HDR endpoints not supported; return error colour */ 1086 e0 = uint8x4_t(255, 0, 255, 255); 1087 e1 = uint8x4_t(255, 0, 255, 255); 1088 break; 1089 } 1090 1091 endpoints_decoded[0][part] = e0; 1092 endpoints_decoded[1][part] = e1; 1093 1094 if (VERBOSE_DECODE) { 1095 printf("cems[%d]=%d v=[", part, cems[part]); 1096 for (int i = 0; i < (cems[part] >> 2) + 1; ++i) { 1097 if (i) 1098 printf(", "); 1099 printf("%3d", v[i]); 1100 } 1101 printf("] e0=[%3d,%4d,%4d,%4d] e1=[%3d,%4d,%4d,%4d]\n", 1102 e0.v[0], e0.v[1], e0.v[2], e0.v[3], 1103 
e1.v[0], e1.v[1], e1.v[2], e1.v[3]); 1104 } 1105 } 1106} 1107 1108void Block::unpack_weights(InputBitVector in) 1109{ 1110 if (wt_trits) { 1111 int offset = 128; 1112 int bits_left = weight_bits; 1113 for (int i = 0; i < num_weights; i += 5) { 1114 int bits_to_read = MIN2(bits_left, 8 + 5*wt_bits); 1115 /* If wt_trits then wt_bits <= 3, so bits_to_read <= 23 and we can use uint32_t */ 1116 uint32_t raw = in.get_bits_rev(offset, bits_to_read); 1117 unpack_trit_block(wt_bits, raw, &weights_quant[i]); 1118 1119 if (VERBOSE_DECODE) 1120 in.printf_bits(offset - bits_to_read, bits_to_read, "weight trits [%d,%d,%d,%d,%d]", 1121 weights_quant[i+0], weights_quant[i+1], 1122 weights_quant[i+2], weights_quant[i+3], 1123 weights_quant[i+4]); 1124 1125 offset -= 8 + wt_bits * 5; 1126 bits_left -= 8 + wt_bits * 5; 1127 } 1128 1129 } else if (wt_quints) { 1130 1131 int offset = 128; 1132 int bits_left = weight_bits; 1133 for (int i = 0; i < num_weights; i += 3) { 1134 int bits_to_read = MIN2(bits_left, 7 + 3*wt_bits); 1135 /* If wt_quints then wt_bits <= 2, so bits_to_read <= 13 and we can use uint32_t */ 1136 uint32_t raw = in.get_bits_rev(offset, bits_to_read); 1137 unpack_quint_block(wt_bits, raw, &weights_quant[i]); 1138 1139 if (VERBOSE_DECODE) 1140 in.printf_bits(offset - bits_to_read, bits_to_read, "weight quints [%d,%d,%d]", 1141 weights_quant[i], weights_quant[i+1], weights_quant[i+2]); 1142 1143 offset -= 7 + wt_bits * 3; 1144 bits_left -= 7 + wt_bits * 3; 1145 } 1146 1147 } else { 1148 int offset = 128; 1149 assert((weight_bits % wt_bits) == 0); 1150 for (int i = 0; i < num_weights; ++i) { 1151 weights_quant[i] = in.get_bits_rev(offset, wt_bits); 1152 1153 if (VERBOSE_DECODE) 1154 in.printf_bits(offset - wt_bits, wt_bits, "weight bits [%d]", weights_quant[i]); 1155 1156 offset -= wt_bits; 1157 } 1158 } 1159} 1160 1161void Block::unquantise_weights() 1162{ 1163 assert(num_weights <= (int)ARRAY_SIZE(weights_quant)); 1164 assert(num_weights <= (int)ARRAY_SIZE(weights)); 
1165 1166 memset(weights, 0, sizeof(weights)); 1167 1168 for (int i = 0; i < num_weights; ++i) { 1169 1170 uint8_t v = weights_quant[i]; 1171 uint8_t w; 1172 1173 if (wt_trits) { 1174 1175 if (wt_bits == 0) { 1176 w = v * 32; 1177 } else { 1178 uint8_t A, B, C, D; 1179 A = (v & 0x1) ? 0x7F : 0x00; 1180 switch (wt_bits) { 1181 case 1: 1182 B = 0; 1183 C = 50; 1184 D = v >> 1; 1185 break; 1186 case 2: 1187 B = (v & 0x2) ? 0x45 : 0x00; 1188 C = 23; 1189 D = v >> 2; 1190 break; 1191 case 3: 1192 B = ((v & 0x6) >> 1) | ((v & 0x6) << 4); 1193 C = 11; 1194 D = v >> 3; 1195 break; 1196 default: 1197 unreachable(""); 1198 } 1199 uint16_t T = D * C + B; 1200 T = T ^ A; 1201 T = (A & 0x20) | (T >> 2); 1202 assert(T < 64); 1203 if (T > 32) 1204 T++; 1205 w = T; 1206 } 1207 1208 } else if (wt_quints) { 1209 1210 if (wt_bits == 0) { 1211 w = v * 16; 1212 } else { 1213 uint8_t A, B, C, D; 1214 A = (v & 0x1) ? 0x7F : 0x00; 1215 switch (wt_bits) { 1216 case 1: 1217 B = 0; 1218 C = 28; 1219 D = v >> 1; 1220 break; 1221 case 2: 1222 B = (v & 0x2) ? 0x42 : 0x00; 1223 C = 13; 1224 D = v >> 2; 1225 break; 1226 default: 1227 unreachable(""); 1228 } 1229 uint16_t T = D * C + B; 1230 T = T ^ A; 1231 T = (A & 0x20) | (T >> 2); 1232 assert(T < 64); 1233 if (T > 32) 1234 T++; 1235 w = T; 1236 } 1237 weights[i] = w; 1238 1239 } else { 1240 1241 switch (wt_bits) { 1242 case 1: w = v ? 0x3F : 0x00; break; 1243 case 2: w = v | (v << 2) | (v << 4); break; 1244 case 3: w = v | (v << 3); break; 1245 case 4: w = (v >> 2) | (v << 2); break; 1246 case 5: w = (v >> 4) | (v << 1); break; 1247 default: unreachable(""); 1248 } 1249 assert(w < 64); 1250 if (w > 32) 1251 w++; 1252 } 1253 weights[i] = w; 1254 } 1255} 1256 1257void Block::compute_infill_weights(int block_w, int block_h, int block_d) 1258{ 1259 int Ds = block_w <= 1 ? 0 : (1024 + block_w / 2) / (block_w - 1); 1260 int Dt = block_h <= 1 ? 0 : (1024 + block_h / 2) / (block_h - 1); 1261 int Dr = block_d <= 1 ? 
0 : (1024 + block_d / 2) / (block_d - 1); 1262 for (int r = 0; r < block_d; ++r) { 1263 for (int t = 0; t < block_h; ++t) { 1264 for (int s = 0; s < block_w; ++s) { 1265 int cs = Ds * s; 1266 int ct = Dt * t; 1267 int cr = Dr * r; 1268 int gs = (cs * (wt_w - 1) + 32) >> 6; 1269 int gt = (ct * (wt_h - 1) + 32) >> 6; 1270 int gr = (cr * (wt_d - 1) + 32) >> 6; 1271 assert(gs >= 0 && gs <= 176); 1272 assert(gt >= 0 && gt <= 176); 1273 assert(gr >= 0 && gr <= 176); 1274 int js = gs >> 4; 1275 int fs = gs & 0xf; 1276 int jt = gt >> 4; 1277 int ft = gt & 0xf; 1278 int jr = gr >> 4; 1279 int fr = gr & 0xf; 1280 1281 /* TODO: 3D */ 1282 (void)jr; 1283 (void)fr; 1284 1285 int w11 = (fs * ft + 8) >> 4; 1286 int w10 = ft - w11; 1287 int w01 = fs - w11; 1288 int w00 = 16 - fs - ft + w11; 1289 1290 if (dual_plane) { 1291 int p00, p01, p10, p11, i0, i1; 1292 int v0 = js + jt * wt_w; 1293 p00 = weights[(v0) * 2]; 1294 p01 = weights[(v0 + 1) * 2]; 1295 p10 = weights[(v0 + wt_w) * 2]; 1296 p11 = weights[(v0 + wt_w + 1) * 2]; 1297 i0 = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4; 1298 p00 = weights[(v0) * 2 + 1]; 1299 p01 = weights[(v0 + 1) * 2 + 1]; 1300 p10 = weights[(v0 + wt_w) * 2 + 1]; 1301 p11 = weights[(v0 + wt_w + 1) * 2 + 1]; 1302 assert((v0 + wt_w + 1) * 2 + 1 < (int)ARRAY_SIZE(weights)); 1303 i1 = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4; 1304 assert(0 <= i0 && i0 <= 64); 1305 infill_weights[0][s + t*block_w + r*block_w*block_h] = i0; 1306 infill_weights[1][s + t*block_w + r*block_w*block_h] = i1; 1307 } else { 1308 int p00, p01, p10, p11, i; 1309 int v0 = js + jt * wt_w; 1310 p00 = weights[v0]; 1311 p01 = weights[v0 + 1]; 1312 p10 = weights[v0 + wt_w]; 1313 p11 = weights[v0 + wt_w + 1]; 1314 assert(v0 + wt_w + 1 < (int)ARRAY_SIZE(weights)); 1315 i = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4; 1316 assert(0 <= i && i <= 64); 1317 infill_weights[0][s + t*block_w + r*block_w*block_h] = i; 1318 } 1319 } 1320 } 1321 } 1322} 1323 1324void 
Block::unquantise_colour_endpoints() 1325{ 1326 assert(num_cem_values <= (int)ARRAY_SIZE(colour_endpoints_quant)); 1327 assert(num_cem_values <= (int)ARRAY_SIZE(colour_endpoints)); 1328 1329 for (int i = 0; i < num_cem_values; ++i) { 1330 uint8_t v = colour_endpoints_quant[i]; 1331 1332 if (ce_trits) { 1333 uint16_t A, B, C, D; 1334 uint16_t t; 1335 A = (v & 0x1) ? 0x1FF : 0x000; 1336 switch (ce_bits) { 1337 case 1: 1338 B = 0; 1339 C = 204; 1340 D = v >> 1; 1341 break; 1342 case 2: 1343 B = (v & 0x2) ? 0x116 : 0x000; 1344 C = 93; 1345 D = v >> 2; 1346 break; 1347 case 3: 1348 t = ((v >> 1) & 0x3); 1349 B = t | (t << 2) | (t << 7); 1350 C = 44; 1351 D = v >> 3; 1352 break; 1353 case 4: 1354 t = ((v >> 1) & 0x7); 1355 B = t | (t << 6); 1356 C = 22; 1357 D = v >> 4; 1358 break; 1359 case 5: 1360 t = ((v >> 1) & 0xF); 1361 B = (t >> 2) | (t << 5); 1362 C = 11; 1363 D = v >> 5; 1364 break; 1365 case 6: 1366 B = ((v & 0x3E) << 3) | ((v >> 5) & 0x1); 1367 C = 5; 1368 D = v >> 6; 1369 break; 1370 default: 1371 unreachable(""); 1372 } 1373 uint16_t T = D * C + B; 1374 T = T ^ A; 1375 T = (A & 0x80) | (T >> 2); 1376 assert(T < 256); 1377 colour_endpoints[i] = T; 1378 } else if (ce_quints) { 1379 uint16_t A, B, C, D; 1380 uint16_t t; 1381 A = (v & 0x1) ? 0x1FF : 0x000; 1382 switch (ce_bits) { 1383 case 1: 1384 B = 0; 1385 C = 113; 1386 D = v >> 1; 1387 break; 1388 case 2: 1389 B = (v & 0x2) ? 
0x10C : 0x000; 1390 C = 54; 1391 D = v >> 2; 1392 break; 1393 case 3: 1394 t = ((v >> 1) & 0x3); 1395 B = (t >> 1) | (t << 1) | (t << 7); 1396 C = 26; 1397 D = v >> 3; 1398 break; 1399 case 4: 1400 t = ((v >> 1) & 0x7); 1401 B = (t >> 1) | (t << 6); 1402 C = 13; 1403 D = v >> 4; 1404 break; 1405 case 5: 1406 t = ((v >> 1) & 0xF); 1407 B = (t >> 4) | (t << 5); 1408 C = 6; 1409 D = v >> 5; 1410 break; 1411 default: 1412 unreachable(""); 1413 } 1414 uint16_t T = D * C + B; 1415 T = T ^ A; 1416 T = (A & 0x80) | (T >> 2); 1417 assert(T < 256); 1418 colour_endpoints[i] = T; 1419 } else { 1420 switch (ce_bits) { 1421 case 1: v = v ? 0xFF : 0x00; break; 1422 case 2: v = (v << 6) | (v << 4) | (v << 2) | v; break; 1423 case 3: v = (v << 5) | (v << 2) | (v >> 1); break; 1424 case 4: v = (v << 4) | v; break; 1425 case 5: v = (v << 3) | (v >> 2); break; 1426 case 6: v = (v << 2) | (v >> 4); break; 1427 case 7: v = (v << 1) | (v >> 6); break; 1428 case 8: break; 1429 default: unreachable(""); 1430 } 1431 colour_endpoints[i] = v; 1432 } 1433 } 1434} 1435 1436decode_error::type Block::decode(const Decoder &decoder, InputBitVector in) 1437{ 1438 decode_error::type err; 1439 1440 is_error = false; 1441 bogus_colour_endpoints = false; 1442 bogus_weights = false; 1443 is_void_extent = false; 1444 1445 wt_d = 1; 1446 /* TODO: 3D */ 1447 1448 /* TODO: test for all the illegal encodings */ 1449 1450 if (VERBOSE_DECODE) 1451 in.printf_bits(0, 128); 1452 1453 err = decode_block_mode(in); 1454 if (err != decode_error::ok) 1455 return err; 1456 1457 if (is_void_extent) 1458 return decode_error::ok; 1459 1460 /* TODO: 3D */ 1461 1462 calculate_from_weights(); 1463 1464 if (VERBOSE_DECODE) 1465 printf("weights_grid=%dx%dx%d dual_plane=%d num_weights=%d high_prec=%d r=%d range=0..%d (%dt %dq %db) weight_bits=%d\n", 1466 wt_w, wt_h, wt_d, dual_plane, num_weights, high_prec, wt_range, wt_max, wt_trits, wt_quints, wt_bits, weight_bits); 1467 1468 if (wt_w > decoder.block_w || wt_h > 
decoder.block_h || wt_d > decoder.block_d) 1469 return decode_error::weight_grid_exceeds_block_size; 1470 1471 num_parts = in.get_bits(11, 2) + 1; 1472 1473 if (VERBOSE_DECODE) 1474 in.printf_bits(11, 2, "partitions = %d", num_parts); 1475 1476 if (dual_plane && num_parts > 3) 1477 return decode_error::dual_plane_and_too_many_partitions; 1478 1479 decode_cem(in); 1480 1481 if (VERBOSE_DECODE) 1482 printf("cem=[%d,%d,%d,%d] base_cem_class=%d\n", cems[0], cems[1], cems[2], cems[3], cem_base_class); 1483 1484 int num_cem_pairs = (cem_base_class + 1) * num_parts + extra_cem_bits; 1485 num_cem_values = num_cem_pairs * 2; 1486 1487 calculate_remaining_bits(); 1488 err = calculate_colour_endpoints_size(); 1489 if (err != decode_error::ok) 1490 return err; 1491 1492 if (VERBOSE_DECODE) 1493 in.printf_bits(colour_endpoint_data_offset, colour_endpoint_bits, 1494 "endpoint data (%d bits, %d vals, %dt %dq %db)", 1495 colour_endpoint_bits, num_cem_values, ce_trits, ce_quints, ce_bits); 1496 1497 unpack_colour_endpoints(in); 1498 1499 if (VERBOSE_DECODE) { 1500 printf("cem values raw =["); 1501 for (int i = 0; i < num_cem_values; i++) { 1502 if (i) 1503 printf(", "); 1504 printf("%3d", colour_endpoints_quant[i]); 1505 } 1506 printf("]\n"); 1507 } 1508 1509 if (num_cem_values > 18) 1510 return decode_error::invalid_colour_endpoints_count; 1511 1512 unquantise_colour_endpoints(); 1513 1514 if (VERBOSE_DECODE) { 1515 printf("cem values norm=["); 1516 for (int i = 0; i < num_cem_values; i++) { 1517 if (i) 1518 printf(", "); 1519 printf("%3d", colour_endpoints[i]); 1520 } 1521 printf("]\n"); 1522 } 1523 1524 decode_colour_endpoints(); 1525 1526 if (dual_plane) { 1527 int ccs_offset = 128 - weight_bits - num_extra_cem_bits - 2; 1528 colour_component_selector = in.get_bits(ccs_offset, 2); 1529 1530 if (VERBOSE_DECODE) 1531 in.printf_bits(ccs_offset, 2, "colour component selector = %d", colour_component_selector); 1532 } else { 1533 colour_component_selector = 0; 1534 } 1535 1536 1537 
if (VERBOSE_DECODE) 1538 in.printf_bits(128 - weight_bits, weight_bits, "weights (%d bits)", weight_bits); 1539 1540 if (num_weights > 64) 1541 return decode_error::invalid_num_weights; 1542 1543 if (weight_bits < 24 || weight_bits > 96) 1544 return decode_error::invalid_weight_bits; 1545 1546 unpack_weights(in); 1547 1548 unquantise_weights(); 1549 1550 if (VERBOSE_DECODE) { 1551 printf("weights=["); 1552 for (int i = 0; i < num_weights; ++i) { 1553 if (i) 1554 printf(", "); 1555 printf("%d", weights[i]); 1556 } 1557 printf("]\n"); 1558 1559 for (int plane = 0; plane <= dual_plane; ++plane) { 1560 printf("weights (plane %d):\n", plane); 1561 int i = 0; 1562 (void)i; 1563 1564 for (int r = 0; r < wt_d; ++r) { 1565 for (int t = 0; t < wt_h; ++t) { 1566 for (int s = 0; s < wt_w; ++s) { 1567 printf("%3d", weights[i++ * (1 + dual_plane) + plane]); 1568 } 1569 printf("\n"); 1570 } 1571 if (r < wt_d - 1) 1572 printf("\n"); 1573 } 1574 } 1575 } 1576 1577 compute_infill_weights(decoder.block_w, decoder.block_h, decoder.block_d); 1578 1579 if (VERBOSE_DECODE) { 1580 for (int plane = 0; plane <= dual_plane; ++plane) { 1581 printf("infilled weights (plane %d):\n", plane); 1582 int i = 0; 1583 (void)i; 1584 1585 for (int r = 0; r < decoder.block_d; ++r) { 1586 for (int t = 0; t < decoder.block_h; ++t) { 1587 for (int s = 0; s < decoder.block_w; ++s) { 1588 printf("%3d", infill_weights[plane][i++]); 1589 } 1590 printf("\n"); 1591 } 1592 if (r < decoder.block_d - 1) 1593 printf("\n"); 1594 } 1595 } 1596 } 1597 if (VERBOSE_DECODE) 1598 printf("\n"); 1599 1600 return decode_error::ok; 1601} 1602 1603void Block::write_decoded(const Decoder &decoder, uint16_t *output) 1604{ 1605 /* sRGB can only be stored as unorm8. 
*/ 1606 assert(!decoder.srgb || decoder.output_unorm8); 1607 1608 if (is_void_extent) { 1609 for (int idx = 0; idx < decoder.block_w*decoder.block_h*decoder.block_d; ++idx) { 1610 if (decoder.output_unorm8) { 1611 output[idx*4+0] = void_extent_colour_r >> 8; 1612 output[idx*4+1] = void_extent_colour_g >> 8; 1613 output[idx*4+2] = void_extent_colour_b >> 8; 1614 output[idx*4+3] = void_extent_colour_a >> 8; 1615 } else { 1616 /* Store the color as FP16. */ 1617 output[idx*4+0] = _mesa_uint16_div_64k_to_half(void_extent_colour_r); 1618 output[idx*4+1] = _mesa_uint16_div_64k_to_half(void_extent_colour_g); 1619 output[idx*4+2] = _mesa_uint16_div_64k_to_half(void_extent_colour_b); 1620 output[idx*4+3] = _mesa_uint16_div_64k_to_half(void_extent_colour_a); 1621 } 1622 } 1623 return; 1624 } 1625 1626 int small_block = (decoder.block_w * decoder.block_h * decoder.block_d) < 31; 1627 1628 int idx = 0; 1629 for (int z = 0; z < decoder.block_d; ++z) { 1630 for (int y = 0; y < decoder.block_h; ++y) { 1631 for (int x = 0; x < decoder.block_w; ++x) { 1632 1633 int partition; 1634 if (num_parts > 1) { 1635 partition = select_partition(partition_index, x, y, z, num_parts, small_block); 1636 assert(partition < num_parts); 1637 } else { 1638 partition = 0; 1639 } 1640 1641 /* TODO: HDR */ 1642 1643 uint8x4_t e0 = endpoints_decoded[0][partition]; 1644 uint8x4_t e1 = endpoints_decoded[1][partition]; 1645 uint16_t c0[4], c1[4]; 1646 1647 /* Expand to 16 bits. 
*/ 1648 if (decoder.srgb) { 1649 c0[0] = (uint16_t)((e0.v[0] << 8) | 0x80); 1650 c0[1] = (uint16_t)((e0.v[1] << 8) | 0x80); 1651 c0[2] = (uint16_t)((e0.v[2] << 8) | 0x80); 1652 c0[3] = (uint16_t)((e0.v[3] << 8) | 0x80); 1653 1654 c1[0] = (uint16_t)((e1.v[0] << 8) | 0x80); 1655 c1[1] = (uint16_t)((e1.v[1] << 8) | 0x80); 1656 c1[2] = (uint16_t)((e1.v[2] << 8) | 0x80); 1657 c1[3] = (uint16_t)((e1.v[3] << 8) | 0x80); 1658 } else { 1659 c0[0] = (uint16_t)((e0.v[0] << 8) | e0.v[0]); 1660 c0[1] = (uint16_t)((e0.v[1] << 8) | e0.v[1]); 1661 c0[2] = (uint16_t)((e0.v[2] << 8) | e0.v[2]); 1662 c0[3] = (uint16_t)((e0.v[3] << 8) | e0.v[3]); 1663 1664 c1[0] = (uint16_t)((e1.v[0] << 8) | e1.v[0]); 1665 c1[1] = (uint16_t)((e1.v[1] << 8) | e1.v[1]); 1666 c1[2] = (uint16_t)((e1.v[2] << 8) | e1.v[2]); 1667 c1[3] = (uint16_t)((e1.v[3] << 8) | e1.v[3]); 1668 } 1669 1670 int w[4]; 1671 if (dual_plane) { 1672 int w0 = infill_weights[0][idx]; 1673 int w1 = infill_weights[1][idx]; 1674 w[0] = w[1] = w[2] = w[3] = w0; 1675 w[colour_component_selector] = w1; 1676 } else { 1677 int w0 = infill_weights[0][idx]; 1678 w[0] = w[1] = w[2] = w[3] = w0; 1679 } 1680 1681 /* Interpolate to produce UNORM16, applying weights. */ 1682 uint16_t c[4] = { 1683 (uint16_t)((c0[0] * (64 - w[0]) + c1[0] * w[0] + 32) >> 6), 1684 (uint16_t)((c0[1] * (64 - w[1]) + c1[1] * w[1] + 32) >> 6), 1685 (uint16_t)((c0[2] * (64 - w[2]) + c1[2] * w[2] + 32) >> 6), 1686 (uint16_t)((c0[3] * (64 - w[3]) + c1[3] * w[3] + 32) >> 6), 1687 }; 1688 1689 if (decoder.output_unorm8) { 1690 output[idx*4+0] = c[0] >> 8; 1691 output[idx*4+1] = c[1] >> 8; 1692 output[idx*4+2] = c[2] >> 8; 1693 output[idx*4+3] = c[3] >> 8; 1694 } else { 1695 /* Store the color as FP16. */ 1696 output[idx*4+0] = c[0] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[0]); 1697 output[idx*4+1] = c[1] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[1]); 1698 output[idx*4+2] = c[2] == 65535 ? 
FP16_ONE : _mesa_uint16_div_64k_to_half(c[2]); 1699 output[idx*4+3] = c[3] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[3]); 1700 } 1701 1702 idx++; 1703 } 1704 } 1705 } 1706} 1707 1708void Block::calculate_from_weights() 1709{ 1710 wt_trits = 0; 1711 wt_quints = 0; 1712 wt_bits = 0; 1713 switch (high_prec) { 1714 case 0: 1715 switch (wt_range) { 1716 case 0x2: wt_max = 1; wt_bits = 1; break; 1717 case 0x3: wt_max = 2; wt_trits = 1; break; 1718 case 0x4: wt_max = 3; wt_bits = 2; break; 1719 case 0x5: wt_max = 4; wt_quints = 1; break; 1720 case 0x6: wt_max = 5; wt_trits = 1; wt_bits = 1; break; 1721 case 0x7: wt_max = 7; wt_bits = 3; break; 1722 default: abort(); 1723 } 1724 break; 1725 case 1: 1726 switch (wt_range) { 1727 case 0x2: wt_max = 9; wt_quints = 1; wt_bits = 1; break; 1728 case 0x3: wt_max = 11; wt_trits = 1; wt_bits = 2; break; 1729 case 0x4: wt_max = 15; wt_bits = 4; break; 1730 case 0x5: wt_max = 19; wt_quints = 1; wt_bits = 2; break; 1731 case 0x6: wt_max = 23; wt_trits = 1; wt_bits = 3; break; 1732 case 0x7: wt_max = 31; wt_bits = 5; break; 1733 default: abort(); 1734 } 1735 break; 1736 } 1737 1738 assert(wt_trits || wt_quints || wt_bits); 1739 1740 num_weights = wt_w * wt_h * wt_d; 1741 1742 if (dual_plane) 1743 num_weights *= 2; 1744 1745 weight_bits = 1746 (num_weights * 8 * wt_trits + 4) / 5 1747 + (num_weights * 7 * wt_quints + 2) / 3 1748 + num_weights * wt_bits; 1749} 1750 1751void Block::calculate_remaining_bits() 1752{ 1753 int config_bits; 1754 if (num_parts > 1) { 1755 if (!is_multi_cem) 1756 config_bits = 29; 1757 else 1758 config_bits = 25 + 3 * num_parts; 1759 } else { 1760 config_bits = 17; 1761 } 1762 1763 if (dual_plane) 1764 config_bits += 2; 1765 1766 remaining_bits = 128 - config_bits - weight_bits; 1767} 1768 1769decode_error::type Block::calculate_colour_endpoints_size() 1770{ 1771 /* Specified as illegal */ 1772 if (remaining_bits < (13 * num_cem_values + 4) / 5) { 1773 colour_endpoint_bits = ce_max = ce_trits = 
ce_quints = ce_bits = 0; 1774 return decode_error::invalid_colour_endpoints_size; 1775 } 1776 1777 /* Find the largest cem_ranges that fits within remaining_bits */ 1778 for (int i = ARRAY_SIZE(cem_ranges)-1; i >= 0; --i) { 1779 int cem_bits; 1780 cem_bits = (num_cem_values * 8 * cem_ranges[i].t + 4) / 5 1781 + (num_cem_values * 7 * cem_ranges[i].q + 2) / 3 1782 + num_cem_values * cem_ranges[i].b; 1783 1784 if (cem_bits <= remaining_bits) 1785 { 1786 colour_endpoint_bits = cem_bits; 1787 ce_max = cem_ranges[i].max; 1788 ce_trits = cem_ranges[i].t; 1789 ce_quints = cem_ranges[i].q; 1790 ce_bits = cem_ranges[i].b; 1791 return decode_error::ok; 1792 } 1793 } 1794 1795 assert(0); 1796 return decode_error::invalid_colour_endpoints_size; 1797} 1798 1799/** 1800 * Decode ASTC 2D LDR texture data. 1801 * 1802 * \param src_width in pixels 1803 * \param src_height in pixels 1804 * \param dst_stride in bytes 1805 */ 1806extern "C" void 1807_mesa_unpack_astc_2d_ldr(uint8_t *dst_row, 1808 unsigned dst_stride, 1809 const uint8_t *src_row, 1810 unsigned src_stride, 1811 unsigned src_width, 1812 unsigned src_height, 1813 mesa_format format) 1814{ 1815 assert(_mesa_is_format_astc_2d(format)); 1816 bool srgb = _mesa_is_format_srgb(format); 1817 1818 unsigned blk_w, blk_h; 1819 _mesa_get_format_block_size(format, &blk_w, &blk_h); 1820 1821 const unsigned block_size = 16; 1822 unsigned x_blocks = (src_width + blk_w - 1) / blk_w; 1823 unsigned y_blocks = (src_height + blk_h - 1) / blk_h; 1824 1825 Decoder dec(blk_w, blk_h, 1, srgb, true); 1826 1827 for (unsigned y = 0; y < y_blocks; ++y) { 1828 for (unsigned x = 0; x < x_blocks; ++x) { 1829 /* Same size as the largest block. */ 1830 uint16_t block_out[12 * 12 * 4]; 1831 1832 dec.decode(src_row + x * block_size, block_out); 1833 1834 /* This can be smaller with NPOT dimensions. 
*/ 1835 unsigned dst_blk_w = MIN2(blk_w, src_width - x*blk_w); 1836 unsigned dst_blk_h = MIN2(blk_h, src_height - y*blk_h); 1837 1838 for (unsigned sub_y = 0; sub_y < dst_blk_h; ++sub_y) { 1839 for (unsigned sub_x = 0; sub_x < dst_blk_w; ++sub_x) { 1840 uint8_t *dst = dst_row + sub_y * dst_stride + 1841 (x * blk_w + sub_x) * 4; 1842 const uint16_t *src = &block_out[(sub_y * blk_w + sub_x) * 4]; 1843 1844 dst[0] = src[0]; 1845 dst[1] = src[1]; 1846 dst[2] = src[2]; 1847 dst[3] = src[3]; 1848 } 1849 } 1850 } 1851 src_row += src_stride; 1852 dst_row += dst_stride * blk_h; 1853 } 1854} 1855