1// SPDX-License-Identifier: Apache-2.0 2// ---------------------------------------------------------------------------- 3// Copyright 2011-2023 Arm Limited 4// 5// Licensed under the Apache License, Version 2.0 (the "License"); you may not 6// use this file except in compliance with the License. You may obtain a copy 7// of the License at: 8// 9// http://www.apache.org/licenses/LICENSE-2.0 10// 11// Unless required by applicable law or agreed to in writing, software 12// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 13// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 14// License for the specific language governing permissions and limitations 15// under the License. 16// ---------------------------------------------------------------------------- 17 18/** 19 * @brief Functions for converting between symbolic and physical encodings. 20 */ 21 22#include "astcenc_internal.h" 23 24#include <cassert> 25 26/** 27 * @brief Reverse bits in a byte. 28 * 29 * @param p The value to reverse. 30 * 31 * @return The reversed result. 32 */ 33static inline int bitrev8(int p) 34{ 35 p = ((p & 0x0F) << 4) | ((p >> 4) & 0x0F); 36 p = ((p & 0x33) << 2) | ((p >> 2) & 0x33); 37 p = ((p & 0x55) << 1) | ((p >> 1) & 0x55); 38 return p; 39} 40 41 42/** 43 * @brief Read up to 8 bits at an arbitrary bit offset. 44 * 45 * The stored value is at most 8 bits, but can be stored at an offset of between 0 and 7 bits so may 46 * span two separate bytes in memory. 47 * 48 * @param bitcount The number of bits to read. 49 * @param bitoffset The bit offset to read from, between 0 and 7. 50 * @param[in,out] ptr The data pointer to read from. 51 * 52 * @return The read value. 
53 */ 54static inline int read_bits( 55 int bitcount, 56 int bitoffset, 57 const uint8_t* ptr 58) { 59 int mask = (1 << bitcount) - 1; 60 ptr += bitoffset >> 3; 61 bitoffset &= 7; 62 int value = ptr[0] | (ptr[1] << 8); 63 value >>= bitoffset; 64 value &= mask; 65 return value; 66} 67 68#if !defined(ASTCENC_DECOMPRESS_ONLY) 69 70/** 71 * @brief Write up to 8 bits at an arbitrary bit offset. 72 * 73 * The stored value is at most 8 bits, but can be stored at an offset of between 0 and 7 bits so 74 * may span two separate bytes in memory. 75 * 76 * @param value The value to write. 77 * @param bitcount The number of bits to write, starting from LSB. 78 * @param bitoffset The bit offset to store at, between 0 and 7. 79 * @param[in,out] ptr The data pointer to write to. 80 */ 81static inline void write_bits( 82 int value, 83 int bitcount, 84 int bitoffset, 85 uint8_t* ptr 86) { 87 int mask = (1 << bitcount) - 1; 88 value &= mask; 89 ptr += bitoffset >> 3; 90 bitoffset &= 7; 91 value <<= bitoffset; 92 mask <<= bitoffset; 93 mask = ~mask; 94 95 ptr[0] &= mask; 96 ptr[0] |= value; 97 ptr[1] &= mask >> 8; 98 ptr[1] |= value >> 8; 99} 100 101static const int HIGH_SPEED_PROFILE_COLOR_BYTES = 8; 102static const int HIGH_SPEED_PROFILE_WEIGHT_BYTES = 16; 103/* See header for documentation. 
*/ 104void symbolic_to_physical( 105 const block_size_descriptor& bsd, 106 const symbolic_compressed_block& scb, 107 uint8_t pcb[16] 108) { 109 assert(scb.block_type != SYM_BTYPE_ERROR); 110 // Constant color block using UNORM16 colors 111 if (scb.block_type == SYM_BTYPE_CONST_U16 && scb.privateProfile != HIGH_SPEED_PROFILE) 112 { 113 // There is currently no attempt to coalesce larger void-extents 114 static const uint8_t cbytes[8] { 0xFC, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; 115 for (unsigned int i = 0; i < 8; i++) 116 { 117 pcb[i] = cbytes[i]; 118 } 119 120 for (unsigned int i = 0; i < BLOCK_MAX_COMPONENTS; i++) 121 { 122 pcb[2 * i + 8] = scb.constant_color[i] & 0xFF; 123 pcb[2 * i + 9] = (scb.constant_color[i] >> 8) & 0xFF; 124 } 125 126 return; 127 } 128 129 // Constant color block using FP16 colors 130 if (scb.block_type == SYM_BTYPE_CONST_F16 && scb.privateProfile != HIGH_SPEED_PROFILE) 131 { 132 // There is currently no attempt to coalesce larger void-extents 133 static const uint8_t cbytes[8] { 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; 134 for (unsigned int i = 0; i < 8; i++) 135 { 136 pcb[i] = cbytes[i]; 137 } 138 139 for (unsigned int i = 0; i < BLOCK_MAX_COMPONENTS; i++) 140 { 141 pcb[2 * i + 8] = scb.constant_color[i] & 0xFF; 142 pcb[2 * i + 9] = (scb.constant_color[i] >> 8) & 0xFF; 143 } 144 145 return; 146 } 147 148 unsigned int partition_count = scb.partition_count; 149 150 // Compress the weights. 
151 // They are encoded as an ordinary integer-sequence, then bit-reversed 152 uint8_t weightbuf[16] { 0 }; 153 154 const auto& bm = bsd.get_block_mode(scb.block_mode); 155 const auto& di = bsd.get_decimation_info(bm.decimation_mode); 156 int weight_count = di.weight_count; 157 quant_method weight_quant_method = bm.get_weight_quant_mode(); 158 float weight_quant_levels = static_cast<float>(get_quant_level(weight_quant_method)); 159 int is_dual_plane = bm.is_dual_plane; 160 161 const auto& qat = quant_and_xfer_tables[weight_quant_method]; 162 163 if (scb.privateProfile == HIGH_SPEED_PROFILE) 164 { 165 uint8_t weights[64]; 166 for (int i = 0; i < weight_count; i++) 167 { 168 float uqw = static_cast<float>(scb.weights[i]); 169 float qw = (uqw / 64.0f) * (weight_quant_levels - 1.0f); 170 int qwi = static_cast<int>(qw + 0.5f); 171 weights[i] = qat.scramble_map[qwi]; 172 } 173 encode_ise(QUANT_6, HIGH_SPEED_PROFILE_WEIGHT_BYTES, weights, weightbuf, 0); 174 for (int i = 0; i < HIGH_SPEED_PROFILE_WEIGHT_BYTES; i++) 175 { 176 pcb[i] = static_cast<uint8_t>(bitrev8(weightbuf[HIGH_SPEED_PROFILE_WEIGHT_BYTES - 1 - i])); 177 } 178 pcb[0] = 0x43; // the first byte of every block stream is 0x43 for HIGH_SPEED_PROFILE 179 pcb[1] = 0x80; // the second byte of every block stream is 0x80 for HIGH_SPEED_PROFILE 180 pcb[2] = 0x01; // the third (2 idx) byte of every block stream is 0x01 for HIGH_SPEED_PROFILE 181 uint8_t values_to_encode[HIGH_SPEED_PROFILE_COLOR_BYTES]; 182 for (int j = 0; j < HIGH_SPEED_PROFILE_COLOR_BYTES; j++) 183 { 184 values_to_encode[j] = scb.color_values[0][j]; 185 } 186 encode_ise(scb.get_color_quant_mode(), HIGH_SPEED_PROFILE_COLOR_BYTES, 187 values_to_encode, pcb, 17); // the color is starting from 17th bit for HIGH_SPEED_PROFILE 188 return; 189 } 190 191 int real_weight_count = is_dual_plane ? 
2 * weight_count : weight_count; 192 193 int bits_for_weights = get_ise_sequence_bitcount(real_weight_count, weight_quant_method); 194 195 uint8_t weights[64]; 196 if (is_dual_plane) 197 { 198 for (int i = 0; i < weight_count; i++) 199 { 200 float uqw = static_cast<float>(scb.weights[i]); 201 float qw = (uqw / 64.0f) * (weight_quant_levels - 1.0f); 202 int qwi = static_cast<int>(qw + 0.5f); 203 weights[2 * i] = qat.scramble_map[qwi]; 204 205 uqw = static_cast<float>(scb.weights[i + WEIGHTS_PLANE2_OFFSET]); 206 qw = (uqw / 64.0f) * (weight_quant_levels - 1.0f); 207 qwi = static_cast<int>(qw + 0.5f); 208 weights[2 * i + 1] = qat.scramble_map[qwi]; 209 } 210 } 211 else 212 { 213 for (int i = 0; i < weight_count; i++) 214 { 215 float uqw = static_cast<float>(scb.weights[i]); 216 float qw = (uqw / 64.0f) * (weight_quant_levels - 1.0f); 217 int qwi = static_cast<int>(qw + 0.5f); 218 weights[i] = qat.scramble_map[qwi]; 219 } 220 } 221 222 encode_ise(weight_quant_method, real_weight_count, weights, weightbuf, 0); 223 224 for (int i = 0; i < 16; i++) 225 { 226 pcb[i] = static_cast<uint8_t>(bitrev8(weightbuf[15 - i])); 227 } 228 229 write_bits(scb.block_mode, 11, 0, pcb); 230 write_bits(partition_count - 1, 2, 11, pcb); 231 232 int below_weights_pos = 128 - bits_for_weights; 233 234 // Encode partition index and color endpoint types for blocks with 2+ partitions 235 if (partition_count > 1) 236 { 237 write_bits(scb.partition_index, 6, 13, pcb); 238 write_bits(scb.partition_index >> 6, PARTITION_INDEX_BITS - 6, 19, pcb); 239 240 if (scb.color_formats_matched) 241 { 242 write_bits(scb.color_formats[0] << 2, 6, 13 + PARTITION_INDEX_BITS, pcb); 243 } 244 else 245 { 246 // Check endpoint types for each partition to determine the lowest class present 247 int low_class = 4; 248 249 for (unsigned int i = 0; i < partition_count; i++) 250 { 251 int class_of_format = scb.color_formats[i] >> 2; 252 low_class = astc::min(class_of_format, low_class); 253 } 254 255 if (low_class == 3) 256 
{ 257 low_class = 2; 258 } 259 260 int encoded_type = low_class + 1; 261 int bitpos = 2; 262 263 for (unsigned int i = 0; i < partition_count; i++) 264 { 265 int classbit_of_format = (scb.color_formats[i] >> 2) - low_class; 266 encoded_type |= classbit_of_format << bitpos; 267 bitpos++; 268 } 269 270 for (unsigned int i = 0; i < partition_count; i++) 271 { 272 int lowbits_of_format = scb.color_formats[i] & 3; 273 encoded_type |= lowbits_of_format << bitpos; 274 bitpos += 2; 275 } 276 277 int encoded_type_lowpart = encoded_type & 0x3F; 278 int encoded_type_highpart = encoded_type >> 6; 279 int encoded_type_highpart_size = (3 * partition_count) - 4; 280 int encoded_type_highpart_pos = 128 - bits_for_weights - encoded_type_highpart_size; 281 write_bits(encoded_type_lowpart, 6, 13 + PARTITION_INDEX_BITS, pcb); 282 write_bits(encoded_type_highpart, encoded_type_highpart_size, encoded_type_highpart_pos, pcb); 283 below_weights_pos -= encoded_type_highpart_size; 284 } 285 } 286 else 287 { 288 write_bits(scb.color_formats[0], 4, 13, pcb); 289 } 290 291 // In dual-plane mode, encode the color component of the second plane of weights 292 if (is_dual_plane) 293 { 294 write_bits(scb.plane2_component, 2, below_weights_pos - 2, pcb); 295 } 296 297 // Encode the color components 298 uint8_t values_to_encode[32]; 299 int valuecount_to_encode = 0; 300 301 const uint8_t* pack_table = color_uquant_to_scrambled_pquant_tables[scb.quant_mode - QUANT_6]; 302 for (unsigned int i = 0; i < scb.partition_count; i++) 303 { 304 int vals = 2 * (scb.color_formats[i] >> 2) + 2; 305 assert(vals <= 8); 306 for (int j = 0; j < vals; j++) 307 { 308 values_to_encode[j + valuecount_to_encode] = pack_table[scb.color_values[i][j]]; 309 } 310 valuecount_to_encode += vals; 311 } 312 313 encode_ise(scb.get_color_quant_mode(), valuecount_to_encode, values_to_encode, pcb, 314 scb.partition_count == 1 ? 17 : 19 + PARTITION_INDEX_BITS); 315} 316 317#endif 318 319/* See header for documentation. 
*/
// Unpack the 16 byte physical block pcb into the symbolic description scb. On any malformed
// encoding scb.block_type is set to SYM_BTYPE_ERROR and the function returns early; fields of scb
// written before the problem was detected are left as they are.
void physical_to_symbolic(
	const block_size_descriptor& bsd,
	const uint8_t pcb[16],
	symbolic_compressed_block& scb
) {
	scb.block_type = SYM_BTYPE_NONCONST;

	// Extract header fields
	const int block_mode = read_bits(11, 0, pcb);

	if ((block_mode & 0x1FF) == 0x1FC)
	{
		// Constant color block; bit 9 of the mode selects the stored color format
		if ((block_mode & 0x200) == 0)
		{
			scb.block_type = SYM_BTYPE_CONST_U16;
		}
		else
		{
			scb.block_type = SYM_BTYPE_CONST_F16;
		}

		scb.partition_count = 0;

		// Colors are four little-endian 16-bit values in the upper 8 bytes
		for (int chan = 0; chan < 4; chan++)
		{
			const int lo_byte = pcb[2 * chan + 8];
			const int hi_byte = pcb[2 * chan + 9];
			scb.constant_color[chan] = lo_byte | (hi_byte << 8);
		}

		// Additionally, check that the void-extent coordinates are valid
		if (bsd.zdim == 1)
		{
			// 2D void-extent: two reserved bits which must both be set ...
			if (read_bits(2, 10, pcb) != 3)
			{
				scb.block_type = SYM_BTYPE_ERROR;
				return;
			}

			// ... then low_s, high_s, low_t, high_t as consecutive 13-bit fields from bit 12,
			// each read in two pieces because read_bits only spans two bytes
			int extents[4];
			bool all_ones = true;
			for (int k = 0; k < 4; k++)
			{
				const int pos = 12 + 13 * k;
				extents[k] = read_bits(8, pos, pcb) | (read_bits(5, pos + 8, pcb) << 8);
				all_ones = all_ones && (extents[k] == 0x1FFF);
			}

			const bool bad_range = (extents[0] >= extents[1]) || (extents[2] >= extents[3]);

			// An empty or inverted range is only accepted when every field is all-ones
			if (!all_ones && bad_range)
			{
				scb.block_type = SYM_BTYPE_ERROR;
				return;
			}
		}
		else
		{
			// 3D void-extent: low/high pairs for s, t, p as consecutive 9-bit fields from bit 10
			int extents[6];
			bool all_ones = true;
			for (int k = 0; k < 6; k++)
			{
				extents[k] = read_bits(9, 10 + 9 * k, pcb);
				all_ones = all_ones && (extents[k] == 0x1FF);
			}

			bool bad_range = false;
			for (int axis = 0; axis < 3; axis++)
			{
				bad_range = bad_range || (extents[2 * axis] >= extents[2 * axis + 1]);
			}

			// An empty or inverted range is only accepted when every field is all-ones
			if (!all_ones && bad_range)
			{
				scb.block_type = SYM_BTYPE_ERROR;
				return;
			}
		}

		return;
	}

	// Reject block modes that the block size descriptor does not support
	unsigned int packed_mode = bsd.block_mode_packed_index[block_mode];
	if (packed_mode == BLOCK_BAD_BLOCK_MODE)
	{
		scb.block_type = SYM_BTYPE_ERROR;
		return;
	}

	const auto& mode_info = bsd.get_block_mode(block_mode);
	const auto& decimation = bsd.get_decimation_info(mode_info.decimation_mode);

	int weight_count = decimation.weight_count;
	promise(weight_count > 0);

	const quant_method weight_quant = static_cast<quant_method>(mode_info.quant_mode);
	const int is_dual_plane = mode_info.is_dual_plane;
	const int real_weight_count = weight_count * (is_dual_plane ? 2 : 1);

	int partition_count = read_bits(2, 11, pcb) + 1;
	promise(partition_count > 0);

	scb.block_mode = static_cast<uint16_t>(block_mode);
	scb.partition_count = static_cast<uint8_t>(partition_count);

	// The weight stream is read from the top of the block downwards, so reverse both the byte
	// order and the bit order to get a stream that can be decoded from bit zero
	uint8_t reversed[16];
	for (int i = 0; i < 16; i++)
	{
		reversed[15 - i] = static_cast<uint8_t>(bitrev8(pcb[i]));
	}

	int bits_for_weights = get_ise_sequence_bitcount(real_weight_count, weight_quant);

	// Bit position just below the weight data; extra fields are found working down from here
	int below_weights_pos = 128 - bits_for_weights;

	uint8_t indices[64];
	decode_ise(weight_quant, real_weight_count, reversed, indices, 0);

	// Map the decoded values back to stored weights; in dual-plane mode the two planes are
	// interleaved in the stream
	const auto& unquant_map = quant_and_xfer_tables[weight_quant].unscramble_and_unquant_map;
	const int stride = is_dual_plane ? 2 : 1;
	for (int i = 0; i < weight_count; i++)
	{
		scb.weights[i] = unquant_map[indices[stride * i]];
		if (is_dual_plane)
		{
			scb.weights[i + WEIGHTS_PLANE2_OFFSET] = unquant_map[indices[2 * i + 1]];
		}
	}

	// Dual-plane blocks with four partitions are rejected
	if (is_dual_plane && partition_count == 4)
	{
		scb.block_type = SYM_BTYPE_ERROR;
		return;
	}

	scb.color_formats_matched = 0;

	// Determine the format of each endpoint pair
	int color_formats[BLOCK_MAX_PARTITIONS];
	int encoded_type_highpart_size = 0;
	if (partition_count == 1)
	{
		color_formats[0] = read_bits(4, 13, pcb);
		scb.partition_index = 0;
	}
	else
	{
		// The format field is split: 6 bits after the partition index, the rest below the weights
		encoded_type_highpart_size = (3 * partition_count) - 4;
		below_weights_pos -= encoded_type_highpart_size;

		const int type_low = read_bits(6, 13 + PARTITION_INDEX_BITS, pcb);
		const int type_high = read_bits(encoded_type_highpart_size, below_weights_pos, pcb);
		const int encoded_type = type_low | (type_high << 6);

		const int class_selector = encoded_type & 0x3;
		if (class_selector == 0)
		{
			// All partitions share a single format, and no high part is present
			const int shared_format = (encoded_type >> 2) & 0xF;
			for (int i = 0; i < partition_count; i++)
			{
				color_formats[i] = shared_format;
			}

			below_weights_pos += encoded_type_highpart_size;
			scb.color_formats_matched = 1;
			encoded_type_highpart_size = 0;
		}
		else
		{
			// One class offset bit per partition starting at bit 2, then two low format bits per
			// partition
			const int class_base = class_selector - 1;
			for (int i = 0; i < partition_count; i++)
			{
				const int class_bit = (encoded_type >> (2 + i)) & 1;
				const int low_bits = (encoded_type >> (2 + partition_count + 2 * i)) & 3;
				color_formats[i] = ((class_bit + class_base) << 2) | low_bits;
			}
		}

		const int index_low = read_bits(6, 13, pcb);
		const int index_high = read_bits(PARTITION_INDEX_BITS - 6, 19, pcb);
		scb.partition_index = static_cast<uint16_t>(index_low | (index_high << 6));
	}

	// Store the formats and determine number of color endpoint integers
	int color_integer_count = 0;
	for (int i = 0; i < partition_count; i++)
	{
		scb.color_formats[i] = static_cast<uint8_t>(color_formats[i]);
		color_integer_count += ((color_formats[i] >> 2) + 1) * 2;
	}

	if (color_integer_count > 18)
	{
		scb.block_type = SYM_BTYPE_ERROR;
		return;
	}

	// Determine the color endpoint format to use, based on the bits left for color data
	static const int color_bits_arr[5] {
		-1,
		115 - 4,
		113 - 4 - PARTITION_INDEX_BITS,
		113 - 4 - PARTITION_INDEX_BITS,
		113 - 4 - PARTITION_INDEX_BITS
	};

	int color_bits = color_bits_arr[partition_count] - bits_for_weights - encoded_type_highpart_size;
	if (is_dual_plane)
	{
		// Two bits are used by the second plane component selector
		color_bits -= 2;
	}

	if (color_bits < 0)
	{
		color_bits = 0;
	}

	int color_quant_level = quant_mode_table[color_integer_count >> 1][color_bits];
	if (color_quant_level < QUANT_6)
	{
		scb.block_type = SYM_BTYPE_ERROR;
		return;
	}

	// Unpack the integer color values and assign to endpoints
	scb.quant_mode = static_cast<quant_method>(color_quant_level);

	const int color_start_bit = (partition_count == 1) ? 17 : 19 + PARTITION_INDEX_BITS;
	uint8_t values_to_decode[32];
	decode_ise(static_cast<quant_method>(color_quant_level), color_integer_count, pcb,
	           values_to_decode, color_start_bit);

	const uint8_t* unpack_table = color_scrambled_pquant_to_uquant_tables[scb.quant_mode - QUANT_6];
	const uint8_t* next_value = values_to_decode;
	for (int i = 0; i < partition_count; i++)
	{
		// The format class (format >> 2) determines the value count
		const int vals = 2 * (color_formats[i] >> 2) + 2;
		for (int j = 0; j < vals; j++)
		{
			scb.color_values[i][j] = unpack_table[*next_value++];
		}
	}

	// Fetch component for second-plane in the case of dual plane of weights.
	if (is_dual_plane)
	{
		scb.plane2_component = static_cast<int8_t>(read_bits(2, below_weights_pos - 2, pcb));
	}
	else
	{
		scb.plane2_component = -1;
	}
}