1// SPDX-License-Identifier: Apache-2.0 2// ---------------------------------------------------------------------------- 3// Copyright 2011-2023 Arm Limited 4// 5// Licensed under the Apache License, Version 2.0 (the "License"); you may not 6// use this file except in compliance with the License. You may obtain a copy 7// of the License at: 8// 9// http://www.apache.org/licenses/LICENSE-2.0 10// 11// Unless required by applicable law or agreed to in writing, software 12// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 13// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 14// License for the specific language governing permissions and limitations 15// under the License. 16// ---------------------------------------------------------------------------- 17 18/** 19 * @brief Functions for generating partition tables on demand. 20 */ 21 22#include "astcenc_internal.h" 23 24/** @brief The number of 64-bit words needed to represent a canonical partition bit pattern. */ 25#define BIT_PATTERN_WORDS (((ASTCENC_BLOCK_MAX_TEXELS * 2) + 63) / 64) 26 27/** 28 * @brief Generate a canonical representation of a partition pattern. 29 * 30 * The returned value stores two bits per texel, for up to 6x6x6 texels, where the two bits store 31 * the remapped texel index. Remapping ensures that we only match on the partition pattern, 32 * independent of the partition order generated by the hash. 33 * 34 * @param texel_count The number of texels in the block. 35 * @param partition_of_texel The partition assignments, in hash order. 36 * @param[out] bit_pattern The output bit pattern representation. 37 */ 38static void generate_canonical_partitioning( 39 unsigned int texel_count, 40 const uint8_t* partition_of_texel, 41 uint64_t bit_pattern[BIT_PATTERN_WORDS] 42) { 43 // Clear the pattern 44 for (unsigned int i = 0; i < BIT_PATTERN_WORDS; i++) 45 { 46 bit_pattern[i] = 0; 47 } 48 49 // Store a mapping to reorder the raw partitions so that the partitions are ordered such 50 // that the lowest texel index in partition N is smaller than the lowest texel index in 51 // partition N + 1. 52 int mapped_index[BLOCK_MAX_PARTITIONS]; 53 int map_weight_count = 0; 54 55 for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++) 56 { 57 mapped_index[i] = -1; 58 } 59 60 for (unsigned int i = 0; i < texel_count; i++) 61 { 62 int index = partition_of_texel[i]; 63 if (mapped_index[index] < 0) 64 { 65 mapped_index[index] = map_weight_count++; 66 } 67 68 uint64_t xlat_index = mapped_index[index]; 69 bit_pattern[i >> 5] |= xlat_index << (2 * (i & 0x1F)); 70 } 71} 72 73/** 74 * @brief Compare two canonical patterns to see if they are the same. 75 * 76 * @param part1 The first canonical bit pattern to check. 77 * @param part2 The second canonical bit pattern to check. 78 * 79 * @return @c true if the patterns are the same, @c false otherwise. 80 */ 81static bool compare_canonical_partitionings( 82 const uint64_t part1[BIT_PATTERN_WORDS], 83 const uint64_t part2[BIT_PATTERN_WORDS] 84) { 85 return (part1[0] == part2[0]) 86#if BIT_PATTERN_WORDS > 1 87 && (part1[1] == part2[1]) 88#endif 89#if BIT_PATTERN_WORDS > 2 90 && (part1[2] == part2[2]) 91#endif 92#if BIT_PATTERN_WORDS > 3 93 && (part1[3] == part2[3]) 94#endif 95#if BIT_PATTERN_WORDS > 4 96 && (part1[4] == part2[4]) 97#endif 98#if BIT_PATTERN_WORDS > 5 99 && (part1[5] == part2[5]) 100#endif 101#if BIT_PATTERN_WORDS > 6 102 && (part1[6] == part2[6]) 103#endif 104 ; 105} 106 107/** 108 * @brief Hash function used for procedural partition assignment. 109 * 110 * @param inp The hash seed. 111 * 112 * @return The hashed value. 113 */ 114static uint32_t hash52( 115 uint32_t inp 116) { 117 inp ^= inp >> 15; 118 119 // (2^4 + 1) * (2^7 + 1) * (2^17 - 1) 120 inp *= 0xEEDE0891; 121 inp ^= inp >> 5; 122 inp += inp << 16; 123 inp ^= inp >> 7; 124 inp ^= inp >> 3; 125 inp ^= inp << 6; 126 inp ^= inp >> 17; 127 return inp; 128} 129 130/** 131 * @brief Select texel assignment for a single coordinate. 132 * 133 * @param seed The seed - the partition index from the block. 134 * @param x The texel X coordinate in the block. 135 * @param y The texel Y coordinate in the block. 136 * @param z The texel Z coordinate in the block. 137 * @param partition_count The total partition count of this encoding. 138 * @param small_block @c true if the block has fewer than 32 texels. 139 * 140 * @return The assigned partition index for this texel. 141 */ 142static uint8_t select_partition( 143 int seed, 144 int x, 145 int y, 146 int z, 147 int partition_count, 148 bool small_block 149) { 150 // For small blocks bias the coordinates to get better distribution 151 if (small_block) 152 { 153 x <<= 1; 154 y <<= 1; 155 z <<= 1; 156 } 157 158 seed += (partition_count - 1) * 1024; 159 160 uint32_t rnum = hash52(seed); 161 162 uint8_t seed1 = rnum & 0xF; 163 uint8_t seed2 = (rnum >> 4) & 0xF; 164 uint8_t seed3 = (rnum >> 8) & 0xF; 165 uint8_t seed4 = (rnum >> 12) & 0xF; 166 uint8_t seed5 = (rnum >> 16) & 0xF; 167 uint8_t seed6 = (rnum >> 20) & 0xF; 168 uint8_t seed7 = (rnum >> 24) & 0xF; 169 uint8_t seed8 = (rnum >> 28) & 0xF; 170 uint8_t seed9 = (rnum >> 18) & 0xF; 171 uint8_t seed10 = (rnum >> 22) & 0xF; 172 uint8_t seed11 = (rnum >> 26) & 0xF; 173 uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF; 174 175 // Squaring all the seeds in order to bias their distribution towards lower values. 176 seed1 *= seed1; 177 seed2 *= seed2; 178 seed3 *= seed3; 179 seed4 *= seed4; 180 seed5 *= seed5; 181 seed6 *= seed6; 182 seed7 *= seed7; 183 seed8 *= seed8; 184 seed9 *= seed9; 185 seed10 *= seed10; 186 seed11 *= seed11; 187 seed12 *= seed12; 188 189 int sh1, sh2; 190 if (seed & 1) 191 { 192 sh1 = (seed & 2 ? 4 : 5); 193 sh2 = (partition_count == 3 ? 6 : 5); 194 } 195 else 196 { 197 sh1 = (partition_count == 3 ? 6 : 5); 198 sh2 = (seed & 2 ? 4 : 5); 199 } 200 201 int sh3 = (seed & 0x10) ? sh1 : sh2; 202 203 seed1 >>= sh1; 204 seed2 >>= sh2; 205 seed3 >>= sh1; 206 seed4 >>= sh2; 207 seed5 >>= sh1; 208 seed6 >>= sh2; 209 seed7 >>= sh1; 210 seed8 >>= sh2; 211 212 seed9 >>= sh3; 213 seed10 >>= sh3; 214 seed11 >>= sh3; 215 seed12 >>= sh3; 216 217 int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); 218 int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); 219 int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); 220 int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); 221 222 // Apply the saw 223 a &= 0x3F; 224 b &= 0x3F; 225 c &= 0x3F; 226 d &= 0x3F; 227 228 // Remove some of the components if we are to output < 4 partitions. 229 if (partition_count <= 3) 230 { 231 d = 0; 232 } 233 234 if (partition_count <= 2) 235 { 236 c = 0; 237 } 238 239 if (partition_count <= 1) 240 { 241 b = 0; 242 } 243 244 uint8_t partition; 245 if (a >= b && a >= c && a >= d) 246 { 247 partition = 0; 248 } 249 else if (b >= c && b >= d) 250 { 251 partition = 1; 252 } 253 else if (c >= d) 254 { 255 partition = 2; 256 } 257 else 258 { 259 partition = 3; 260 } 261 262 return partition; 263} 264 265/** 266 * @brief Generate a single partition info structure. 267 * 268 * @param[out] bsd The block size information. 269 * @param partition_count The partition count of this partitioning. 270 * @param partition_index The partition index / seed of this partitioning. 271 * @param partition_remap_index The remapped partition index of this partitioning. 272 * @param[out] pi The partition info structure to populate. 273 * 274 * @return True if this is a useful partition index, False if we can skip it. 275 */ 276static bool generate_one_partition_info_entry( 277 block_size_descriptor& bsd, 278 unsigned int partition_count, 279 unsigned int partition_index, 280 unsigned int partition_remap_index, 281 partition_info& pi 282) { 283 int texels_per_block = bsd.texel_count; 284 bool small_block = texels_per_block < 32; 285 286 uint8_t *partition_of_texel = pi.partition_of_texel; 287 288 // Assign texels to partitions 289 int texel_idx = 0; 290 int counts[BLOCK_MAX_PARTITIONS] { 0 }; 291 for (unsigned int z = 0; z < bsd.zdim; z++) 292 { 293 for (unsigned int y = 0; y < bsd.ydim; y++) 294 { 295 for (unsigned int x = 0; x < bsd.xdim; x++) 296 { 297 uint8_t part = select_partition(partition_index, x, y, z, partition_count, small_block); 298 pi.texels_of_partition[part][counts[part]++] = static_cast<uint8_t>(texel_idx++); 299 *partition_of_texel++ = part; 300 } 301 } 302 } 303 304 // Fill loop tail so we can overfetch later 305 for (unsigned int i = 0; i < partition_count; i++) 306 { 307 int ptex_count = counts[i]; 308 int ptex_count_simd = round_up_to_simd_multiple_vla(ptex_count); 309 for (int j = ptex_count; j < ptex_count_simd; j++) 310 { 311 pi.texels_of_partition[i][j] = pi.texels_of_partition[i][ptex_count - 1]; 312 } 313 } 314 315 // Populate the actual procedural partition count 316 if (counts[0] == 0) 317 { 318 pi.partition_count = 0; 319 } 320 else if (counts[1] == 0) 321 { 322 pi.partition_count = 1; 323 } 324 else if (counts[2] == 0) 325 { 326 pi.partition_count = 2; 327 } 328 else if (counts[3] == 0) 329 { 330 pi.partition_count = 3; 331 } 332 else 333 { 334 pi.partition_count = 4; 335 } 336 337 // Populate the partition index 338 pi.partition_index = static_cast<uint16_t>(partition_index); 339 340 // Populate the coverage bitmaps for 2/3/4 partitions 341 uint64_t* bitmaps { nullptr }; 342 if (partition_count == 2) 343 { 344 bitmaps = bsd.coverage_bitmaps_2[partition_remap_index]; 345 } 346 else if (partition_count == 3) 347 { 348 bitmaps = bsd.coverage_bitmaps_3[partition_remap_index]; 349 } 350 else if (partition_count == 4) 351 { 352 bitmaps = bsd.coverage_bitmaps_4[partition_remap_index]; 353 } 354 355 for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++) 356 { 357 pi.partition_texel_count[i] = static_cast<uint8_t>(counts[i]); 358 } 359 360 // Valid partitionings have texels in all of the requested partitions 361 bool valid = pi.partition_count == partition_count; 362 363 if (bitmaps) 364 { 365 // Populate the partition coverage bitmap 366 for (unsigned int i = 0; i < partition_count; i++) 367 { 368 bitmaps[i] = 0ULL; 369 } 370 371 unsigned int texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS); 372 for (unsigned int i = 0; i < texels_to_process; i++) 373 { 374 unsigned int idx = bsd.kmeans_texels[i]; 375 bitmaps[pi.partition_of_texel[idx]] |= 1ULL << i; 376 } 377 } 378 379 return valid; 380} 381 382static void build_partition_table_for_one_partition_count( 383 block_size_descriptor& bsd, 384 bool can_omit_partitionings, 385 unsigned int partition_count_cutoff, 386 unsigned int partition_count, 387 partition_info* ptab, 388 uint64_t* canonical_patterns 389) { 390 unsigned int next_index = 0; 391 bsd.partitioning_count_selected[partition_count - 1] = 0; 392 bsd.partitioning_count_all[partition_count - 1] = 0; 393 394 // Skip tables larger than config max partition count if we can omit modes 395 if (can_omit_partitionings && (partition_count > partition_count_cutoff)) 396 { 397 return; 398 } 399 400 // Iterate through twice 401 // - Pass 0: Keep selected partitionings 402 // - Pass 1: Keep non-selected partitionings (skip if in omit mode) 403 unsigned int max_iter = can_omit_partitionings ? 1 : 2; 404 405 // Tracker for things we built in the first iteration 406 uint8_t build[BLOCK_MAX_PARTITIONINGS] { 0 }; 407 for (unsigned int x = 0; x < max_iter; x++) 408 { 409 for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONINGS; i++) 410 { 411 // Don't include things we built in the first pass 412 if ((x == 1) && build[i]) 413 { 414 continue; 415 } 416 417 bool keep_useful = generate_one_partition_info_entry(bsd, partition_count, i, next_index, ptab[next_index]); 418 if ((x == 0) && !keep_useful) 419 { 420 continue; 421 } 422 423 generate_canonical_partitioning(bsd.texel_count, ptab[next_index].partition_of_texel, canonical_patterns + next_index * BIT_PATTERN_WORDS); 424 bool keep_canonical = true; 425 for (unsigned int j = 0; j < next_index; j++) 426 { 427 bool match = compare_canonical_partitionings(canonical_patterns + next_index * BIT_PATTERN_WORDS, canonical_patterns + j * BIT_PATTERN_WORDS); 428 if (match) 429 { 430 keep_canonical = false; 431 break; 432 } 433 } 434 435 if (keep_useful && keep_canonical) 436 { 437 if (x == 0) 438 { 439 bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index); 440 bsd.partitioning_count_selected[partition_count - 1]++; 441 bsd.partitioning_count_all[partition_count - 1]++; 442 build[i] = 1; 443 next_index++; 444 } 445 } 446 else 447 { 448 if (x == 1) 449 { 450 bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index); 451 bsd.partitioning_count_all[partition_count - 1]++; 452 next_index++; 453 } 454 } 455 } 456 } 457} 458 459/* See header for documentation. */ 460void init_partition_tables( 461 block_size_descriptor& bsd, 462 bool can_omit_partitionings, 463 unsigned int partition_count_cutoff 464) { 465 partition_info* par_tab2 = bsd.partitionings; 466 partition_info* par_tab3 = par_tab2 + BLOCK_MAX_PARTITIONINGS; 467 partition_info* par_tab4 = par_tab3 + BLOCK_MAX_PARTITIONINGS; 468 partition_info* par_tab1 = par_tab4 + BLOCK_MAX_PARTITIONINGS; 469 470 generate_one_partition_info_entry(bsd, 1, 0, 0, *par_tab1); 471 bsd.partitioning_count_selected[0] = 1; 472 bsd.partitioning_count_all[0] = 1; 473 474 uint64_t* canonical_patterns = new uint64_t[BLOCK_MAX_PARTITIONINGS * BIT_PATTERN_WORDS]; 475 476 build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 2, par_tab2, canonical_patterns); 477 build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 3, par_tab3, canonical_patterns); 478 build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 4, par_tab4, canonical_patterns); 479 480 delete[] canonical_patterns; 481} 482