1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright 2015 Philip Taylor <philip@zaynar.co.uk> 3bf215546Sopenharmony_ci * Copyright 2018 Advanced Micro Devices, Inc. 4bf215546Sopenharmony_ci * 5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 8bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 10bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 11bf215546Sopenharmony_ci * 12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 14bf215546Sopenharmony_ci * Software. 15bf215546Sopenharmony_ci * 16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22bf215546Sopenharmony_ci * DEALINGS IN THE SOFTWARE. 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci/** 26bf215546Sopenharmony_ci * \file texcompress_astc.c 27bf215546Sopenharmony_ci * 28bf215546Sopenharmony_ci * Decompression code for GL_KHR_texture_compression_astc_ldr, which is just 29bf215546Sopenharmony_ci * ASTC 2D LDR. 30bf215546Sopenharmony_ci * 31bf215546Sopenharmony_ci * The ASTC 2D LDR decoder (without the sRGB part) was copied from the OASTC 32bf215546Sopenharmony_ci * library written by Philip Taylor. I added sRGB support and adjusted it for 33bf215546Sopenharmony_ci * Mesa. - Marek 34bf215546Sopenharmony_ci */ 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_ci#include "texcompress_astc.h" 37bf215546Sopenharmony_ci#include "macros.h" 38bf215546Sopenharmony_ci#include "util/half_float.h" 39bf215546Sopenharmony_ci#include <stdio.h> 40bf215546Sopenharmony_ci#include <cstdlib> // for abort() on windows 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_cistatic bool VERBOSE_DECODE = false; 43bf215546Sopenharmony_cistatic bool VERBOSE_WRITE = false; 44bf215546Sopenharmony_ci 45bf215546Sopenharmony_ciclass decode_error 46bf215546Sopenharmony_ci{ 47bf215546Sopenharmony_cipublic: 48bf215546Sopenharmony_ci enum type { 49bf215546Sopenharmony_ci ok, 50bf215546Sopenharmony_ci unsupported_hdr_void_extent, 51bf215546Sopenharmony_ci reserved_block_mode_1, 52bf215546Sopenharmony_ci reserved_block_mode_2, 53bf215546Sopenharmony_ci dual_plane_and_too_many_partitions, 54bf215546Sopenharmony_ci invalid_range_in_void_extent, 55bf215546Sopenharmony_ci weight_grid_exceeds_block_size, 56bf215546Sopenharmony_ci invalid_colour_endpoints_size, 57bf215546Sopenharmony_ci invalid_colour_endpoints_count, 58bf215546Sopenharmony_ci invalid_weight_bits, 59bf215546Sopenharmony_ci invalid_num_weights, 60bf215546Sopenharmony_ci }; 61bf215546Sopenharmony_ci}; 62bf215546Sopenharmony_ci 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_cistruct cem_range { 65bf215546Sopenharmony_ci uint8_t max; 66bf215546Sopenharmony_ci uint8_t t, q, b; 67bf215546Sopenharmony_ci}; 68bf215546Sopenharmony_ci 69bf215546Sopenharmony_ci/* Based on the Color Unquantization Parameters table, 70bf215546Sopenharmony_ci * plus the bit-only representations, sorted by increasing size 71bf215546Sopenharmony_ci */ 72bf215546Sopenharmony_cistatic cem_range cem_ranges[] = { 73bf215546Sopenharmony_ci { 5, 1, 0, 1 }, 74bf215546Sopenharmony_ci { 7, 0, 0, 3 }, 75bf215546Sopenharmony_ci { 9, 0, 1, 1 }, 76bf215546Sopenharmony_ci { 11, 1, 0, 2 }, 77bf215546Sopenharmony_ci { 15, 0, 0, 4 }, 78bf215546Sopenharmony_ci { 19, 0, 1, 2 }, 79bf215546Sopenharmony_ci { 23, 1, 0, 3 }, 80bf215546Sopenharmony_ci { 31, 0, 0, 5 }, 81bf215546Sopenharmony_ci { 39, 0, 1, 3 }, 82bf215546Sopenharmony_ci { 47, 1, 0, 4 }, 83bf215546Sopenharmony_ci { 63, 0, 0, 6 }, 84bf215546Sopenharmony_ci { 79, 0, 1, 4 }, 85bf215546Sopenharmony_ci { 95, 1, 0, 5 }, 86bf215546Sopenharmony_ci { 127, 0, 0, 7 }, 87bf215546Sopenharmony_ci { 159, 0, 1, 5 }, 88bf215546Sopenharmony_ci { 191, 1, 0, 6 }, 89bf215546Sopenharmony_ci { 255, 0, 0, 8 }, 90bf215546Sopenharmony_ci}; 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_ci#define CAT_BITS_2(a, b) ( ((a) << 1) | (b) ) 93bf215546Sopenharmony_ci#define CAT_BITS_3(a, b, c) ( ((a) << 2) | ((b) << 1) | (c) ) 94bf215546Sopenharmony_ci#define CAT_BITS_4(a, b, c, d) ( ((a) << 3) | ((b) << 2) | ((c) << 1) | (d) ) 95bf215546Sopenharmony_ci#define CAT_BITS_5(a, b, c, d, e) ( ((a) << 4) | ((b) << 3) | ((c) << 2) | ((d) << 1) | (e) ) 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_ci/** 98bf215546Sopenharmony_ci * Unpack 5n+8 bits from 'in' into 5 output values. 99bf215546Sopenharmony_ci * If n <= 4 then T should be uint32_t, else it must be uint64_t. 100bf215546Sopenharmony_ci */ 101bf215546Sopenharmony_citemplate <typename T> 102bf215546Sopenharmony_cistatic void unpack_trit_block(int n, T in, uint8_t *out) 103bf215546Sopenharmony_ci{ 104bf215546Sopenharmony_ci assert(n <= 6); /* else output will overflow uint8_t */ 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci uint8_t T0 = (in >> (n)) & 0x1; 107bf215546Sopenharmony_ci uint8_t T1 = (in >> (n+1)) & 0x1; 108bf215546Sopenharmony_ci uint8_t T2 = (in >> (2*n+2)) & 0x1; 109bf215546Sopenharmony_ci uint8_t T3 = (in >> (2*n+3)) & 0x1; 110bf215546Sopenharmony_ci uint8_t T4 = (in >> (3*n+4)) & 0x1; 111bf215546Sopenharmony_ci uint8_t T5 = (in >> (4*n+5)) & 0x1; 112bf215546Sopenharmony_ci uint8_t T6 = (in >> (4*n+6)) & 0x1; 113bf215546Sopenharmony_ci uint8_t T7 = (in >> (5*n+7)) & 0x1; 114bf215546Sopenharmony_ci uint8_t mmask = (1 << n) - 1; 115bf215546Sopenharmony_ci uint8_t m0 = (in >> (0)) & mmask; 116bf215546Sopenharmony_ci uint8_t m1 = (in >> (n+2)) & mmask; 117bf215546Sopenharmony_ci uint8_t m2 = (in >> (2*n+4)) & mmask; 118bf215546Sopenharmony_ci uint8_t m3 = (in >> (3*n+5)) & mmask; 119bf215546Sopenharmony_ci uint8_t m4 = (in >> (4*n+7)) & mmask; 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_ci uint8_t C; 122bf215546Sopenharmony_ci uint8_t t4, t3, t2, t1, t0; 123bf215546Sopenharmony_ci if (CAT_BITS_3(T4, T3, T2) == 0x7) { 124bf215546Sopenharmony_ci C = CAT_BITS_5(T7, T6, T5, T1, T0); 125bf215546Sopenharmony_ci t4 = t3 = 2; 126bf215546Sopenharmony_ci } else { 127bf215546Sopenharmony_ci C = CAT_BITS_5(T4, T3, T2, T1, T0); 128bf215546Sopenharmony_ci if (CAT_BITS_2(T6, T5) == 0x3) { 129bf215546Sopenharmony_ci t4 = 2; 130bf215546Sopenharmony_ci t3 = T7; 131bf215546Sopenharmony_ci } else { 132bf215546Sopenharmony_ci t4 = T7; 133bf215546Sopenharmony_ci t3 = CAT_BITS_2(T6, T5); 134bf215546Sopenharmony_ci } 135bf215546Sopenharmony_ci } 136bf215546Sopenharmony_ci 137bf215546Sopenharmony_ci if ((C & 0x3) == 0x3) { 138bf215546Sopenharmony_ci t2 = 2; 139bf215546Sopenharmony_ci t1 = (C >> 4) & 0x1; 140bf215546Sopenharmony_ci uint8_t C3 = (C >> 3) & 0x1; 141bf215546Sopenharmony_ci uint8_t C2 = (C >> 2) & 0x1; 142bf215546Sopenharmony_ci t0 = (C3 << 1) | (C2 & ~C3); 143bf215546Sopenharmony_ci } else if (((C >> 2) & 0x3) == 0x3) { 144bf215546Sopenharmony_ci t2 = 2; 145bf215546Sopenharmony_ci t1 = 2; 146bf215546Sopenharmony_ci t0 = C & 0x3; 147bf215546Sopenharmony_ci } else { 148bf215546Sopenharmony_ci t2 = (C >> 4) & 0x1; 149bf215546Sopenharmony_ci t1 = (C >> 2) & 0x3; 150bf215546Sopenharmony_ci uint8_t C1 = (C >> 1) & 0x1; 151bf215546Sopenharmony_ci uint8_t C0 = (C >> 0) & 0x1; 152bf215546Sopenharmony_ci t0 = (C1 << 1) | (C0 & ~C1); 153bf215546Sopenharmony_ci } 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_ci out[0] = (t0 << n) | m0; 156bf215546Sopenharmony_ci out[1] = (t1 << n) | m1; 157bf215546Sopenharmony_ci out[2] = (t2 << n) | m2; 158bf215546Sopenharmony_ci out[3] = (t3 << n) | m3; 159bf215546Sopenharmony_ci out[4] = (t4 << n) | m4; 160bf215546Sopenharmony_ci} 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_ci/** 163bf215546Sopenharmony_ci * Unpack 3n+7 bits from 'in' into 3 output values 164bf215546Sopenharmony_ci */ 165bf215546Sopenharmony_cistatic void unpack_quint_block(int n, uint32_t in, uint8_t *out) 166bf215546Sopenharmony_ci{ 167bf215546Sopenharmony_ci assert(n <= 5); /* else output will overflow uint8_t */ 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_ci uint8_t Q0 = (in >> (n)) & 0x1; 170bf215546Sopenharmony_ci uint8_t Q1 = (in >> (n+1)) & 0x1; 171bf215546Sopenharmony_ci uint8_t Q2 = (in >> (n+2)) & 0x1; 172bf215546Sopenharmony_ci uint8_t Q3 = (in >> (2*n+3)) & 0x1; 173bf215546Sopenharmony_ci uint8_t Q4 = (in >> (2*n+4)) & 0x1; 174bf215546Sopenharmony_ci uint8_t Q5 = (in >> (3*n+5)) & 0x1; 175bf215546Sopenharmony_ci uint8_t Q6 = (in >> (3*n+6)) & 0x1; 176bf215546Sopenharmony_ci uint8_t mmask = (1 << n) - 1; 177bf215546Sopenharmony_ci uint8_t m0 = (in >> (0)) & mmask; 178bf215546Sopenharmony_ci uint8_t m1 = (in >> (n+3)) & mmask; 179bf215546Sopenharmony_ci uint8_t m2 = (in >> (2*n+5)) & mmask; 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_ci uint8_t C; 182bf215546Sopenharmony_ci uint8_t q2, q1, q0; 183bf215546Sopenharmony_ci if (CAT_BITS_4(Q6, Q5, Q2, Q1) == 0x3) { 184bf215546Sopenharmony_ci q2 = CAT_BITS_3(Q0, Q4 & ~Q0, Q3 & ~Q0); 185bf215546Sopenharmony_ci q1 = 4; 186bf215546Sopenharmony_ci q0 = 4; 187bf215546Sopenharmony_ci } else { 188bf215546Sopenharmony_ci if (CAT_BITS_2(Q2, Q1) == 0x3) { 189bf215546Sopenharmony_ci q2 = 4; 190bf215546Sopenharmony_ci C = CAT_BITS_5(Q4, Q3, 0x1 & ~Q6, 0x1 & ~Q5, Q0); 191bf215546Sopenharmony_ci } else { 192bf215546Sopenharmony_ci q2 = CAT_BITS_2(Q6, Q5); 193bf215546Sopenharmony_ci C = CAT_BITS_5(Q4, Q3, Q2, Q1, Q0); 194bf215546Sopenharmony_ci } 195bf215546Sopenharmony_ci if ((C & 0x7) == 0x5) { 196bf215546Sopenharmony_ci q1 = 4; 197bf215546Sopenharmony_ci q0 = (C >> 3) & 0x3; 198bf215546Sopenharmony_ci } else { 199bf215546Sopenharmony_ci q1 = (C >> 3) & 0x3; 200bf215546Sopenharmony_ci q0 = C & 0x7; 201bf215546Sopenharmony_ci } 202bf215546Sopenharmony_ci } 203bf215546Sopenharmony_ci out[0] = (q0 << n) | m0; 204bf215546Sopenharmony_ci out[1] = (q1 << n) | m1; 205bf215546Sopenharmony_ci out[2] = (q2 << n) | m2; 206bf215546Sopenharmony_ci} 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_cistruct uint8x4_t 210bf215546Sopenharmony_ci{ 211bf215546Sopenharmony_ci uint8_t v[4]; 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_ci uint8x4_t() { } 214bf215546Sopenharmony_ci 215bf215546Sopenharmony_ci uint8x4_t(int a, int b, int c, int d) 216bf215546Sopenharmony_ci { 217bf215546Sopenharmony_ci assert(0 <= a && a <= 255); 218bf215546Sopenharmony_ci assert(0 <= b && b <= 255); 219bf215546Sopenharmony_ci assert(0 <= c && c <= 255); 220bf215546Sopenharmony_ci assert(0 <= d && d <= 255); 221bf215546Sopenharmony_ci v[0] = a; 222bf215546Sopenharmony_ci v[1] = b; 223bf215546Sopenharmony_ci v[2] = c; 224bf215546Sopenharmony_ci v[3] = d; 225bf215546Sopenharmony_ci } 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_ci static uint8x4_t clamped(int a, int b, int c, int d) 228bf215546Sopenharmony_ci { 229bf215546Sopenharmony_ci uint8x4_t r; 230bf215546Sopenharmony_ci r.v[0] = MAX2(0, MIN2(255, a)); 231bf215546Sopenharmony_ci r.v[1] = MAX2(0, MIN2(255, b)); 232bf215546Sopenharmony_ci r.v[2] = MAX2(0, MIN2(255, c)); 233bf215546Sopenharmony_ci r.v[3] = MAX2(0, MIN2(255, d)); 234bf215546Sopenharmony_ci return r; 235bf215546Sopenharmony_ci } 236bf215546Sopenharmony_ci}; 237bf215546Sopenharmony_ci 238bf215546Sopenharmony_cistatic uint8x4_t blue_contract(int r, int g, int b, int a) 239bf215546Sopenharmony_ci{ 240bf215546Sopenharmony_ci return uint8x4_t((r+b) >> 1, (g+b) >> 1, b, a); 241bf215546Sopenharmony_ci} 242bf215546Sopenharmony_ci 243bf215546Sopenharmony_cistatic uint8x4_t blue_contract_clamped(int r, int g, int b, int a) 244bf215546Sopenharmony_ci{ 245bf215546Sopenharmony_ci return uint8x4_t::clamped((r+b) >> 1, (g+b) >> 1, b, a); 246bf215546Sopenharmony_ci} 247bf215546Sopenharmony_ci 248bf215546Sopenharmony_cistatic void bit_transfer_signed(int &a, int &b) 249bf215546Sopenharmony_ci{ 250bf215546Sopenharmony_ci b >>= 1; 251bf215546Sopenharmony_ci b |= a & 0x80; 252bf215546Sopenharmony_ci a >>= 1; 253bf215546Sopenharmony_ci a &= 0x3f; 254bf215546Sopenharmony_ci if (a & 0x20) 255bf215546Sopenharmony_ci a -= 0x40; 256bf215546Sopenharmony_ci} 257bf215546Sopenharmony_ci 258bf215546Sopenharmony_cistatic uint32_t hash52(uint32_t p) 259bf215546Sopenharmony_ci{ 260bf215546Sopenharmony_ci p ^= p >> 15; 261bf215546Sopenharmony_ci p -= p << 17; 262bf215546Sopenharmony_ci p += p << 7; 263bf215546Sopenharmony_ci p += p << 4; 264bf215546Sopenharmony_ci p ^= p >> 5; 265bf215546Sopenharmony_ci p += p << 16; 266bf215546Sopenharmony_ci p ^= p >> 7; 267bf215546Sopenharmony_ci p ^= p >> 3; 268bf215546Sopenharmony_ci p ^= p << 6; 269bf215546Sopenharmony_ci p ^= p >> 17; 270bf215546Sopenharmony_ci return p; 271bf215546Sopenharmony_ci} 272bf215546Sopenharmony_ci 273bf215546Sopenharmony_cistatic int select_partition(int seed, int x, int y, int z, int partitioncount, 274bf215546Sopenharmony_ci int small_block) 275bf215546Sopenharmony_ci{ 276bf215546Sopenharmony_ci if (small_block) { 277bf215546Sopenharmony_ci x <<= 1; 278bf215546Sopenharmony_ci y <<= 1; 279bf215546Sopenharmony_ci z <<= 1; 280bf215546Sopenharmony_ci } 281bf215546Sopenharmony_ci seed += (partitioncount - 1) * 1024; 282bf215546Sopenharmony_ci uint32_t rnum = hash52(seed); 283bf215546Sopenharmony_ci uint8_t seed1 = rnum & 0xF; 284bf215546Sopenharmony_ci uint8_t seed2 = (rnum >> 4) & 0xF; 285bf215546Sopenharmony_ci uint8_t seed3 = (rnum >> 8) & 0xF; 286bf215546Sopenharmony_ci uint8_t seed4 = (rnum >> 12) & 0xF; 287bf215546Sopenharmony_ci uint8_t seed5 = (rnum >> 16) & 0xF; 288bf215546Sopenharmony_ci uint8_t seed6 = (rnum >> 20) & 0xF; 289bf215546Sopenharmony_ci uint8_t seed7 = (rnum >> 24) & 0xF; 290bf215546Sopenharmony_ci uint8_t seed8 = (rnum >> 28) & 0xF; 291bf215546Sopenharmony_ci uint8_t seed9 = (rnum >> 18) & 0xF; 292bf215546Sopenharmony_ci uint8_t seed10 = (rnum >> 22) & 0xF; 293bf215546Sopenharmony_ci uint8_t seed11 = (rnum >> 26) & 0xF; 294bf215546Sopenharmony_ci uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF; 295bf215546Sopenharmony_ci 296bf215546Sopenharmony_ci seed1 *= seed1; 297bf215546Sopenharmony_ci seed2 *= seed2; 298bf215546Sopenharmony_ci seed3 *= seed3; 299bf215546Sopenharmony_ci seed4 *= seed4; 300bf215546Sopenharmony_ci seed5 *= seed5; 301bf215546Sopenharmony_ci seed6 *= seed6; 302bf215546Sopenharmony_ci seed7 *= seed7; 303bf215546Sopenharmony_ci seed8 *= seed8; 304bf215546Sopenharmony_ci seed9 *= seed9; 305bf215546Sopenharmony_ci seed10 *= seed10; 306bf215546Sopenharmony_ci seed11 *= seed11; 307bf215546Sopenharmony_ci seed12 *= seed12; 308bf215546Sopenharmony_ci 309bf215546Sopenharmony_ci int sh1, sh2, sh3; 310bf215546Sopenharmony_ci if (seed & 1) { 311bf215546Sopenharmony_ci sh1 = (seed & 2 ? 4 : 5); 312bf215546Sopenharmony_ci sh2 = (partitioncount == 3 ? 6 : 5); 313bf215546Sopenharmony_ci } else { 314bf215546Sopenharmony_ci sh1 = (partitioncount == 3 ? 6 : 5); 315bf215546Sopenharmony_ci sh2 = (seed & 2 ? 4 : 5); 316bf215546Sopenharmony_ci } 317bf215546Sopenharmony_ci sh3 = (seed & 0x10) ? sh1 : sh2; 318bf215546Sopenharmony_ci 319bf215546Sopenharmony_ci seed1 >>= sh1; 320bf215546Sopenharmony_ci seed2 >>= sh2; 321bf215546Sopenharmony_ci seed3 >>= sh1; 322bf215546Sopenharmony_ci seed4 >>= sh2; 323bf215546Sopenharmony_ci seed5 >>= sh1; 324bf215546Sopenharmony_ci seed6 >>= sh2; 325bf215546Sopenharmony_ci seed7 >>= sh1; 326bf215546Sopenharmony_ci seed8 >>= sh2; 327bf215546Sopenharmony_ci seed9 >>= sh3; 328bf215546Sopenharmony_ci seed10 >>= sh3; 329bf215546Sopenharmony_ci seed11 >>= sh3; 330bf215546Sopenharmony_ci seed12 >>= sh3; 331bf215546Sopenharmony_ci 332bf215546Sopenharmony_ci int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); 333bf215546Sopenharmony_ci int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); 334bf215546Sopenharmony_ci int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); 335bf215546Sopenharmony_ci int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_ci a &= 0x3F; 338bf215546Sopenharmony_ci b &= 0x3F; 339bf215546Sopenharmony_ci c &= 0x3F; 340bf215546Sopenharmony_ci d &= 0x3F; 341bf215546Sopenharmony_ci 342bf215546Sopenharmony_ci if (partitioncount < 4) 343bf215546Sopenharmony_ci d = 0; 344bf215546Sopenharmony_ci if (partitioncount < 3) 345bf215546Sopenharmony_ci c = 0; 346bf215546Sopenharmony_ci 347bf215546Sopenharmony_ci if (a >= b && a >= c && a >= d) 348bf215546Sopenharmony_ci return 0; 349bf215546Sopenharmony_ci else if (b >= c && b >= d) 350bf215546Sopenharmony_ci return 1; 351bf215546Sopenharmony_ci else if (c >= d) 352bf215546Sopenharmony_ci return 2; 353bf215546Sopenharmony_ci else 354bf215546Sopenharmony_ci return 3; 355bf215546Sopenharmony_ci} 356bf215546Sopenharmony_ci 357bf215546Sopenharmony_ci 358bf215546Sopenharmony_cistruct InputBitVector 359bf215546Sopenharmony_ci{ 360bf215546Sopenharmony_ci uint32_t data[4]; 361bf215546Sopenharmony_ci 362bf215546Sopenharmony_ci void printf_bits(int offset, int count, const char *fmt = "", ...) 363bf215546Sopenharmony_ci { 364bf215546Sopenharmony_ci char out[129]; 365bf215546Sopenharmony_ci memset(out, '.', 128); 366bf215546Sopenharmony_ci out[128] = '\0'; 367bf215546Sopenharmony_ci int idx = offset; 368bf215546Sopenharmony_ci for (int i = 0; i < count; ++i) { 369bf215546Sopenharmony_ci out[127 - idx] = ((data[idx >> 5] >> (idx & 31)) & 1) ? '1' : '0'; 370bf215546Sopenharmony_ci ++idx; 371bf215546Sopenharmony_ci } 372bf215546Sopenharmony_ci printf("%s ", out); 373bf215546Sopenharmony_ci va_list ap; 374bf215546Sopenharmony_ci va_start(ap, fmt); 375bf215546Sopenharmony_ci vprintf(fmt, ap); 376bf215546Sopenharmony_ci va_end(ap); 377bf215546Sopenharmony_ci printf("\n"); 378bf215546Sopenharmony_ci } 379bf215546Sopenharmony_ci 380bf215546Sopenharmony_ci uint32_t get_bits(int offset, int count) 381bf215546Sopenharmony_ci { 382bf215546Sopenharmony_ci assert(count >= 0 && count < 32); 383bf215546Sopenharmony_ci 384bf215546Sopenharmony_ci uint32_t out = 0; 385bf215546Sopenharmony_ci if (offset < 32) 386bf215546Sopenharmony_ci out |= data[0] >> offset; 387bf215546Sopenharmony_ci 388bf215546Sopenharmony_ci if (0 < offset && offset <= 32) 389bf215546Sopenharmony_ci out |= data[1] << (32 - offset); 390bf215546Sopenharmony_ci if (32 < offset && offset < 64) 391bf215546Sopenharmony_ci out |= data[1] >> (offset - 32); 392bf215546Sopenharmony_ci 393bf215546Sopenharmony_ci if (32 < offset && offset <= 64) 394bf215546Sopenharmony_ci out |= data[2] << (64 - offset); 395bf215546Sopenharmony_ci if (64 < offset && offset < 96) 396bf215546Sopenharmony_ci out |= data[2] >> (offset - 64); 397bf215546Sopenharmony_ci 398bf215546Sopenharmony_ci if (64 < offset && offset <= 96) 399bf215546Sopenharmony_ci out |= data[3] << (96 - offset); 400bf215546Sopenharmony_ci if (96 < offset && offset < 128) 401bf215546Sopenharmony_ci out |= data[3] >> (offset - 96); 402bf215546Sopenharmony_ci 403bf215546Sopenharmony_ci out &= (1 << count) - 1; 404bf215546Sopenharmony_ci return out; 405bf215546Sopenharmony_ci } 406bf215546Sopenharmony_ci 407bf215546Sopenharmony_ci uint64_t get_bits64(int offset, int count) 408bf215546Sopenharmony_ci { 409bf215546Sopenharmony_ci assert(count >= 0 && count < 64); 410bf215546Sopenharmony_ci 411bf215546Sopenharmony_ci uint64_t out = 0; 412bf215546Sopenharmony_ci if (offset < 32) 413bf215546Sopenharmony_ci out |= data[0] >> offset; 414bf215546Sopenharmony_ci 415bf215546Sopenharmony_ci if (offset <= 32) 416bf215546Sopenharmony_ci out |= (uint64_t)data[1] << (32 - offset); 417bf215546Sopenharmony_ci if (32 < offset && offset < 64) 418bf215546Sopenharmony_ci out |= data[1] >> (offset - 32); 419bf215546Sopenharmony_ci 420bf215546Sopenharmony_ci if (0 < offset && offset <= 64) 421bf215546Sopenharmony_ci out |= (uint64_t)data[2] << (64 - offset); 422bf215546Sopenharmony_ci if (64 < offset && offset < 96) 423bf215546Sopenharmony_ci out |= data[2] >> (offset - 64); 424bf215546Sopenharmony_ci 425bf215546Sopenharmony_ci if (32 < offset && offset <= 96) 426bf215546Sopenharmony_ci out |= (uint64_t)data[3] << (96 - offset); 427bf215546Sopenharmony_ci if (96 < offset && offset < 128) 428bf215546Sopenharmony_ci out |= data[3] >> (offset - 96); 429bf215546Sopenharmony_ci 430bf215546Sopenharmony_ci out &= ((uint64_t)1 << count) - 1; 431bf215546Sopenharmony_ci return out; 432bf215546Sopenharmony_ci } 433bf215546Sopenharmony_ci 434bf215546Sopenharmony_ci uint32_t get_bits_rev(int offset, int count) 435bf215546Sopenharmony_ci { 436bf215546Sopenharmony_ci assert(offset >= count); 437bf215546Sopenharmony_ci uint32_t tmp = get_bits(offset - count, count); 438bf215546Sopenharmony_ci uint32_t out = 0; 439bf215546Sopenharmony_ci for (int i = 0; i < count; ++i) 440bf215546Sopenharmony_ci out |= ((tmp >> i) & 1) << (count - 1 - i); 441bf215546Sopenharmony_ci return out; 442bf215546Sopenharmony_ci } 443bf215546Sopenharmony_ci}; 444bf215546Sopenharmony_ci 445bf215546Sopenharmony_cistruct OutputBitVector 446bf215546Sopenharmony_ci{ 447bf215546Sopenharmony_ci uint32_t data[4]; 448bf215546Sopenharmony_ci int offset; 449bf215546Sopenharmony_ci 450bf215546Sopenharmony_ci OutputBitVector() 451bf215546Sopenharmony_ci : offset(0) 452bf215546Sopenharmony_ci { 453bf215546Sopenharmony_ci memset(data, 0, sizeof(data)); 454bf215546Sopenharmony_ci } 455bf215546Sopenharmony_ci 456bf215546Sopenharmony_ci void append(uint32_t value, int size) 457bf215546Sopenharmony_ci { 458bf215546Sopenharmony_ci if (VERBOSE_WRITE) 459bf215546Sopenharmony_ci printf("append offset=%d size=%d values=0x%x\n", offset, size, value); 460bf215546Sopenharmony_ci 461bf215546Sopenharmony_ci assert(offset + size <= 128); 462bf215546Sopenharmony_ci 463bf215546Sopenharmony_ci assert(size <= 32); 464bf215546Sopenharmony_ci if (size < 32) 465bf215546Sopenharmony_ci assert((value >> size) == 0); 466bf215546Sopenharmony_ci 467bf215546Sopenharmony_ci while (size) { 468bf215546Sopenharmony_ci int c = MIN2(size, 32 - (offset & 31)); 469bf215546Sopenharmony_ci data[offset >> 5] |= (value << (offset & 31)); 470bf215546Sopenharmony_ci offset += c; 471bf215546Sopenharmony_ci size -= c; 472bf215546Sopenharmony_ci value >>= c; 473bf215546Sopenharmony_ci } 474bf215546Sopenharmony_ci } 475bf215546Sopenharmony_ci 476bf215546Sopenharmony_ci void append64(uint64_t value, int size) 477bf215546Sopenharmony_ci { 478bf215546Sopenharmony_ci if (VERBOSE_WRITE) 479bf215546Sopenharmony_ci printf("append offset=%d size=%d values=0x%llx\n", offset, size, (unsigned long long)value); 480bf215546Sopenharmony_ci 481bf215546Sopenharmony_ci assert(offset + size <= 128); 482bf215546Sopenharmony_ci 483bf215546Sopenharmony_ci assert(size <= 64); 484bf215546Sopenharmony_ci if (size < 64) 485bf215546Sopenharmony_ci assert((value >> size) == 0); 486bf215546Sopenharmony_ci 487bf215546Sopenharmony_ci while (size) { 488bf215546Sopenharmony_ci int c = MIN2(size, 32 - (offset & 31)); 489bf215546Sopenharmony_ci data[offset >> 5] |= (value << (offset & 31)); 490bf215546Sopenharmony_ci offset += c; 491bf215546Sopenharmony_ci size -= c; 492bf215546Sopenharmony_ci value >>= c; 493bf215546Sopenharmony_ci } 494bf215546Sopenharmony_ci } 495bf215546Sopenharmony_ci 496bf215546Sopenharmony_ci void append(OutputBitVector &v, int size) 497bf215546Sopenharmony_ci { 498bf215546Sopenharmony_ci if (VERBOSE_WRITE) 499bf215546Sopenharmony_ci printf("append vector offset=%d size=%d\n", offset, size); 500bf215546Sopenharmony_ci 501bf215546Sopenharmony_ci assert(offset + size <= 128); 502bf215546Sopenharmony_ci int i = 0; 503bf215546Sopenharmony_ci while (size >= 32) { 504bf215546Sopenharmony_ci append(v.data[i++], 32); 505bf215546Sopenharmony_ci size -= 32; 506bf215546Sopenharmony_ci } 507bf215546Sopenharmony_ci if (size > 0) 508bf215546Sopenharmony_ci append(v.data[i] & ((1 << size) - 1), size); 509bf215546Sopenharmony_ci } 510bf215546Sopenharmony_ci 511bf215546Sopenharmony_ci void append_end(OutputBitVector &v, int size) 512bf215546Sopenharmony_ci { 513bf215546Sopenharmony_ci for (int i = 0; i < size; ++i) 514bf215546Sopenharmony_ci data[(127 - i) >> 5] |= ((v.data[i >> 5] >> (i & 31)) & 1) << ((127 - i) & 31); 515bf215546Sopenharmony_ci } 516bf215546Sopenharmony_ci 517bf215546Sopenharmony_ci /* Insert the given number of '1' bits. (We could use 0s instead, but 1s are 518bf215546Sopenharmony_ci * more likely to flush out bugs where we accidentally read undefined bits.) 519bf215546Sopenharmony_ci */ 520bf215546Sopenharmony_ci void skip(int size) 521bf215546Sopenharmony_ci { 522bf215546Sopenharmony_ci if (VERBOSE_WRITE) 523bf215546Sopenharmony_ci printf("skip offset=%d size=%d\n", offset, size); 524bf215546Sopenharmony_ci 525bf215546Sopenharmony_ci assert(offset + size <= 128); 526bf215546Sopenharmony_ci while (size >= 32) { 527bf215546Sopenharmony_ci append(0xffffffff, 32); 528bf215546Sopenharmony_ci size -= 32; 529bf215546Sopenharmony_ci } 530bf215546Sopenharmony_ci if (size > 0) 531bf215546Sopenharmony_ci append(0xffffffff >> (32 - size), size); 532bf215546Sopenharmony_ci } 533bf215546Sopenharmony_ci}; 534bf215546Sopenharmony_ci 535bf215546Sopenharmony_ci 536bf215546Sopenharmony_ciclass Decoder 537bf215546Sopenharmony_ci{ 538bf215546Sopenharmony_cipublic: 539bf215546Sopenharmony_ci Decoder(int block_w, int block_h, int block_d, bool srgb, bool output_unorm8) 540bf215546Sopenharmony_ci : block_w(block_w), block_h(block_h), block_d(block_d), srgb(srgb), 541bf215546Sopenharmony_ci output_unorm8(output_unorm8) {} 542bf215546Sopenharmony_ci 543bf215546Sopenharmony_ci decode_error::type decode(const uint8_t *in, uint16_t *output) const; 544bf215546Sopenharmony_ci 545bf215546Sopenharmony_ci int block_w, block_h, block_d; 546bf215546Sopenharmony_ci bool srgb, output_unorm8; 547bf215546Sopenharmony_ci}; 548bf215546Sopenharmony_ci 549bf215546Sopenharmony_cistruct Block 550bf215546Sopenharmony_ci{ 551bf215546Sopenharmony_ci bool is_error; 552bf215546Sopenharmony_ci bool bogus_colour_endpoints; 553bf215546Sopenharmony_ci bool bogus_weights; 554bf215546Sopenharmony_ci 555bf215546Sopenharmony_ci int high_prec; 556bf215546Sopenharmony_ci int dual_plane; 557bf215546Sopenharmony_ci int colour_component_selector; 558bf215546Sopenharmony_ci int wt_range; 559bf215546Sopenharmony_ci int wt_w, wt_h, wt_d; 560bf215546Sopenharmony_ci int num_parts; 561bf215546Sopenharmony_ci int partition_index; 562bf215546Sopenharmony_ci 563bf215546Sopenharmony_ci bool is_void_extent; 564bf215546Sopenharmony_ci int void_extent_d; 565bf215546Sopenharmony_ci int void_extent_min_s; 566bf215546Sopenharmony_ci int void_extent_max_s; 567bf215546Sopenharmony_ci int void_extent_min_t; 568bf215546Sopenharmony_ci int void_extent_max_t; 569bf215546Sopenharmony_ci uint16_t void_extent_colour_r; 570bf215546Sopenharmony_ci uint16_t void_extent_colour_g; 571bf215546Sopenharmony_ci uint16_t void_extent_colour_b; 572bf215546Sopenharmony_ci uint16_t void_extent_colour_a; 573bf215546Sopenharmony_ci 574bf215546Sopenharmony_ci bool is_multi_cem; 575bf215546Sopenharmony_ci int num_extra_cem_bits; 576bf215546Sopenharmony_ci int colour_endpoint_data_offset; 577bf215546Sopenharmony_ci int extra_cem_bits; 578bf215546Sopenharmony_ci int cem_base_class; 579bf215546Sopenharmony_ci int cems[4]; 580bf215546Sopenharmony_ci 581bf215546Sopenharmony_ci int num_cem_values; 582bf215546Sopenharmony_ci 583bf215546Sopenharmony_ci /* Calculated by unpack_weights(): */ 584bf215546Sopenharmony_ci uint8_t weights_quant[64 + 4]; /* max 64 values, plus padding for overflows in trit parsing */ 585bf215546Sopenharmony_ci 586bf215546Sopenharmony_ci /* Calculated by unquantise_weights(): */ 587bf215546Sopenharmony_ci uint8_t weights[64 + 18]; /* max 64 values, plus padding for the infill interpolation */ 588bf215546Sopenharmony_ci 589bf215546Sopenharmony_ci /* Calculated by unpack_colour_endpoints(): */ 590bf215546Sopenharmony_ci uint8_t colour_endpoints_quant[18 + 4]; /* max 18 values, plus padding for overflows in trit parsing */ 591bf215546Sopenharmony_ci 592bf215546Sopenharmony_ci /* Calculated by unquantise_colour_endpoints(): */ 593bf215546Sopenharmony_ci uint8_t colour_endpoints[18]; 594bf215546Sopenharmony_ci 595bf215546Sopenharmony_ci /* Calculated by calculate_from_weights(): */ 596bf215546Sopenharmony_ci int wt_trits; 597bf215546Sopenharmony_ci int wt_quints; 598bf215546Sopenharmony_ci int wt_bits; 599bf215546Sopenharmony_ci int wt_max; 600bf215546Sopenharmony_ci int num_weights; 601bf215546Sopenharmony_ci int weight_bits; 602bf215546Sopenharmony_ci 603bf215546Sopenharmony_ci /* Calculated by calculate_remaining_bits(): */ 604bf215546Sopenharmony_ci int remaining_bits; 605bf215546Sopenharmony_ci 606bf215546Sopenharmony_ci /* Calculated by calculate_colour_endpoints_size(): */ 607bf215546Sopenharmony_ci int colour_endpoint_bits; 608bf215546Sopenharmony_ci int ce_max; 609bf215546Sopenharmony_ci int ce_trits; 610bf215546Sopenharmony_ci int ce_quints; 611bf215546Sopenharmony_ci int ce_bits; 612bf215546Sopenharmony_ci 613bf215546Sopenharmony_ci /* Calculated by compute_infill_weights(); */ 614bf215546Sopenharmony_ci uint8_t infill_weights[2][216]; /* large enough for 6x6x6 */ 615bf215546Sopenharmony_ci 616bf215546Sopenharmony_ci /* Calculated by decode_colour_endpoints(); */ 617bf215546Sopenharmony_ci uint8x4_t endpoints_decoded[2][4]; 618bf215546Sopenharmony_ci 619bf215546Sopenharmony_ci void calculate_from_weights(); 620bf215546Sopenharmony_ci void calculate_remaining_bits(); 621bf215546Sopenharmony_ci decode_error::type calculate_colour_endpoints_size(); 622bf215546Sopenharmony_ci 623bf215546Sopenharmony_ci void unquantise_weights(); 624bf215546Sopenharmony_ci void unquantise_colour_endpoints(); 625bf215546Sopenharmony_ci 626bf215546Sopenharmony_ci decode_error::type decode(const Decoder &decoder, InputBitVector in); 627bf215546Sopenharmony_ci 628bf215546Sopenharmony_ci decode_error::type decode_block_mode(InputBitVector in); 629bf215546Sopenharmony_ci decode_error::type decode_void_extent(InputBitVector in); 630bf215546Sopenharmony_ci void decode_cem(InputBitVector in); 631bf215546Sopenharmony_ci void unpack_colour_endpoints(InputBitVector in); 632bf215546Sopenharmony_ci void decode_colour_endpoints(); 633bf215546Sopenharmony_ci void unpack_weights(InputBitVector in); 634bf215546Sopenharmony_ci void compute_infill_weights(int block_w, int block_h, int block_d); 635bf215546Sopenharmony_ci 636bf215546Sopenharmony_ci void write_decoded(const Decoder &decoder, uint16_t *output); 637bf215546Sopenharmony_ci}; 638bf215546Sopenharmony_ci 639bf215546Sopenharmony_ci 640bf215546Sopenharmony_cidecode_error::type Decoder::decode(const uint8_t *in, uint16_t *output) const 641bf215546Sopenharmony_ci{ 642bf215546Sopenharmony_ci Block blk; 643bf215546Sopenharmony_ci InputBitVector in_vec; 644bf215546Sopenharmony_ci memcpy(&in_vec.data, in, 16); 645bf215546Sopenharmony_ci decode_error::type err = blk.decode(*this, in_vec); 646bf215546Sopenharmony_ci if (err == decode_error::ok) { 647bf215546Sopenharmony_ci blk.write_decoded(*this, output); 648bf215546Sopenharmony_ci } else { 649bf215546Sopenharmony_ci /* Fill output with the error colour */ 650bf215546Sopenharmony_ci for (int i = 0; i < block_w * block_h * block_d; ++i) { 651bf215546Sopenharmony_ci if (output_unorm8) { 652bf215546Sopenharmony_ci output[i*4+0] = 0xff; 653bf215546Sopenharmony_ci output[i*4+1] = 0; 654bf215546Sopenharmony_ci output[i*4+2] = 0xff; 655bf215546Sopenharmony_ci output[i*4+3] = 0xff; 656bf215546Sopenharmony_ci } else { 657bf215546Sopenharmony_ci assert(!srgb); /* srgb must use unorm8 */ 658bf215546Sopenharmony_ci 659bf215546Sopenharmony_ci output[i*4+0] = FP16_ONE; 660bf215546Sopenharmony_ci output[i*4+1] = FP16_ZERO; 661bf215546Sopenharmony_ci output[i*4+2] = FP16_ONE; 662bf215546Sopenharmony_ci output[i*4+3] = FP16_ONE; 663bf215546Sopenharmony_ci } 664bf215546Sopenharmony_ci } 665bf215546Sopenharmony_ci } 666bf215546Sopenharmony_ci return err; 667bf215546Sopenharmony_ci} 668bf215546Sopenharmony_ci 669bf215546Sopenharmony_ci 670bf215546Sopenharmony_cidecode_error::type Block::decode_void_extent(InputBitVector block) 671bf215546Sopenharmony_ci{ 672bf215546Sopenharmony_ci /* TODO: 3D */ 673bf215546Sopenharmony_ci 674bf215546Sopenharmony_ci is_void_extent = true; 675bf215546Sopenharmony_ci void_extent_d = block.get_bits(9, 1); 676bf215546Sopenharmony_ci void_extent_min_s = block.get_bits(12, 13); 677bf215546Sopenharmony_ci void_extent_max_s = block.get_bits(25, 13); 678bf215546Sopenharmony_ci void_extent_min_t = block.get_bits(38, 13); 679bf215546Sopenharmony_ci void_extent_max_t = block.get_bits(51, 13); 680bf215546Sopenharmony_ci void_extent_colour_r = block.get_bits(64, 16); 681bf215546Sopenharmony_ci void_extent_colour_g = block.get_bits(80, 16); 682bf215546Sopenharmony_ci void_extent_colour_b = block.get_bits(96, 16); 683bf215546Sopenharmony_ci void_extent_colour_a = block.get_bits(112, 16); 684bf215546Sopenharmony_ci 685bf215546Sopenharmony_ci /* TODO: maybe we should do something useful with the extent coordinates? */ 686bf215546Sopenharmony_ci 687bf215546Sopenharmony_ci if (void_extent_d) { 688bf215546Sopenharmony_ci return decode_error::unsupported_hdr_void_extent; 689bf215546Sopenharmony_ci } 690bf215546Sopenharmony_ci 691bf215546Sopenharmony_ci if (void_extent_min_s == 0x1fff && void_extent_max_s == 0x1fff 692bf215546Sopenharmony_ci && void_extent_min_t == 0x1fff && void_extent_max_t == 0x1fff) { 693bf215546Sopenharmony_ci 694bf215546Sopenharmony_ci /* No extents */ 695bf215546Sopenharmony_ci 696bf215546Sopenharmony_ci } else { 697bf215546Sopenharmony_ci 698bf215546Sopenharmony_ci /* Check for illegal encoding */ 699bf215546Sopenharmony_ci if (void_extent_min_s >= void_extent_max_s || void_extent_min_t >= void_extent_max_t) { 700bf215546Sopenharmony_ci return decode_error::invalid_range_in_void_extent; 701bf215546Sopenharmony_ci } 702bf215546Sopenharmony_ci } 703bf215546Sopenharmony_ci 704bf215546Sopenharmony_ci return decode_error::ok; 705bf215546Sopenharmony_ci} 706bf215546Sopenharmony_ci 707bf215546Sopenharmony_cidecode_error::type Block::decode_block_mode(InputBitVector in) 708bf215546Sopenharmony_ci{ 709bf215546Sopenharmony_ci dual_plane = in.get_bits(10, 1); 710bf215546Sopenharmony_ci high_prec = in.get_bits(9, 1); 711bf215546Sopenharmony_ci 712bf215546Sopenharmony_ci if (in.get_bits(0, 2) != 0x0) { 713bf215546Sopenharmony_ci wt_range = (in.get_bits(0, 2) << 1) | in.get_bits(4, 1); 714bf215546Sopenharmony_ci int a = in.get_bits(5, 2); 715bf215546Sopenharmony_ci int b = in.get_bits(7, 2); 716bf215546Sopenharmony_ci switch (in.get_bits(2, 2)) { 717bf215546Sopenharmony_ci case 0x0: 718bf215546Sopenharmony_ci if (VERBOSE_DECODE) 719bf215546Sopenharmony_ci in.printf_bits(0, 11, "DHBBAAR00RR"); 720bf215546Sopenharmony_ci wt_w = b + 4; 721bf215546Sopenharmony_ci wt_h = a + 2; 722bf215546Sopenharmony_ci break; 723bf215546Sopenharmony_ci case 0x1: 724bf215546Sopenharmony_ci if (VERBOSE_DECODE) 725bf215546Sopenharmony_ci in.printf_bits(0, 11, "DHBBAAR01RR"); 726bf215546Sopenharmony_ci wt_w = b + 8; 727bf215546Sopenharmony_ci wt_h = a + 2; 728bf215546Sopenharmony_ci break; 729bf215546Sopenharmony_ci case 0x2: 730bf215546Sopenharmony_ci if (VERBOSE_DECODE) 731bf215546Sopenharmony_ci in.printf_bits(0, 11, "DHBBAAR10RR"); 732bf215546Sopenharmony_ci wt_w = a + 2; 733bf215546Sopenharmony_ci wt_h = b + 8; 734bf215546Sopenharmony_ci break; 735bf215546Sopenharmony_ci case 0x3: 736bf215546Sopenharmony_ci if ((b & 0x2) == 0) { 737bf215546Sopenharmony_ci if (VERBOSE_DECODE) 738bf215546Sopenharmony_ci in.printf_bits(0, 11, "DH0BAAR11RR"); 739bf215546Sopenharmony_ci wt_w = a + 2; 740bf215546Sopenharmony_ci wt_h = b + 6; 741bf215546Sopenharmony_ci } else { 742bf215546Sopenharmony_ci if (VERBOSE_DECODE) 743bf215546Sopenharmony_ci in.printf_bits(0, 11, "DH1BAAR11RR"); 744bf215546Sopenharmony_ci wt_w = (b & 0x1) + 2; 745bf215546Sopenharmony_ci wt_h = a + 2; 746bf215546Sopenharmony_ci } 747bf215546Sopenharmony_ci break; 748bf215546Sopenharmony_ci } 749bf215546Sopenharmony_ci } else { 750bf215546Sopenharmony_ci if (in.get_bits(6, 3) == 0x7) { 751bf215546Sopenharmony_ci if (in.get_bits(0, 9) == 0x1fc) { 752bf215546Sopenharmony_ci if (VERBOSE_DECODE) 753bf215546Sopenharmony_ci in.printf_bits(0, 11, "xx111111100 (void extent)"); 754bf215546Sopenharmony_ci return decode_void_extent(in); 755bf215546Sopenharmony_ci } else { 756bf215546Sopenharmony_ci if (VERBOSE_DECODE) 757bf215546Sopenharmony_ci in.printf_bits(0, 11, "xx111xxxx00"); 758bf215546Sopenharmony_ci return decode_error::reserved_block_mode_1; 759bf215546Sopenharmony_ci } 760bf215546Sopenharmony_ci } 761bf215546Sopenharmony_ci if (in.get_bits(0, 4) == 0x0) { 762bf215546Sopenharmony_ci if (VERBOSE_DECODE) 763bf215546Sopenharmony_ci in.printf_bits(0, 11, "xxxxxxx0000"); 764bf215546Sopenharmony_ci return decode_error::reserved_block_mode_2; 765bf215546Sopenharmony_ci } 766bf215546Sopenharmony_ci 767bf215546Sopenharmony_ci wt_range = in.get_bits(1, 3) | in.get_bits(4, 1); 768bf215546Sopenharmony_ci int a = in.get_bits(5, 2); 769bf215546Sopenharmony_ci int b; 770bf215546Sopenharmony_ci 771bf215546Sopenharmony_ci switch (in.get_bits(7, 2)) { 772bf215546Sopenharmony_ci case 0x0: 773bf215546Sopenharmony_ci if (VERBOSE_DECODE) 774bf215546Sopenharmony_ci in.printf_bits(0, 11, "DH00AARRR00"); 775bf215546Sopenharmony_ci wt_w = 12; 776bf215546Sopenharmony_ci wt_h = a + 2; 777bf215546Sopenharmony_ci break; 778bf215546Sopenharmony_ci case 0x1: 779bf215546Sopenharmony_ci if (VERBOSE_DECODE) 780bf215546Sopenharmony_ci in.printf_bits(0, 11, "DH01AARRR00"); 781bf215546Sopenharmony_ci wt_w = a + 2; 782bf215546Sopenharmony_ci wt_h = 12; 783bf215546Sopenharmony_ci break; 784bf215546Sopenharmony_ci case 0x3: 785bf215546Sopenharmony_ci if (in.get_bits(5, 1) == 0) { 786bf215546Sopenharmony_ci if (VERBOSE_DECODE) 787bf215546Sopenharmony_ci in.printf_bits(0, 11, "DH1100RRR00"); 788bf215546Sopenharmony_ci wt_w = 6; 789bf215546Sopenharmony_ci wt_h = 10; 790bf215546Sopenharmony_ci } else { 791bf215546Sopenharmony_ci if (VERBOSE_DECODE) 792bf215546Sopenharmony_ci in.printf_bits(0, 11, "DH1101RRR00"); 793bf215546Sopenharmony_ci wt_w = 10; 794bf215546Sopenharmony_ci wt_h = 6; 795bf215546Sopenharmony_ci } 796bf215546Sopenharmony_ci break; 797bf215546Sopenharmony_ci case 0x2: 798bf215546Sopenharmony_ci if (VERBOSE_DECODE) 799bf215546Sopenharmony_ci in.printf_bits(0, 11, "BB10AARRR00"); 800bf215546Sopenharmony_ci b = in.get_bits(9, 2); 801bf215546Sopenharmony_ci wt_w = a + 6; 802bf215546Sopenharmony_ci wt_h = b + 6; 803bf215546Sopenharmony_ci dual_plane = 0; 804bf215546Sopenharmony_ci high_prec = 0; 805bf215546Sopenharmony_ci break; 806bf215546Sopenharmony_ci } 807bf215546Sopenharmony_ci } 808bf215546Sopenharmony_ci return decode_error::ok; 809bf215546Sopenharmony_ci} 810bf215546Sopenharmony_ci 811bf215546Sopenharmony_civoid Block::decode_cem(InputBitVector in) 812bf215546Sopenharmony_ci{ 813bf215546Sopenharmony_ci cems[0] = cems[1] = cems[2] = cems[3] = -1; 814bf215546Sopenharmony_ci 815bf215546Sopenharmony_ci num_extra_cem_bits = 0; 816bf215546Sopenharmony_ci extra_cem_bits = 0; 817bf215546Sopenharmony_ci 818bf215546Sopenharmony_ci if (num_parts > 1) { 819bf215546Sopenharmony_ci 820bf215546Sopenharmony_ci partition_index = in.get_bits(13, 10); 821bf215546Sopenharmony_ci if (VERBOSE_DECODE) 822bf215546Sopenharmony_ci in.printf_bits(13, 10, "partition ID (%d)", partition_index); 823bf215546Sopenharmony_ci 824bf215546Sopenharmony_ci uint32_t cem = in.get_bits(23, 6); 825bf215546Sopenharmony_ci 826bf215546Sopenharmony_ci if ((cem & 0x3) == 0x0) { 827bf215546Sopenharmony_ci cem >>= 2; 828bf215546Sopenharmony_ci cem_base_class = cem >> 2; 829bf215546Sopenharmony_ci is_multi_cem = false; 830bf215546Sopenharmony_ci 831bf215546Sopenharmony_ci for (int i = 0; i < num_parts; ++i) 832bf215546Sopenharmony_ci cems[i] = cem; 833bf215546Sopenharmony_ci 834bf215546Sopenharmony_ci if (VERBOSE_DECODE) 835bf215546Sopenharmony_ci in.printf_bits(23, 6, "CEM (single, %d)", cem); 836bf215546Sopenharmony_ci } else { 837bf215546Sopenharmony_ci 838bf215546Sopenharmony_ci cem_base_class = (cem & 0x3) - 1; 839bf215546Sopenharmony_ci is_multi_cem = true; 840bf215546Sopenharmony_ci 841bf215546Sopenharmony_ci if (VERBOSE_DECODE) 842bf215546Sopenharmony_ci in.printf_bits(23, 6, "CEM (multi, base class %d)", cem_base_class); 843bf215546Sopenharmony_ci 844bf215546Sopenharmony_ci int offset = 128 - weight_bits; 845bf215546Sopenharmony_ci 846bf215546Sopenharmony_ci if (num_parts == 2) { 847bf215546Sopenharmony_ci if (VERBOSE_DECODE) { 848bf215546Sopenharmony_ci in.printf_bits(25, 4, "M0M0 C1 C0"); 849bf215546Sopenharmony_ci in.printf_bits(offset - 2, 2, "M1M1"); 850bf215546Sopenharmony_ci } 851bf215546Sopenharmony_ci 852bf215546Sopenharmony_ci uint32_t c0 = in.get_bits(25, 1); 853bf215546Sopenharmony_ci uint32_t c1 = in.get_bits(26, 1); 854bf215546Sopenharmony_ci 855bf215546Sopenharmony_ci extra_cem_bits = c0 + c1; 856bf215546Sopenharmony_ci 857bf215546Sopenharmony_ci num_extra_cem_bits = 2; 858bf215546Sopenharmony_ci 859bf215546Sopenharmony_ci uint32_t m0 = in.get_bits(27, 2); 860bf215546Sopenharmony_ci uint32_t m1 = in.get_bits(offset - 2, 2); 861bf215546Sopenharmony_ci 862bf215546Sopenharmony_ci cems[0] = ((cem_base_class + c0) << 2) | m0; 863bf215546Sopenharmony_ci cems[1] = ((cem_base_class + c1) << 2) | m1; 864bf215546Sopenharmony_ci 865bf215546Sopenharmony_ci } else if (num_parts == 3) { 866bf215546Sopenharmony_ci if (VERBOSE_DECODE) { 867bf215546Sopenharmony_ci in.printf_bits(25, 4, "M0 C2 C1 C0"); 868bf215546Sopenharmony_ci in.printf_bits(offset - 5, 5, "M2M2 M1M1 M0"); 869bf215546Sopenharmony_ci } 870bf215546Sopenharmony_ci 871bf215546Sopenharmony_ci uint32_t c0 = in.get_bits(25, 1); 872bf215546Sopenharmony_ci uint32_t c1 = in.get_bits(26, 1); 873bf215546Sopenharmony_ci uint32_t c2 = in.get_bits(27, 1); 874bf215546Sopenharmony_ci 875bf215546Sopenharmony_ci extra_cem_bits = c0 + c1 + c2; 876bf215546Sopenharmony_ci 877bf215546Sopenharmony_ci num_extra_cem_bits = 5; 878bf215546Sopenharmony_ci 879bf215546Sopenharmony_ci uint32_t m0 = in.get_bits(28, 1) | (in.get_bits(128 - weight_bits - 5, 1) << 1); 880bf215546Sopenharmony_ci uint32_t m1 = in.get_bits(offset - 4, 2); 881bf215546Sopenharmony_ci uint32_t m2 = in.get_bits(offset - 2, 2); 882bf215546Sopenharmony_ci 883bf215546Sopenharmony_ci cems[0] = ((cem_base_class + c0) << 2) | m0; 884bf215546Sopenharmony_ci cems[1] = ((cem_base_class + c1) << 2) | m1; 885bf215546Sopenharmony_ci cems[2] = ((cem_base_class + c2) << 2) | m2; 886bf215546Sopenharmony_ci 887bf215546Sopenharmony_ci } else if (num_parts == 4) { 888bf215546Sopenharmony_ci if (VERBOSE_DECODE) { 889bf215546Sopenharmony_ci in.printf_bits(25, 4, "C3 C2 C1 C0"); 890bf215546Sopenharmony_ci in.printf_bits(offset - 8, 8, "M3M3 M2M2 M1M1 M0M0"); 891bf215546Sopenharmony_ci } 892bf215546Sopenharmony_ci 893bf215546Sopenharmony_ci uint32_t c0 = in.get_bits(25, 1); 894bf215546Sopenharmony_ci uint32_t c1 = in.get_bits(26, 1); 895bf215546Sopenharmony_ci uint32_t c2 = in.get_bits(27, 1); 896bf215546Sopenharmony_ci uint32_t c3 = in.get_bits(28, 1); 897bf215546Sopenharmony_ci 898bf215546Sopenharmony_ci extra_cem_bits = c0 + c1 + c2 + c3; 899bf215546Sopenharmony_ci 900bf215546Sopenharmony_ci num_extra_cem_bits = 8; 901bf215546Sopenharmony_ci 902bf215546Sopenharmony_ci uint32_t m0 = in.get_bits(offset - 8, 2); 903bf215546Sopenharmony_ci uint32_t m1 = in.get_bits(offset - 6, 2); 904bf215546Sopenharmony_ci uint32_t m2 = in.get_bits(offset - 4, 2); 905bf215546Sopenharmony_ci uint32_t m3 = in.get_bits(offset - 2, 2); 906bf215546Sopenharmony_ci 907bf215546Sopenharmony_ci cems[0] = ((cem_base_class + c0) << 2) | m0; 908bf215546Sopenharmony_ci cems[1] = ((cem_base_class + c1) << 2) | m1; 909bf215546Sopenharmony_ci cems[2] = ((cem_base_class + c2) << 2) | m2; 910bf215546Sopenharmony_ci cems[3] = ((cem_base_class + c3) << 2) | m3; 911bf215546Sopenharmony_ci } else { 912bf215546Sopenharmony_ci unreachable(""); 913bf215546Sopenharmony_ci } 914bf215546Sopenharmony_ci } 915bf215546Sopenharmony_ci 916bf215546Sopenharmony_ci colour_endpoint_data_offset = 29; 917bf215546Sopenharmony_ci 918bf215546Sopenharmony_ci } else { 919bf215546Sopenharmony_ci uint32_t cem = in.get_bits(13, 4); 920bf215546Sopenharmony_ci 921bf215546Sopenharmony_ci cem_base_class = cem >> 2; 922bf215546Sopenharmony_ci is_multi_cem = false; 923bf215546Sopenharmony_ci 924bf215546Sopenharmony_ci cems[0] = cem; 925bf215546Sopenharmony_ci 926bf215546Sopenharmony_ci partition_index = -1; 927bf215546Sopenharmony_ci 928bf215546Sopenharmony_ci if (VERBOSE_DECODE) 929bf215546Sopenharmony_ci in.printf_bits(13, 4, "CEM = %d (class %d)", cem, cem_base_class); 930bf215546Sopenharmony_ci 931bf215546Sopenharmony_ci colour_endpoint_data_offset = 17; 932bf215546Sopenharmony_ci } 933bf215546Sopenharmony_ci} 934bf215546Sopenharmony_ci 935bf215546Sopenharmony_civoid Block::unpack_colour_endpoints(InputBitVector in) 936bf215546Sopenharmony_ci{ 937bf215546Sopenharmony_ci if (ce_trits) { 938bf215546Sopenharmony_ci int offset = colour_endpoint_data_offset; 939bf215546Sopenharmony_ci int bits_left = colour_endpoint_bits; 940bf215546Sopenharmony_ci for (int i = 0; i < num_cem_values; i += 5) { 941bf215546Sopenharmony_ci int bits_to_read = MIN2(bits_left, 8 + ce_bits * 5); 942bf215546Sopenharmony_ci /* If ce_trits then ce_bits <= 6, so bits_to_read <= 38 and we have to use uint64_t */ 943bf215546Sopenharmony_ci uint64_t raw = in.get_bits64(offset, bits_to_read); 944bf215546Sopenharmony_ci unpack_trit_block(ce_bits, raw, &colour_endpoints_quant[i]); 945bf215546Sopenharmony_ci 946bf215546Sopenharmony_ci if (VERBOSE_DECODE) 947bf215546Sopenharmony_ci in.printf_bits(offset, bits_to_read, 948bf215546Sopenharmony_ci "trits [%d,%d,%d,%d,%d]", 949bf215546Sopenharmony_ci colour_endpoints_quant[i+0], colour_endpoints_quant[i+1], 950bf215546Sopenharmony_ci colour_endpoints_quant[i+2], colour_endpoints_quant[i+3], 951bf215546Sopenharmony_ci colour_endpoints_quant[i+4]); 952bf215546Sopenharmony_ci 953bf215546Sopenharmony_ci offset += 8 + ce_bits * 5; 954bf215546Sopenharmony_ci bits_left -= 8 + ce_bits * 5; 955bf215546Sopenharmony_ci } 956bf215546Sopenharmony_ci } else if (ce_quints) { 957bf215546Sopenharmony_ci int offset = colour_endpoint_data_offset; 958bf215546Sopenharmony_ci int bits_left = colour_endpoint_bits; 959bf215546Sopenharmony_ci for (int i = 0; i < num_cem_values; i += 3) { 960bf215546Sopenharmony_ci int bits_to_read = MIN2(bits_left, 7 + ce_bits * 3); 961bf215546Sopenharmony_ci /* If ce_quints then ce_bits <= 5, so bits_to_read <= 22 and we can use uint32_t */ 962bf215546Sopenharmony_ci uint32_t raw = in.get_bits(offset, bits_to_read); 963bf215546Sopenharmony_ci unpack_quint_block(ce_bits, raw, &colour_endpoints_quant[i]); 964bf215546Sopenharmony_ci 965bf215546Sopenharmony_ci if (VERBOSE_DECODE) 966bf215546Sopenharmony_ci in.printf_bits(offset, bits_to_read, 967bf215546Sopenharmony_ci "quints [%d,%d,%d]", 968bf215546Sopenharmony_ci colour_endpoints_quant[i], colour_endpoints_quant[i+1], colour_endpoints_quant[i+2]); 969bf215546Sopenharmony_ci 970bf215546Sopenharmony_ci offset += 7 + ce_bits * 3; 971bf215546Sopenharmony_ci bits_left -= 7 + ce_bits * 3; 972bf215546Sopenharmony_ci } 973bf215546Sopenharmony_ci } else { 974bf215546Sopenharmony_ci assert((colour_endpoint_bits % ce_bits) == 0); 975bf215546Sopenharmony_ci int offset = colour_endpoint_data_offset; 976bf215546Sopenharmony_ci for (int i = 0; i < num_cem_values; i++) { 977bf215546Sopenharmony_ci colour_endpoints_quant[i] = in.get_bits(offset, ce_bits); 978bf215546Sopenharmony_ci 979bf215546Sopenharmony_ci if (VERBOSE_DECODE) 980bf215546Sopenharmony_ci in.printf_bits(offset, ce_bits, "bits [%d]", colour_endpoints_quant[i]); 981bf215546Sopenharmony_ci 982bf215546Sopenharmony_ci offset += ce_bits; 983bf215546Sopenharmony_ci } 984bf215546Sopenharmony_ci } 985bf215546Sopenharmony_ci} 986bf215546Sopenharmony_ci 987bf215546Sopenharmony_civoid Block::decode_colour_endpoints() 988bf215546Sopenharmony_ci{ 989bf215546Sopenharmony_ci int cem_values_idx = 0; 990bf215546Sopenharmony_ci for (int part = 0; part < num_parts; ++part) { 991bf215546Sopenharmony_ci uint8_t *v = &colour_endpoints[cem_values_idx]; 992bf215546Sopenharmony_ci int v0 = v[0]; 993bf215546Sopenharmony_ci int v1 = v[1]; 994bf215546Sopenharmony_ci int v2 = v[2]; 995bf215546Sopenharmony_ci int v3 = v[3]; 996bf215546Sopenharmony_ci int v4 = v[4]; 997bf215546Sopenharmony_ci int v5 = v[5]; 998bf215546Sopenharmony_ci int v6 = v[6]; 999bf215546Sopenharmony_ci int v7 = v[7]; 1000bf215546Sopenharmony_ci cem_values_idx += ((cems[part] >> 2) + 1) * 2; 1001bf215546Sopenharmony_ci 1002bf215546Sopenharmony_ci uint8x4_t e0, e1; 1003bf215546Sopenharmony_ci int s0, s1, L0, L1; 1004bf215546Sopenharmony_ci 1005bf215546Sopenharmony_ci switch (cems[part]) 1006bf215546Sopenharmony_ci { 1007bf215546Sopenharmony_ci case 0: 1008bf215546Sopenharmony_ci e0 = uint8x4_t(v0, v0, v0, 0xff); 1009bf215546Sopenharmony_ci e1 = uint8x4_t(v1, v1, v1, 0xff); 1010bf215546Sopenharmony_ci break; 1011bf215546Sopenharmony_ci case 1: 1012bf215546Sopenharmony_ci L0 = (v0 >> 2) | (v1 & 0xc0); 1013bf215546Sopenharmony_ci L1 = L0 + (v1 & 0x3f); 1014bf215546Sopenharmony_ci if (L1 > 0xff) 1015bf215546Sopenharmony_ci L1 = 0xff; 1016bf215546Sopenharmony_ci e0 = uint8x4_t(L0, L0, L0, 0xff); 1017bf215546Sopenharmony_ci e1 = uint8x4_t(L1, L1, L1, 0xff); 1018bf215546Sopenharmony_ci break; 1019bf215546Sopenharmony_ci case 4: 1020bf215546Sopenharmony_ci e0 = uint8x4_t(v0, v0, v0, v2); 1021bf215546Sopenharmony_ci e1 = uint8x4_t(v1, v1, v1, v3); 1022bf215546Sopenharmony_ci break; 1023bf215546Sopenharmony_ci case 5: 1024bf215546Sopenharmony_ci bit_transfer_signed(v1, v0); 1025bf215546Sopenharmony_ci bit_transfer_signed(v3, v2); 1026bf215546Sopenharmony_ci e0 = uint8x4_t(v0, v0, v0, v2); 1027bf215546Sopenharmony_ci e1 = uint8x4_t::clamped(v0+v1, v0+v1, v0+v1, v2+v3); 1028bf215546Sopenharmony_ci break; 1029bf215546Sopenharmony_ci case 6: 1030bf215546Sopenharmony_ci e0 = uint8x4_t(v0*v3 >> 8, v1*v3 >> 8, v2*v3 >> 8, 0xff); 1031bf215546Sopenharmony_ci e1 = uint8x4_t(v0, v1, v2, 0xff); 1032bf215546Sopenharmony_ci break; 1033bf215546Sopenharmony_ci case 8: 1034bf215546Sopenharmony_ci s0 = v0 + v2 + v4; 1035bf215546Sopenharmony_ci s1 = v1 + v3 + v5; 1036bf215546Sopenharmony_ci if (s1 >= s0) { 1037bf215546Sopenharmony_ci e0 = uint8x4_t(v0, v2, v4, 0xff); 1038bf215546Sopenharmony_ci e1 = uint8x4_t(v1, v3, v5, 0xff); 1039bf215546Sopenharmony_ci } else { 1040bf215546Sopenharmony_ci e0 = blue_contract(v1, v3, v5, 0xff); 1041bf215546Sopenharmony_ci e1 = blue_contract(v0, v2, v4, 0xff); 1042bf215546Sopenharmony_ci } 1043bf215546Sopenharmony_ci break; 1044bf215546Sopenharmony_ci case 9: 1045bf215546Sopenharmony_ci bit_transfer_signed(v1, v0); 1046bf215546Sopenharmony_ci bit_transfer_signed(v3, v2); 1047bf215546Sopenharmony_ci bit_transfer_signed(v5, v4); 1048bf215546Sopenharmony_ci if (v1 + v3 + v5 >= 0) { 1049bf215546Sopenharmony_ci e0 = uint8x4_t(v0, v2, v4, 0xff); 1050bf215546Sopenharmony_ci e1 = uint8x4_t::clamped(v0+v1, v2+v3, v4+v5, 0xff); 1051bf215546Sopenharmony_ci } else { 1052bf215546Sopenharmony_ci e0 = blue_contract_clamped(v0+v1, v2+v3, v4+v5, 0xff); 1053bf215546Sopenharmony_ci e1 = blue_contract(v0, v2, v4, 0xff); 1054bf215546Sopenharmony_ci } 1055bf215546Sopenharmony_ci break; 1056bf215546Sopenharmony_ci case 10: 1057bf215546Sopenharmony_ci e0 = uint8x4_t(v0*v3 >> 8, v1*v3 >> 8, v2*v3 >> 8, v4); 1058bf215546Sopenharmony_ci e1 = uint8x4_t(v0, v1, v2, v5); 1059bf215546Sopenharmony_ci break; 1060bf215546Sopenharmony_ci case 12: 1061bf215546Sopenharmony_ci s0 = v0 + v2 + v4; 1062bf215546Sopenharmony_ci s1 = v1 + v3 + v5; 1063bf215546Sopenharmony_ci if (s1 >= s0) { 1064bf215546Sopenharmony_ci e0 = uint8x4_t(v0, v2, v4, v6); 1065bf215546Sopenharmony_ci e1 = uint8x4_t(v1, v3, v5, v7); 1066bf215546Sopenharmony_ci } else { 1067bf215546Sopenharmony_ci e0 = blue_contract(v1, v3, v5, v7); 1068bf215546Sopenharmony_ci e1 = blue_contract(v0, v2, v4, v6); 1069bf215546Sopenharmony_ci } 1070bf215546Sopenharmony_ci break; 1071bf215546Sopenharmony_ci case 13: 1072bf215546Sopenharmony_ci bit_transfer_signed(v1, v0); 1073bf215546Sopenharmony_ci bit_transfer_signed(v3, v2); 1074bf215546Sopenharmony_ci bit_transfer_signed(v5, v4); 1075bf215546Sopenharmony_ci bit_transfer_signed(v7, v6); 1076bf215546Sopenharmony_ci if (v1 + v3 + v5 >= 0) { 1077bf215546Sopenharmony_ci e0 = uint8x4_t(v0, v2, v4, v6); 1078bf215546Sopenharmony_ci e1 = uint8x4_t::clamped(v0+v1, v2+v3, v4+v5, v6+v7); 1079bf215546Sopenharmony_ci } else { 1080bf215546Sopenharmony_ci e0 = blue_contract_clamped(v0+v1, v2+v3, v4+v5, v6+v7); 1081bf215546Sopenharmony_ci e1 = blue_contract(v0, v2, v4, v6); 1082bf215546Sopenharmony_ci } 1083bf215546Sopenharmony_ci break; 1084bf215546Sopenharmony_ci default: 1085bf215546Sopenharmony_ci /* HDR endpoints not supported; return error colour */ 1086bf215546Sopenharmony_ci e0 = uint8x4_t(255, 0, 255, 255); 1087bf215546Sopenharmony_ci e1 = uint8x4_t(255, 0, 255, 255); 1088bf215546Sopenharmony_ci break; 1089bf215546Sopenharmony_ci } 1090bf215546Sopenharmony_ci 1091bf215546Sopenharmony_ci endpoints_decoded[0][part] = e0; 1092bf215546Sopenharmony_ci endpoints_decoded[1][part] = e1; 1093bf215546Sopenharmony_ci 1094bf215546Sopenharmony_ci if (VERBOSE_DECODE) { 1095bf215546Sopenharmony_ci printf("cems[%d]=%d v=[", part, cems[part]); 1096bf215546Sopenharmony_ci for (int i = 0; i < (cems[part] >> 2) + 1; ++i) { 1097bf215546Sopenharmony_ci if (i) 1098bf215546Sopenharmony_ci printf(", "); 1099bf215546Sopenharmony_ci printf("%3d", v[i]); 1100bf215546Sopenharmony_ci } 1101bf215546Sopenharmony_ci printf("] e0=[%3d,%4d,%4d,%4d] e1=[%3d,%4d,%4d,%4d]\n", 1102bf215546Sopenharmony_ci e0.v[0], e0.v[1], e0.v[2], e0.v[3], 1103bf215546Sopenharmony_ci e1.v[0], e1.v[1], e1.v[2], e1.v[3]); 1104bf215546Sopenharmony_ci } 1105bf215546Sopenharmony_ci } 1106bf215546Sopenharmony_ci} 1107bf215546Sopenharmony_ci 1108bf215546Sopenharmony_civoid Block::unpack_weights(InputBitVector in) 1109bf215546Sopenharmony_ci{ 1110bf215546Sopenharmony_ci if (wt_trits) { 1111bf215546Sopenharmony_ci int offset = 128; 1112bf215546Sopenharmony_ci int bits_left = weight_bits; 1113bf215546Sopenharmony_ci for (int i = 0; i < num_weights; i += 5) { 1114bf215546Sopenharmony_ci int bits_to_read = MIN2(bits_left, 8 + 5*wt_bits); 1115bf215546Sopenharmony_ci /* If wt_trits then wt_bits <= 3, so bits_to_read <= 23 and we can use uint32_t */ 1116bf215546Sopenharmony_ci uint32_t raw = in.get_bits_rev(offset, bits_to_read); 1117bf215546Sopenharmony_ci unpack_trit_block(wt_bits, raw, &weights_quant[i]); 1118bf215546Sopenharmony_ci 1119bf215546Sopenharmony_ci if (VERBOSE_DECODE) 1120bf215546Sopenharmony_ci in.printf_bits(offset - bits_to_read, bits_to_read, "weight trits [%d,%d,%d,%d,%d]", 1121bf215546Sopenharmony_ci weights_quant[i+0], weights_quant[i+1], 1122bf215546Sopenharmony_ci weights_quant[i+2], weights_quant[i+3], 1123bf215546Sopenharmony_ci weights_quant[i+4]); 1124bf215546Sopenharmony_ci 1125bf215546Sopenharmony_ci offset -= 8 + wt_bits * 5; 1126bf215546Sopenharmony_ci bits_left -= 8 + wt_bits * 5; 1127bf215546Sopenharmony_ci } 1128bf215546Sopenharmony_ci 1129bf215546Sopenharmony_ci } else if (wt_quints) { 1130bf215546Sopenharmony_ci 1131bf215546Sopenharmony_ci int offset = 128; 1132bf215546Sopenharmony_ci int bits_left = weight_bits; 1133bf215546Sopenharmony_ci for (int i = 0; i < num_weights; i += 3) { 1134bf215546Sopenharmony_ci int bits_to_read = MIN2(bits_left, 7 + 3*wt_bits); 1135bf215546Sopenharmony_ci /* If wt_quints then wt_bits <= 2, so bits_to_read <= 13 and we can use uint32_t */ 1136bf215546Sopenharmony_ci uint32_t raw = in.get_bits_rev(offset, bits_to_read); 1137bf215546Sopenharmony_ci unpack_quint_block(wt_bits, raw, &weights_quant[i]); 1138bf215546Sopenharmony_ci 1139bf215546Sopenharmony_ci if (VERBOSE_DECODE) 1140bf215546Sopenharmony_ci in.printf_bits(offset - bits_to_read, bits_to_read, "weight quints [%d,%d,%d]", 1141bf215546Sopenharmony_ci weights_quant[i], weights_quant[i+1], weights_quant[i+2]); 1142bf215546Sopenharmony_ci 1143bf215546Sopenharmony_ci offset -= 7 + wt_bits * 3; 1144bf215546Sopenharmony_ci bits_left -= 7 + wt_bits * 3; 1145bf215546Sopenharmony_ci } 1146bf215546Sopenharmony_ci 1147bf215546Sopenharmony_ci } else { 1148bf215546Sopenharmony_ci int offset = 128; 1149bf215546Sopenharmony_ci assert((weight_bits % wt_bits) == 0); 1150bf215546Sopenharmony_ci for (int i = 0; i < num_weights; ++i) { 1151bf215546Sopenharmony_ci weights_quant[i] = in.get_bits_rev(offset, wt_bits); 1152bf215546Sopenharmony_ci 1153bf215546Sopenharmony_ci if (VERBOSE_DECODE) 1154bf215546Sopenharmony_ci in.printf_bits(offset - wt_bits, wt_bits, "weight bits [%d]", weights_quant[i]); 1155bf215546Sopenharmony_ci 1156bf215546Sopenharmony_ci offset -= wt_bits; 1157bf215546Sopenharmony_ci } 1158bf215546Sopenharmony_ci } 1159bf215546Sopenharmony_ci} 1160bf215546Sopenharmony_ci 1161bf215546Sopenharmony_civoid Block::unquantise_weights() 1162bf215546Sopenharmony_ci{ 1163bf215546Sopenharmony_ci assert(num_weights <= (int)ARRAY_SIZE(weights_quant)); 1164bf215546Sopenharmony_ci assert(num_weights <= (int)ARRAY_SIZE(weights)); 1165bf215546Sopenharmony_ci 1166bf215546Sopenharmony_ci memset(weights, 0, sizeof(weights)); 1167bf215546Sopenharmony_ci 1168bf215546Sopenharmony_ci for (int i = 0; i < num_weights; ++i) { 1169bf215546Sopenharmony_ci 1170bf215546Sopenharmony_ci uint8_t v = weights_quant[i]; 1171bf215546Sopenharmony_ci uint8_t w; 1172bf215546Sopenharmony_ci 1173bf215546Sopenharmony_ci if (wt_trits) { 1174bf215546Sopenharmony_ci 1175bf215546Sopenharmony_ci if (wt_bits == 0) { 1176bf215546Sopenharmony_ci w = v * 32; 1177bf215546Sopenharmony_ci } else { 1178bf215546Sopenharmony_ci uint8_t A, B, C, D; 1179bf215546Sopenharmony_ci A = (v & 0x1) ? 0x7F : 0x00; 1180bf215546Sopenharmony_ci switch (wt_bits) { 1181bf215546Sopenharmony_ci case 1: 1182bf215546Sopenharmony_ci B = 0; 1183bf215546Sopenharmony_ci C = 50; 1184bf215546Sopenharmony_ci D = v >> 1; 1185bf215546Sopenharmony_ci break; 1186bf215546Sopenharmony_ci case 2: 1187bf215546Sopenharmony_ci B = (v & 0x2) ? 0x45 : 0x00; 1188bf215546Sopenharmony_ci C = 23; 1189bf215546Sopenharmony_ci D = v >> 2; 1190bf215546Sopenharmony_ci break; 1191bf215546Sopenharmony_ci case 3: 1192bf215546Sopenharmony_ci B = ((v & 0x6) >> 1) | ((v & 0x6) << 4); 1193bf215546Sopenharmony_ci C = 11; 1194bf215546Sopenharmony_ci D = v >> 3; 1195bf215546Sopenharmony_ci break; 1196bf215546Sopenharmony_ci default: 1197bf215546Sopenharmony_ci unreachable(""); 1198bf215546Sopenharmony_ci } 1199bf215546Sopenharmony_ci uint16_t T = D * C + B; 1200bf215546Sopenharmony_ci T = T ^ A; 1201bf215546Sopenharmony_ci T = (A & 0x20) | (T >> 2); 1202bf215546Sopenharmony_ci assert(T < 64); 1203bf215546Sopenharmony_ci if (T > 32) 1204bf215546Sopenharmony_ci T++; 1205bf215546Sopenharmony_ci w = T; 1206bf215546Sopenharmony_ci } 1207bf215546Sopenharmony_ci 1208bf215546Sopenharmony_ci } else if (wt_quints) { 1209bf215546Sopenharmony_ci 1210bf215546Sopenharmony_ci if (wt_bits == 0) { 1211bf215546Sopenharmony_ci w = v * 16; 1212bf215546Sopenharmony_ci } else { 1213bf215546Sopenharmony_ci uint8_t A, B, C, D; 1214bf215546Sopenharmony_ci A = (v & 0x1) ? 0x7F : 0x00; 1215bf215546Sopenharmony_ci switch (wt_bits) { 1216bf215546Sopenharmony_ci case 1: 1217bf215546Sopenharmony_ci B = 0; 1218bf215546Sopenharmony_ci C = 28; 1219bf215546Sopenharmony_ci D = v >> 1; 1220bf215546Sopenharmony_ci break; 1221bf215546Sopenharmony_ci case 2: 1222bf215546Sopenharmony_ci B = (v & 0x2) ? 0x42 : 0x00; 1223bf215546Sopenharmony_ci C = 13; 1224bf215546Sopenharmony_ci D = v >> 2; 1225bf215546Sopenharmony_ci break; 1226bf215546Sopenharmony_ci default: 1227bf215546Sopenharmony_ci unreachable(""); 1228bf215546Sopenharmony_ci } 1229bf215546Sopenharmony_ci uint16_t T = D * C + B; 1230bf215546Sopenharmony_ci T = T ^ A; 1231bf215546Sopenharmony_ci T = (A & 0x20) | (T >> 2); 1232bf215546Sopenharmony_ci assert(T < 64); 1233bf215546Sopenharmony_ci if (T > 32) 1234bf215546Sopenharmony_ci T++; 1235bf215546Sopenharmony_ci w = T; 1236bf215546Sopenharmony_ci } 1237bf215546Sopenharmony_ci weights[i] = w; 1238bf215546Sopenharmony_ci 1239bf215546Sopenharmony_ci } else { 1240bf215546Sopenharmony_ci 1241bf215546Sopenharmony_ci switch (wt_bits) { 1242bf215546Sopenharmony_ci case 1: w = v ? 0x3F : 0x00; break; 1243bf215546Sopenharmony_ci case 2: w = v | (v << 2) | (v << 4); break; 1244bf215546Sopenharmony_ci case 3: w = v | (v << 3); break; 1245bf215546Sopenharmony_ci case 4: w = (v >> 2) | (v << 2); break; 1246bf215546Sopenharmony_ci case 5: w = (v >> 4) | (v << 1); break; 1247bf215546Sopenharmony_ci default: unreachable(""); 1248bf215546Sopenharmony_ci } 1249bf215546Sopenharmony_ci assert(w < 64); 1250bf215546Sopenharmony_ci if (w > 32) 1251bf215546Sopenharmony_ci w++; 1252bf215546Sopenharmony_ci } 1253bf215546Sopenharmony_ci weights[i] = w; 1254bf215546Sopenharmony_ci } 1255bf215546Sopenharmony_ci} 1256bf215546Sopenharmony_ci 1257bf215546Sopenharmony_civoid Block::compute_infill_weights(int block_w, int block_h, int block_d) 1258bf215546Sopenharmony_ci{ 1259bf215546Sopenharmony_ci int Ds = block_w <= 1 ? 0 : (1024 + block_w / 2) / (block_w - 1); 1260bf215546Sopenharmony_ci int Dt = block_h <= 1 ? 0 : (1024 + block_h / 2) / (block_h - 1); 1261bf215546Sopenharmony_ci int Dr = block_d <= 1 ? 0 : (1024 + block_d / 2) / (block_d - 1); 1262bf215546Sopenharmony_ci for (int r = 0; r < block_d; ++r) { 1263bf215546Sopenharmony_ci for (int t = 0; t < block_h; ++t) { 1264bf215546Sopenharmony_ci for (int s = 0; s < block_w; ++s) { 1265bf215546Sopenharmony_ci int cs = Ds * s; 1266bf215546Sopenharmony_ci int ct = Dt * t; 1267bf215546Sopenharmony_ci int cr = Dr * r; 1268bf215546Sopenharmony_ci int gs = (cs * (wt_w - 1) + 32) >> 6; 1269bf215546Sopenharmony_ci int gt = (ct * (wt_h - 1) + 32) >> 6; 1270bf215546Sopenharmony_ci int gr = (cr * (wt_d - 1) + 32) >> 6; 1271bf215546Sopenharmony_ci assert(gs >= 0 && gs <= 176); 1272bf215546Sopenharmony_ci assert(gt >= 0 && gt <= 176); 1273bf215546Sopenharmony_ci assert(gr >= 0 && gr <= 176); 1274bf215546Sopenharmony_ci int js = gs >> 4; 1275bf215546Sopenharmony_ci int fs = gs & 0xf; 1276bf215546Sopenharmony_ci int jt = gt >> 4; 1277bf215546Sopenharmony_ci int ft = gt & 0xf; 1278bf215546Sopenharmony_ci int jr = gr >> 4; 1279bf215546Sopenharmony_ci int fr = gr & 0xf; 1280bf215546Sopenharmony_ci 1281bf215546Sopenharmony_ci /* TODO: 3D */ 1282bf215546Sopenharmony_ci (void)jr; 1283bf215546Sopenharmony_ci (void)fr; 1284bf215546Sopenharmony_ci 1285bf215546Sopenharmony_ci int w11 = (fs * ft + 8) >> 4; 1286bf215546Sopenharmony_ci int w10 = ft - w11; 1287bf215546Sopenharmony_ci int w01 = fs - w11; 1288bf215546Sopenharmony_ci int w00 = 16 - fs - ft + w11; 1289bf215546Sopenharmony_ci 1290bf215546Sopenharmony_ci if (dual_plane) { 1291bf215546Sopenharmony_ci int p00, p01, p10, p11, i0, i1; 1292bf215546Sopenharmony_ci int v0 = js + jt * wt_w; 1293bf215546Sopenharmony_ci p00 = weights[(v0) * 2]; 1294bf215546Sopenharmony_ci p01 = weights[(v0 + 1) * 2]; 1295bf215546Sopenharmony_ci p10 = weights[(v0 + wt_w) * 2]; 1296bf215546Sopenharmony_ci p11 = weights[(v0 + wt_w + 1) * 2]; 1297bf215546Sopenharmony_ci i0 = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4; 1298bf215546Sopenharmony_ci p00 = weights[(v0) * 2 + 1]; 1299bf215546Sopenharmony_ci p01 = weights[(v0 + 1) * 2 + 1]; 1300bf215546Sopenharmony_ci p10 = weights[(v0 + wt_w) * 2 + 1]; 1301bf215546Sopenharmony_ci p11 = weights[(v0 + wt_w + 1) * 2 + 1]; 1302bf215546Sopenharmony_ci assert((v0 + wt_w + 1) * 2 + 1 < (int)ARRAY_SIZE(weights)); 1303bf215546Sopenharmony_ci i1 = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4; 1304bf215546Sopenharmony_ci assert(0 <= i0 && i0 <= 64); 1305bf215546Sopenharmony_ci infill_weights[0][s + t*block_w + r*block_w*block_h] = i0; 1306bf215546Sopenharmony_ci infill_weights[1][s + t*block_w + r*block_w*block_h] = i1; 1307bf215546Sopenharmony_ci } else { 1308bf215546Sopenharmony_ci int p00, p01, p10, p11, i; 1309bf215546Sopenharmony_ci int v0 = js + jt * wt_w; 1310bf215546Sopenharmony_ci p00 = weights[v0]; 1311bf215546Sopenharmony_ci p01 = weights[v0 + 1]; 1312bf215546Sopenharmony_ci p10 = weights[v0 + wt_w]; 1313bf215546Sopenharmony_ci p11 = weights[v0 + wt_w + 1]; 1314bf215546Sopenharmony_ci assert(v0 + wt_w + 1 < (int)ARRAY_SIZE(weights)); 1315bf215546Sopenharmony_ci i = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4; 1316bf215546Sopenharmony_ci assert(0 <= i && i <= 64); 1317bf215546Sopenharmony_ci infill_weights[0][s + t*block_w + r*block_w*block_h] = i; 1318bf215546Sopenharmony_ci } 1319bf215546Sopenharmony_ci } 1320bf215546Sopenharmony_ci } 1321bf215546Sopenharmony_ci } 1322bf215546Sopenharmony_ci} 1323bf215546Sopenharmony_ci 1324bf215546Sopenharmony_civoid Block::unquantise_colour_endpoints() 1325bf215546Sopenharmony_ci{ 1326bf215546Sopenharmony_ci assert(num_cem_values <= (int)ARRAY_SIZE(colour_endpoints_quant)); 1327bf215546Sopenharmony_ci assert(num_cem_values <= (int)ARRAY_SIZE(colour_endpoints)); 1328bf215546Sopenharmony_ci 1329bf215546Sopenharmony_ci for (int i = 0; i < num_cem_values; ++i) { 1330bf215546Sopenharmony_ci uint8_t v = colour_endpoints_quant[i]; 1331bf215546Sopenharmony_ci 1332bf215546Sopenharmony_ci if (ce_trits) { 1333bf215546Sopenharmony_ci uint16_t A, B, C, D; 1334bf215546Sopenharmony_ci uint16_t t; 1335bf215546Sopenharmony_ci A = (v & 0x1) ? 0x1FF : 0x000; 1336bf215546Sopenharmony_ci switch (ce_bits) { 1337bf215546Sopenharmony_ci case 1: 1338bf215546Sopenharmony_ci B = 0; 1339bf215546Sopenharmony_ci C = 204; 1340bf215546Sopenharmony_ci D = v >> 1; 1341bf215546Sopenharmony_ci break; 1342bf215546Sopenharmony_ci case 2: 1343bf215546Sopenharmony_ci B = (v & 0x2) ? 0x116 : 0x000; 1344bf215546Sopenharmony_ci C = 93; 1345bf215546Sopenharmony_ci D = v >> 2; 1346bf215546Sopenharmony_ci break; 1347bf215546Sopenharmony_ci case 3: 1348bf215546Sopenharmony_ci t = ((v >> 1) & 0x3); 1349bf215546Sopenharmony_ci B = t | (t << 2) | (t << 7); 1350bf215546Sopenharmony_ci C = 44; 1351bf215546Sopenharmony_ci D = v >> 3; 1352bf215546Sopenharmony_ci break; 1353bf215546Sopenharmony_ci case 4: 1354bf215546Sopenharmony_ci t = ((v >> 1) & 0x7); 1355bf215546Sopenharmony_ci B = t | (t << 6); 1356bf215546Sopenharmony_ci C = 22; 1357bf215546Sopenharmony_ci D = v >> 4; 1358bf215546Sopenharmony_ci break; 1359bf215546Sopenharmony_ci case 5: 1360bf215546Sopenharmony_ci t = ((v >> 1) & 0xF); 1361bf215546Sopenharmony_ci B = (t >> 2) | (t << 5); 1362bf215546Sopenharmony_ci C = 11; 1363bf215546Sopenharmony_ci D = v >> 5; 1364bf215546Sopenharmony_ci break; 1365bf215546Sopenharmony_ci case 6: 1366bf215546Sopenharmony_ci B = ((v & 0x3E) << 3) | ((v >> 5) & 0x1); 1367bf215546Sopenharmony_ci C = 5; 1368bf215546Sopenharmony_ci D = v >> 6; 1369bf215546Sopenharmony_ci break; 1370bf215546Sopenharmony_ci default: 1371bf215546Sopenharmony_ci unreachable(""); 1372bf215546Sopenharmony_ci } 1373bf215546Sopenharmony_ci uint16_t T = D * C + B; 1374bf215546Sopenharmony_ci T = T ^ A; 1375bf215546Sopenharmony_ci T = (A & 0x80) | (T >> 2); 1376bf215546Sopenharmony_ci assert(T < 256); 1377bf215546Sopenharmony_ci colour_endpoints[i] = T; 1378bf215546Sopenharmony_ci } else if (ce_quints) { 1379bf215546Sopenharmony_ci uint16_t A, B, C, D; 1380bf215546Sopenharmony_ci uint16_t t; 1381bf215546Sopenharmony_ci A = (v & 0x1) ? 0x1FF : 0x000; 1382bf215546Sopenharmony_ci switch (ce_bits) { 1383bf215546Sopenharmony_ci case 1: 1384bf215546Sopenharmony_ci B = 0; 1385bf215546Sopenharmony_ci C = 113; 1386bf215546Sopenharmony_ci D = v >> 1; 1387bf215546Sopenharmony_ci break; 1388bf215546Sopenharmony_ci case 2: 1389bf215546Sopenharmony_ci B = (v & 0x2) ? 0x10C : 0x000; 1390bf215546Sopenharmony_ci C = 54; 1391bf215546Sopenharmony_ci D = v >> 2; 1392bf215546Sopenharmony_ci break; 1393bf215546Sopenharmony_ci case 3: 1394bf215546Sopenharmony_ci t = ((v >> 1) & 0x3); 1395bf215546Sopenharmony_ci B = (t >> 1) | (t << 1) | (t << 7); 1396bf215546Sopenharmony_ci C = 26; 1397bf215546Sopenharmony_ci D = v >> 3; 1398bf215546Sopenharmony_ci break; 1399bf215546Sopenharmony_ci case 4: 1400bf215546Sopenharmony_ci t = ((v >> 1) & 0x7); 1401bf215546Sopenharmony_ci B = (t >> 1) | (t << 6); 1402bf215546Sopenharmony_ci C = 13; 1403bf215546Sopenharmony_ci D = v >> 4; 1404bf215546Sopenharmony_ci break; 1405bf215546Sopenharmony_ci case 5: 1406bf215546Sopenharmony_ci t = ((v >> 1) & 0xF); 1407bf215546Sopenharmony_ci B = (t >> 4) | (t << 5); 1408bf215546Sopenharmony_ci C = 6; 1409bf215546Sopenharmony_ci D = v >> 5; 1410bf215546Sopenharmony_ci break; 1411bf215546Sopenharmony_ci default: 1412bf215546Sopenharmony_ci unreachable(""); 1413bf215546Sopenharmony_ci } 1414bf215546Sopenharmony_ci uint16_t T = D * C + B; 1415bf215546Sopenharmony_ci T = T ^ A; 1416bf215546Sopenharmony_ci T = (A & 0x80) | (T >> 2); 1417bf215546Sopenharmony_ci assert(T < 256); 1418bf215546Sopenharmony_ci colour_endpoints[i] = T; 1419bf215546Sopenharmony_ci } else { 1420bf215546Sopenharmony_ci switch (ce_bits) { 1421bf215546Sopenharmony_ci case 1: v = v ? 0xFF : 0x00; break; 1422bf215546Sopenharmony_ci case 2: v = (v << 6) | (v << 4) | (v << 2) | v; break; 1423bf215546Sopenharmony_ci case 3: v = (v << 5) | (v << 2) | (v >> 1); break; 1424bf215546Sopenharmony_ci case 4: v = (v << 4) | v; break; 1425bf215546Sopenharmony_ci case 5: v = (v << 3) | (v >> 2); break; 1426bf215546Sopenharmony_ci case 6: v = (v << 2) | (v >> 4); break; 1427bf215546Sopenharmony_ci case 7: v = (v << 1) | (v >> 6); break; 1428bf215546Sopenharmony_ci case 8: break; 1429bf215546Sopenharmony_ci default: unreachable(""); 1430bf215546Sopenharmony_ci } 1431bf215546Sopenharmony_ci colour_endpoints[i] = v; 1432bf215546Sopenharmony_ci } 1433bf215546Sopenharmony_ci } 1434bf215546Sopenharmony_ci} 1435bf215546Sopenharmony_ci 1436bf215546Sopenharmony_cidecode_error::type Block::decode(const Decoder &decoder, InputBitVector in) 1437bf215546Sopenharmony_ci{ 1438bf215546Sopenharmony_ci decode_error::type err; 1439bf215546Sopenharmony_ci 1440bf215546Sopenharmony_ci is_error = false; 1441bf215546Sopenharmony_ci bogus_colour_endpoints = false; 1442bf215546Sopenharmony_ci bogus_weights = false; 1443bf215546Sopenharmony_ci is_void_extent = false; 1444bf215546Sopenharmony_ci 1445bf215546Sopenharmony_ci wt_d = 1; 1446bf215546Sopenharmony_ci /* TODO: 3D */ 1447bf215546Sopenharmony_ci 1448bf215546Sopenharmony_ci /* TODO: test for all the illegal encodings */ 1449bf215546Sopenharmony_ci 1450bf215546Sopenharmony_ci if (VERBOSE_DECODE) 1451bf215546Sopenharmony_ci in.printf_bits(0, 128); 1452bf215546Sopenharmony_ci 1453bf215546Sopenharmony_ci err = decode_block_mode(in); 1454bf215546Sopenharmony_ci if (err != decode_error::ok) 1455bf215546Sopenharmony_ci return err; 1456bf215546Sopenharmony_ci 1457bf215546Sopenharmony_ci if (is_void_extent) 1458bf215546Sopenharmony_ci return decode_error::ok; 1459bf215546Sopenharmony_ci 1460bf215546Sopenharmony_ci /* TODO: 3D */ 1461bf215546Sopenharmony_ci 1462bf215546Sopenharmony_ci calculate_from_weights(); 1463bf215546Sopenharmony_ci 1464bf215546Sopenharmony_ci if (VERBOSE_DECODE) 1465bf215546Sopenharmony_ci printf("weights_grid=%dx%dx%d dual_plane=%d num_weights=%d high_prec=%d r=%d range=0..%d (%dt %dq %db) weight_bits=%d\n", 1466bf215546Sopenharmony_ci wt_w, wt_h, wt_d, dual_plane, num_weights, high_prec, wt_range, wt_max, wt_trits, wt_quints, wt_bits, weight_bits); 1467bf215546Sopenharmony_ci 1468bf215546Sopenharmony_ci if (wt_w > decoder.block_w || wt_h > decoder.block_h || wt_d > decoder.block_d) 1469bf215546Sopenharmony_ci return decode_error::weight_grid_exceeds_block_size; 1470bf215546Sopenharmony_ci 1471bf215546Sopenharmony_ci num_parts = in.get_bits(11, 2) + 1; 1472bf215546Sopenharmony_ci 1473bf215546Sopenharmony_ci if (VERBOSE_DECODE) 1474bf215546Sopenharmony_ci in.printf_bits(11, 2, "partitions = %d", num_parts); 1475bf215546Sopenharmony_ci 1476bf215546Sopenharmony_ci if (dual_plane && num_parts > 3) 1477bf215546Sopenharmony_ci return decode_error::dual_plane_and_too_many_partitions; 1478bf215546Sopenharmony_ci 1479bf215546Sopenharmony_ci decode_cem(in); 1480bf215546Sopenharmony_ci 1481bf215546Sopenharmony_ci if (VERBOSE_DECODE) 1482bf215546Sopenharmony_ci printf("cem=[%d,%d,%d,%d] base_cem_class=%d\n", cems[0], cems[1], cems[2], cems[3], cem_base_class); 1483bf215546Sopenharmony_ci 1484bf215546Sopenharmony_ci int num_cem_pairs = (cem_base_class + 1) * num_parts + extra_cem_bits; 1485bf215546Sopenharmony_ci num_cem_values = num_cem_pairs * 2; 1486bf215546Sopenharmony_ci 1487bf215546Sopenharmony_ci calculate_remaining_bits(); 1488bf215546Sopenharmony_ci err = calculate_colour_endpoints_size(); 1489bf215546Sopenharmony_ci if (err != decode_error::ok) 1490bf215546Sopenharmony_ci return err; 1491bf215546Sopenharmony_ci 1492bf215546Sopenharmony_ci if (VERBOSE_DECODE) 1493bf215546Sopenharmony_ci in.printf_bits(colour_endpoint_data_offset, colour_endpoint_bits, 1494bf215546Sopenharmony_ci "endpoint data (%d bits, %d vals, %dt %dq %db)", 1495bf215546Sopenharmony_ci colour_endpoint_bits, num_cem_values, ce_trits, ce_quints, ce_bits); 1496bf215546Sopenharmony_ci 1497bf215546Sopenharmony_ci unpack_colour_endpoints(in); 1498bf215546Sopenharmony_ci 1499bf215546Sopenharmony_ci if (VERBOSE_DECODE) { 1500bf215546Sopenharmony_ci printf("cem values raw =["); 1501bf215546Sopenharmony_ci for (int i = 0; i < num_cem_values; i++) { 1502bf215546Sopenharmony_ci if (i) 1503bf215546Sopenharmony_ci printf(", "); 1504bf215546Sopenharmony_ci printf("%3d", colour_endpoints_quant[i]); 1505bf215546Sopenharmony_ci } 1506bf215546Sopenharmony_ci printf("]\n"); 1507bf215546Sopenharmony_ci } 1508bf215546Sopenharmony_ci 1509bf215546Sopenharmony_ci if (num_cem_values > 18) 1510bf215546Sopenharmony_ci return decode_error::invalid_colour_endpoints_count; 1511bf215546Sopenharmony_ci 1512bf215546Sopenharmony_ci unquantise_colour_endpoints(); 1513bf215546Sopenharmony_ci 1514bf215546Sopenharmony_ci if (VERBOSE_DECODE) { 1515bf215546Sopenharmony_ci printf("cem values norm=["); 1516bf215546Sopenharmony_ci for (int i = 0; i < num_cem_values; i++) { 1517bf215546Sopenharmony_ci if (i) 1518bf215546Sopenharmony_ci printf(", "); 1519bf215546Sopenharmony_ci printf("%3d", colour_endpoints[i]); 1520bf215546Sopenharmony_ci } 1521bf215546Sopenharmony_ci printf("]\n"); 1522bf215546Sopenharmony_ci } 1523bf215546Sopenharmony_ci 1524bf215546Sopenharmony_ci decode_colour_endpoints(); 1525bf215546Sopenharmony_ci 1526bf215546Sopenharmony_ci if (dual_plane) { 1527bf215546Sopenharmony_ci int ccs_offset = 128 - weight_bits - num_extra_cem_bits - 2; 1528bf215546Sopenharmony_ci colour_component_selector = in.get_bits(ccs_offset, 2); 1529bf215546Sopenharmony_ci 1530bf215546Sopenharmony_ci if (VERBOSE_DECODE) 1531bf215546Sopenharmony_ci in.printf_bits(ccs_offset, 2, "colour component selector = %d", colour_component_selector); 1532bf215546Sopenharmony_ci } else { 1533bf215546Sopenharmony_ci colour_component_selector = 0; 1534bf215546Sopenharmony_ci } 1535bf215546Sopenharmony_ci 1536bf215546Sopenharmony_ci 1537bf215546Sopenharmony_ci if (VERBOSE_DECODE) 1538bf215546Sopenharmony_ci in.printf_bits(128 - weight_bits, weight_bits, "weights (%d bits)", weight_bits); 1539bf215546Sopenharmony_ci 1540bf215546Sopenharmony_ci if (num_weights > 64) 1541bf215546Sopenharmony_ci return decode_error::invalid_num_weights; 1542bf215546Sopenharmony_ci 1543bf215546Sopenharmony_ci if (weight_bits < 24 || weight_bits > 96) 1544bf215546Sopenharmony_ci return decode_error::invalid_weight_bits; 1545bf215546Sopenharmony_ci 1546bf215546Sopenharmony_ci unpack_weights(in); 1547bf215546Sopenharmony_ci 1548bf215546Sopenharmony_ci unquantise_weights(); 1549bf215546Sopenharmony_ci 1550bf215546Sopenharmony_ci if (VERBOSE_DECODE) { 1551bf215546Sopenharmony_ci printf("weights=["); 1552bf215546Sopenharmony_ci for (int i = 0; i < num_weights; ++i) { 1553bf215546Sopenharmony_ci if (i) 1554bf215546Sopenharmony_ci printf(", "); 1555bf215546Sopenharmony_ci printf("%d", weights[i]); 1556bf215546Sopenharmony_ci } 1557bf215546Sopenharmony_ci printf("]\n"); 1558bf215546Sopenharmony_ci 1559bf215546Sopenharmony_ci for (int plane = 0; plane <= dual_plane; ++plane) { 1560bf215546Sopenharmony_ci printf("weights (plane %d):\n", plane); 1561bf215546Sopenharmony_ci int i = 0; 1562bf215546Sopenharmony_ci (void)i; 1563bf215546Sopenharmony_ci 1564bf215546Sopenharmony_ci for (int r = 0; r < wt_d; ++r) { 1565bf215546Sopenharmony_ci for (int t = 0; t < wt_h; ++t) { 1566bf215546Sopenharmony_ci for (int s = 0; s < wt_w; ++s) { 1567bf215546Sopenharmony_ci printf("%3d", weights[i++ * (1 + dual_plane) + plane]); 1568bf215546Sopenharmony_ci } 1569bf215546Sopenharmony_ci printf("\n"); 1570bf215546Sopenharmony_ci } 1571bf215546Sopenharmony_ci if (r < wt_d - 1) 1572bf215546Sopenharmony_ci printf("\n"); 1573bf215546Sopenharmony_ci } 1574bf215546Sopenharmony_ci } 1575bf215546Sopenharmony_ci } 1576bf215546Sopenharmony_ci 1577bf215546Sopenharmony_ci compute_infill_weights(decoder.block_w, decoder.block_h, decoder.block_d); 1578bf215546Sopenharmony_ci 1579bf215546Sopenharmony_ci if (VERBOSE_DECODE) { 1580bf215546Sopenharmony_ci for (int plane = 0; plane <= dual_plane; ++plane) { 1581bf215546Sopenharmony_ci printf("infilled weights (plane %d):\n", plane); 1582bf215546Sopenharmony_ci int i = 0; 1583bf215546Sopenharmony_ci (void)i; 1584bf215546Sopenharmony_ci 1585bf215546Sopenharmony_ci for (int r = 0; r < decoder.block_d; ++r) { 1586bf215546Sopenharmony_ci for (int t = 0; t < decoder.block_h; ++t) { 1587bf215546Sopenharmony_ci for (int s = 0; s < decoder.block_w; ++s) { 1588bf215546Sopenharmony_ci printf("%3d", infill_weights[plane][i++]); 1589bf215546Sopenharmony_ci } 1590bf215546Sopenharmony_ci printf("\n"); 1591bf215546Sopenharmony_ci } 1592bf215546Sopenharmony_ci if (r < decoder.block_d - 1) 1593bf215546Sopenharmony_ci printf("\n"); 1594bf215546Sopenharmony_ci } 1595bf215546Sopenharmony_ci } 1596bf215546Sopenharmony_ci } 1597bf215546Sopenharmony_ci if (VERBOSE_DECODE) 1598bf215546Sopenharmony_ci printf("\n"); 1599bf215546Sopenharmony_ci 1600bf215546Sopenharmony_ci return decode_error::ok; 1601bf215546Sopenharmony_ci} 1602bf215546Sopenharmony_ci 1603bf215546Sopenharmony_civoid Block::write_decoded(const Decoder &decoder, uint16_t *output) 1604bf215546Sopenharmony_ci{ 1605bf215546Sopenharmony_ci /* sRGB can only be stored as unorm8. */ 1606bf215546Sopenharmony_ci assert(!decoder.srgb || decoder.output_unorm8); 1607bf215546Sopenharmony_ci 1608bf215546Sopenharmony_ci if (is_void_extent) { 1609bf215546Sopenharmony_ci for (int idx = 0; idx < decoder.block_w*decoder.block_h*decoder.block_d; ++idx) { 1610bf215546Sopenharmony_ci if (decoder.output_unorm8) { 1611bf215546Sopenharmony_ci output[idx*4+0] = void_extent_colour_r >> 8; 1612bf215546Sopenharmony_ci output[idx*4+1] = void_extent_colour_g >> 8; 1613bf215546Sopenharmony_ci output[idx*4+2] = void_extent_colour_b >> 8; 1614bf215546Sopenharmony_ci output[idx*4+3] = void_extent_colour_a >> 8; 1615bf215546Sopenharmony_ci } else { 1616bf215546Sopenharmony_ci /* Store the color as FP16. */ 1617bf215546Sopenharmony_ci output[idx*4+0] = _mesa_uint16_div_64k_to_half(void_extent_colour_r); 1618bf215546Sopenharmony_ci output[idx*4+1] = _mesa_uint16_div_64k_to_half(void_extent_colour_g); 1619bf215546Sopenharmony_ci output[idx*4+2] = _mesa_uint16_div_64k_to_half(void_extent_colour_b); 1620bf215546Sopenharmony_ci output[idx*4+3] = _mesa_uint16_div_64k_to_half(void_extent_colour_a); 1621bf215546Sopenharmony_ci } 1622bf215546Sopenharmony_ci } 1623bf215546Sopenharmony_ci return; 1624bf215546Sopenharmony_ci } 1625bf215546Sopenharmony_ci 1626bf215546Sopenharmony_ci int small_block = (decoder.block_w * decoder.block_h * decoder.block_d) < 31; 1627bf215546Sopenharmony_ci 1628bf215546Sopenharmony_ci int idx = 0; 1629bf215546Sopenharmony_ci for (int z = 0; z < decoder.block_d; ++z) { 1630bf215546Sopenharmony_ci for (int y = 0; y < decoder.block_h; ++y) { 1631bf215546Sopenharmony_ci for (int x = 0; x < decoder.block_w; ++x) { 1632bf215546Sopenharmony_ci 1633bf215546Sopenharmony_ci int partition; 1634bf215546Sopenharmony_ci if (num_parts > 1) { 1635bf215546Sopenharmony_ci partition = select_partition(partition_index, x, y, z, num_parts, small_block); 1636bf215546Sopenharmony_ci assert(partition < num_parts); 1637bf215546Sopenharmony_ci } else { 1638bf215546Sopenharmony_ci partition = 0; 1639bf215546Sopenharmony_ci } 1640bf215546Sopenharmony_ci 1641bf215546Sopenharmony_ci /* TODO: HDR */ 1642bf215546Sopenharmony_ci 1643bf215546Sopenharmony_ci uint8x4_t e0 = endpoints_decoded[0][partition]; 1644bf215546Sopenharmony_ci uint8x4_t e1 = endpoints_decoded[1][partition]; 1645bf215546Sopenharmony_ci uint16_t c0[4], c1[4]; 1646bf215546Sopenharmony_ci 1647bf215546Sopenharmony_ci /* Expand to 16 bits. */ 1648bf215546Sopenharmony_ci if (decoder.srgb) { 1649bf215546Sopenharmony_ci c0[0] = (uint16_t)((e0.v[0] << 8) | 0x80); 1650bf215546Sopenharmony_ci c0[1] = (uint16_t)((e0.v[1] << 8) | 0x80); 1651bf215546Sopenharmony_ci c0[2] = (uint16_t)((e0.v[2] << 8) | 0x80); 1652bf215546Sopenharmony_ci c0[3] = (uint16_t)((e0.v[3] << 8) | 0x80); 1653bf215546Sopenharmony_ci 1654bf215546Sopenharmony_ci c1[0] = (uint16_t)((e1.v[0] << 8) | 0x80); 1655bf215546Sopenharmony_ci c1[1] = (uint16_t)((e1.v[1] << 8) | 0x80); 1656bf215546Sopenharmony_ci c1[2] = (uint16_t)((e1.v[2] << 8) | 0x80); 1657bf215546Sopenharmony_ci c1[3] = (uint16_t)((e1.v[3] << 8) | 0x80); 1658bf215546Sopenharmony_ci } else { 1659bf215546Sopenharmony_ci c0[0] = (uint16_t)((e0.v[0] << 8) | e0.v[0]); 1660bf215546Sopenharmony_ci c0[1] = (uint16_t)((e0.v[1] << 8) | e0.v[1]); 1661bf215546Sopenharmony_ci c0[2] = (uint16_t)((e0.v[2] << 8) | e0.v[2]); 1662bf215546Sopenharmony_ci c0[3] = (uint16_t)((e0.v[3] << 8) | e0.v[3]); 1663bf215546Sopenharmony_ci 1664bf215546Sopenharmony_ci c1[0] = (uint16_t)((e1.v[0] << 8) | e1.v[0]); 1665bf215546Sopenharmony_ci c1[1] = (uint16_t)((e1.v[1] << 8) | e1.v[1]); 1666bf215546Sopenharmony_ci c1[2] = (uint16_t)((e1.v[2] << 8) | e1.v[2]); 1667bf215546Sopenharmony_ci c1[3] = (uint16_t)((e1.v[3] << 8) | e1.v[3]); 1668bf215546Sopenharmony_ci } 1669bf215546Sopenharmony_ci 1670bf215546Sopenharmony_ci int w[4]; 1671bf215546Sopenharmony_ci if (dual_plane) { 1672bf215546Sopenharmony_ci int w0 = infill_weights[0][idx]; 1673bf215546Sopenharmony_ci int w1 = infill_weights[1][idx]; 1674bf215546Sopenharmony_ci w[0] = w[1] = w[2] = w[3] = w0; 1675bf215546Sopenharmony_ci w[colour_component_selector] = w1; 1676bf215546Sopenharmony_ci } else { 1677bf215546Sopenharmony_ci int w0 = infill_weights[0][idx]; 1678bf215546Sopenharmony_ci w[0] = w[1] = w[2] = w[3] = w0; 1679bf215546Sopenharmony_ci } 1680bf215546Sopenharmony_ci 1681bf215546Sopenharmony_ci /* Interpolate to produce UNORM16, applying weights. */ 1682bf215546Sopenharmony_ci uint16_t c[4] = { 1683bf215546Sopenharmony_ci (uint16_t)((c0[0] * (64 - w[0]) + c1[0] * w[0] + 32) >> 6), 1684bf215546Sopenharmony_ci (uint16_t)((c0[1] * (64 - w[1]) + c1[1] * w[1] + 32) >> 6), 1685bf215546Sopenharmony_ci (uint16_t)((c0[2] * (64 - w[2]) + c1[2] * w[2] + 32) >> 6), 1686bf215546Sopenharmony_ci (uint16_t)((c0[3] * (64 - w[3]) + c1[3] * w[3] + 32) >> 6), 1687bf215546Sopenharmony_ci }; 1688bf215546Sopenharmony_ci 1689bf215546Sopenharmony_ci if (decoder.output_unorm8) { 1690bf215546Sopenharmony_ci output[idx*4+0] = c[0] >> 8; 1691bf215546Sopenharmony_ci output[idx*4+1] = c[1] >> 8; 1692bf215546Sopenharmony_ci output[idx*4+2] = c[2] >> 8; 1693bf215546Sopenharmony_ci output[idx*4+3] = c[3] >> 8; 1694bf215546Sopenharmony_ci } else { 1695bf215546Sopenharmony_ci /* Store the color as FP16. */ 1696bf215546Sopenharmony_ci output[idx*4+0] = c[0] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[0]); 1697bf215546Sopenharmony_ci output[idx*4+1] = c[1] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[1]); 1698bf215546Sopenharmony_ci output[idx*4+2] = c[2] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[2]); 1699bf215546Sopenharmony_ci output[idx*4+3] = c[3] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[3]); 1700bf215546Sopenharmony_ci } 1701bf215546Sopenharmony_ci 1702bf215546Sopenharmony_ci idx++; 1703bf215546Sopenharmony_ci } 1704bf215546Sopenharmony_ci } 1705bf215546Sopenharmony_ci } 1706bf215546Sopenharmony_ci} 1707bf215546Sopenharmony_ci 1708bf215546Sopenharmony_civoid Block::calculate_from_weights() 1709bf215546Sopenharmony_ci{ 1710bf215546Sopenharmony_ci wt_trits = 0; 1711bf215546Sopenharmony_ci wt_quints = 0; 1712bf215546Sopenharmony_ci wt_bits = 0; 1713bf215546Sopenharmony_ci switch (high_prec) { 1714bf215546Sopenharmony_ci case 0: 1715bf215546Sopenharmony_ci switch (wt_range) { 1716bf215546Sopenharmony_ci case 0x2: wt_max = 1; wt_bits = 1; break; 1717bf215546Sopenharmony_ci case 0x3: wt_max = 2; wt_trits = 1; break; 1718bf215546Sopenharmony_ci case 0x4: wt_max = 3; wt_bits = 2; break; 1719bf215546Sopenharmony_ci case 0x5: wt_max = 4; wt_quints = 1; break; 1720bf215546Sopenharmony_ci case 0x6: wt_max = 5; wt_trits = 1; wt_bits = 1; break; 1721bf215546Sopenharmony_ci case 0x7: wt_max = 7; wt_bits = 3; break; 1722bf215546Sopenharmony_ci default: abort(); 1723bf215546Sopenharmony_ci } 1724bf215546Sopenharmony_ci break; 1725bf215546Sopenharmony_ci case 1: 1726bf215546Sopenharmony_ci switch (wt_range) { 1727bf215546Sopenharmony_ci case 0x2: wt_max = 9; wt_quints = 1; wt_bits = 1; break; 1728bf215546Sopenharmony_ci case 0x3: wt_max = 11; wt_trits = 1; wt_bits = 2; break; 1729bf215546Sopenharmony_ci case 0x4: wt_max = 15; wt_bits = 4; break; 1730bf215546Sopenharmony_ci case 0x5: wt_max = 19; wt_quints = 1; wt_bits = 2; break; 1731bf215546Sopenharmony_ci case 0x6: wt_max = 23; wt_trits = 1; wt_bits = 3; break; 1732bf215546Sopenharmony_ci case 0x7: wt_max = 31; wt_bits = 5; break; 1733bf215546Sopenharmony_ci default: abort(); 1734bf215546Sopenharmony_ci } 1735bf215546Sopenharmony_ci break; 1736bf215546Sopenharmony_ci } 1737bf215546Sopenharmony_ci 1738bf215546Sopenharmony_ci assert(wt_trits || wt_quints || wt_bits); 1739bf215546Sopenharmony_ci 1740bf215546Sopenharmony_ci num_weights = wt_w * wt_h * wt_d; 1741bf215546Sopenharmony_ci 1742bf215546Sopenharmony_ci if (dual_plane) 1743bf215546Sopenharmony_ci num_weights *= 2; 1744bf215546Sopenharmony_ci 1745bf215546Sopenharmony_ci weight_bits = 1746bf215546Sopenharmony_ci (num_weights * 8 * wt_trits + 4) / 5 1747bf215546Sopenharmony_ci + (num_weights * 7 * wt_quints + 2) / 3 1748bf215546Sopenharmony_ci + num_weights * wt_bits; 1749bf215546Sopenharmony_ci} 1750bf215546Sopenharmony_ci 1751bf215546Sopenharmony_civoid Block::calculate_remaining_bits() 1752bf215546Sopenharmony_ci{ 1753bf215546Sopenharmony_ci int config_bits; 1754bf215546Sopenharmony_ci if (num_parts > 1) { 1755bf215546Sopenharmony_ci if (!is_multi_cem) 1756bf215546Sopenharmony_ci config_bits = 29; 1757bf215546Sopenharmony_ci else 1758bf215546Sopenharmony_ci config_bits = 25 + 3 * num_parts; 1759bf215546Sopenharmony_ci } else { 1760bf215546Sopenharmony_ci config_bits = 17; 1761bf215546Sopenharmony_ci } 1762bf215546Sopenharmony_ci 1763bf215546Sopenharmony_ci if (dual_plane) 1764bf215546Sopenharmony_ci config_bits += 2; 1765bf215546Sopenharmony_ci 1766bf215546Sopenharmony_ci remaining_bits = 128 - config_bits - weight_bits; 1767bf215546Sopenharmony_ci} 1768bf215546Sopenharmony_ci 1769bf215546Sopenharmony_cidecode_error::type Block::calculate_colour_endpoints_size() 1770bf215546Sopenharmony_ci{ 1771bf215546Sopenharmony_ci /* Specified as illegal */ 1772bf215546Sopenharmony_ci if (remaining_bits < (13 * num_cem_values + 4) / 5) { 1773bf215546Sopenharmony_ci colour_endpoint_bits = ce_max = ce_trits = ce_quints = ce_bits = 0; 1774bf215546Sopenharmony_ci return decode_error::invalid_colour_endpoints_size; 1775bf215546Sopenharmony_ci } 1776bf215546Sopenharmony_ci 1777bf215546Sopenharmony_ci /* Find the largest cem_ranges that fits within remaining_bits */ 1778bf215546Sopenharmony_ci for (int i = ARRAY_SIZE(cem_ranges)-1; i >= 0; --i) { 1779bf215546Sopenharmony_ci int cem_bits; 1780bf215546Sopenharmony_ci cem_bits = (num_cem_values * 8 * cem_ranges[i].t + 4) / 5 1781bf215546Sopenharmony_ci + (num_cem_values * 7 * cem_ranges[i].q + 2) / 3 1782bf215546Sopenharmony_ci + num_cem_values * cem_ranges[i].b; 1783bf215546Sopenharmony_ci 1784bf215546Sopenharmony_ci if (cem_bits <= remaining_bits) 1785bf215546Sopenharmony_ci { 1786bf215546Sopenharmony_ci colour_endpoint_bits = cem_bits; 1787bf215546Sopenharmony_ci ce_max = cem_ranges[i].max; 1788bf215546Sopenharmony_ci ce_trits = cem_ranges[i].t; 1789bf215546Sopenharmony_ci ce_quints = cem_ranges[i].q; 1790bf215546Sopenharmony_ci ce_bits = cem_ranges[i].b; 1791bf215546Sopenharmony_ci return decode_error::ok; 1792bf215546Sopenharmony_ci } 1793bf215546Sopenharmony_ci } 1794bf215546Sopenharmony_ci 1795bf215546Sopenharmony_ci assert(0); 1796bf215546Sopenharmony_ci return decode_error::invalid_colour_endpoints_size; 1797bf215546Sopenharmony_ci} 1798bf215546Sopenharmony_ci 1799bf215546Sopenharmony_ci/** 1800bf215546Sopenharmony_ci * Decode ASTC 2D LDR texture data. 1801bf215546Sopenharmony_ci * 1802bf215546Sopenharmony_ci * \param src_width in pixels 1803bf215546Sopenharmony_ci * \param src_height in pixels 1804bf215546Sopenharmony_ci * \param dst_stride in bytes 1805bf215546Sopenharmony_ci */ 1806bf215546Sopenharmony_ciextern "C" void 1807bf215546Sopenharmony_ci_mesa_unpack_astc_2d_ldr(uint8_t *dst_row, 1808bf215546Sopenharmony_ci unsigned dst_stride, 1809bf215546Sopenharmony_ci const uint8_t *src_row, 1810bf215546Sopenharmony_ci unsigned src_stride, 1811bf215546Sopenharmony_ci unsigned src_width, 1812bf215546Sopenharmony_ci unsigned src_height, 1813bf215546Sopenharmony_ci mesa_format format) 1814bf215546Sopenharmony_ci{ 1815bf215546Sopenharmony_ci assert(_mesa_is_format_astc_2d(format)); 1816bf215546Sopenharmony_ci bool srgb = _mesa_is_format_srgb(format); 1817bf215546Sopenharmony_ci 1818bf215546Sopenharmony_ci unsigned blk_w, blk_h; 1819bf215546Sopenharmony_ci _mesa_get_format_block_size(format, &blk_w, &blk_h); 1820bf215546Sopenharmony_ci 1821bf215546Sopenharmony_ci const unsigned block_size = 16; 1822bf215546Sopenharmony_ci unsigned x_blocks = (src_width + blk_w - 1) / blk_w; 1823bf215546Sopenharmony_ci unsigned y_blocks = (src_height + blk_h - 1) / blk_h; 1824bf215546Sopenharmony_ci 1825bf215546Sopenharmony_ci Decoder dec(blk_w, blk_h, 1, srgb, true); 1826bf215546Sopenharmony_ci 1827bf215546Sopenharmony_ci for (unsigned y = 0; y < y_blocks; ++y) { 1828bf215546Sopenharmony_ci for (unsigned x = 0; x < x_blocks; ++x) { 1829bf215546Sopenharmony_ci /* Same size as the largest block. */ 1830bf215546Sopenharmony_ci uint16_t block_out[12 * 12 * 4]; 1831bf215546Sopenharmony_ci 1832bf215546Sopenharmony_ci dec.decode(src_row + x * block_size, block_out); 1833bf215546Sopenharmony_ci 1834bf215546Sopenharmony_ci /* This can be smaller with NPOT dimensions. */ 1835bf215546Sopenharmony_ci unsigned dst_blk_w = MIN2(blk_w, src_width - x*blk_w); 1836bf215546Sopenharmony_ci unsigned dst_blk_h = MIN2(blk_h, src_height - y*blk_h); 1837bf215546Sopenharmony_ci 1838bf215546Sopenharmony_ci for (unsigned sub_y = 0; sub_y < dst_blk_h; ++sub_y) { 1839bf215546Sopenharmony_ci for (unsigned sub_x = 0; sub_x < dst_blk_w; ++sub_x) { 1840bf215546Sopenharmony_ci uint8_t *dst = dst_row + sub_y * dst_stride + 1841bf215546Sopenharmony_ci (x * blk_w + sub_x) * 4; 1842bf215546Sopenharmony_ci const uint16_t *src = &block_out[(sub_y * blk_w + sub_x) * 4]; 1843bf215546Sopenharmony_ci 1844bf215546Sopenharmony_ci dst[0] = src[0]; 1845bf215546Sopenharmony_ci dst[1] = src[1]; 1846bf215546Sopenharmony_ci dst[2] = src[2]; 1847bf215546Sopenharmony_ci dst[3] = src[3]; 1848bf215546Sopenharmony_ci } 1849bf215546Sopenharmony_ci } 1850bf215546Sopenharmony_ci } 1851bf215546Sopenharmony_ci src_row += src_stride; 1852bf215546Sopenharmony_ci dst_row += dst_stride * blk_h; 1853bf215546Sopenharmony_ci } 1854bf215546Sopenharmony_ci} 1855