1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright 2015 Philip Taylor <philip@zaynar.co.uk>
3bf215546Sopenharmony_ci * Copyright 2018 Advanced Micro Devices, Inc.
4bf215546Sopenharmony_ci *
5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
8bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
10bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
11bf215546Sopenharmony_ci *
12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
14bf215546Sopenharmony_ci * Software.
15bf215546Sopenharmony_ci *
16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22bf215546Sopenharmony_ci * DEALINGS IN THE SOFTWARE.
23bf215546Sopenharmony_ci */
24bf215546Sopenharmony_ci
25bf215546Sopenharmony_ci/**
26bf215546Sopenharmony_ci * \file texcompress_astc.c
27bf215546Sopenharmony_ci *
28bf215546Sopenharmony_ci * Decompression code for GL_KHR_texture_compression_astc_ldr, which is just
29bf215546Sopenharmony_ci * ASTC 2D LDR.
30bf215546Sopenharmony_ci *
31bf215546Sopenharmony_ci * The ASTC 2D LDR decoder (without the sRGB part) was copied from the OASTC
32bf215546Sopenharmony_ci * library written by Philip Taylor. I added sRGB support and adjusted it for
33bf215546Sopenharmony_ci * Mesa. - Marek
34bf215546Sopenharmony_ci */
35bf215546Sopenharmony_ci
36bf215546Sopenharmony_ci#include "texcompress_astc.h"
37bf215546Sopenharmony_ci#include "macros.h"
38bf215546Sopenharmony_ci#include "util/half_float.h"
39bf215546Sopenharmony_ci#include <stdio.h>
40bf215546Sopenharmony_ci#include <cstdlib>  // for abort() on windows
41bf215546Sopenharmony_ci
/* Debug tracing switches; both default to off. */

/* NOTE(review): not referenced in the part of the file reviewed here;
 * presumably gates tracing in the decode path -- confirm against its users.
 */
static bool VERBOSE_DECODE = false;
/* When true, OutputBitVector prints every append/skip to stdout. */
static bool VERBOSE_WRITE = false;
44bf215546Sopenharmony_ci
/**
 * Status codes returned by the block decoding routines.
 *
 * The enum is nested in a class purely for scoping, so values are spelled
 * decode_error::ok, decode_error::invalid_weight_bits, and so on.
 * decode_error::ok is zero; every other enumerator names one reason a block
 * was rejected.
 */
class decode_error
{
public:
   enum type {
      ok = 0,

      unsupported_hdr_void_extent,
      reserved_block_mode_1,
      reserved_block_mode_2,
      dual_plane_and_too_many_partitions,
      invalid_range_in_void_extent,
      weight_grid_exceeds_block_size,
      invalid_colour_endpoints_size,
      invalid_colour_endpoints_count,
      invalid_weight_bits,
      invalid_num_weights,
   };
};
62bf215546Sopenharmony_ci
63bf215546Sopenharmony_ci
/**
 * One quantisation range for colour endpoint values.
 *
 * A value in 0..max is stored as 'b' plain bits plus at most one trit
 * (t == 1) or one quint (q == 1).
 */
struct cem_range {
   uint8_t max; /* largest representable value */
   uint8_t t;   /* 1 if the encoding contains a trit */
   uint8_t q;   /* 1 if the encoding contains a quint */
   uint8_t b;   /* number of plain bits */
};

/* Based on the Color Unquantization Parameters table, plus the bit-only
 * representations, sorted by increasing size.
 *
 * Every row satisfies max + 1 == (t ? 3 : q ? 5 : 1) << b.
 */
static cem_range cem_ranges[] = {
   /* max  t  q  b */
   {   5, 1, 0, 1 },
   {   7, 0, 0, 3 },
   {   9, 0, 1, 1 },
   {  11, 1, 0, 2 },
   {  15, 0, 0, 4 },
   {  19, 0, 1, 2 },
   {  23, 1, 0, 3 },
   {  31, 0, 0, 5 },
   {  39, 0, 1, 3 },
   {  47, 1, 0, 4 },
   {  63, 0, 0, 6 },
   {  79, 0, 1, 4 },
   {  95, 1, 0, 5 },
   { 127, 0, 0, 7 },
   { 159, 0, 1, 5 },
   { 191, 1, 0, 6 },
   { 255, 0, 0, 8 },
};
91bf215546Sopenharmony_ci
/* Concatenate single-bit values into one integer, first argument most
 * significant.  Each macro prepends one bit to the next smaller one.
 */
#define CAT_BITS_2(a, b)          ( ((a) << 1) | (b) )
#define CAT_BITS_3(a, b, c)       ( ((a) << 2) | CAT_BITS_2(b, c) )
#define CAT_BITS_4(a, b, c, d)    ( ((a) << 3) | CAT_BITS_3(b, c, d) )
#define CAT_BITS_5(a, b, c, d, e) ( ((a) << 4) | CAT_BITS_4(b, c, d, e) )
96bf215546Sopenharmony_ci
/**
 * Unpack 5n+8 bits from 'in' into 5 output values.
 * If n <= 4 then T should be uint32_t, else it must be uint64_t.
 *
 * Bit layout of 'in', least significant first (each m is n bits wide):
 *
 *    m0 | T0 T1 | m1 | T2 T3 | m2 | T4 | m3 | T5 T6 | m4 | T7
 *
 * T7..T0 together encode five trits t0..t4 (each 0..2); the outputs are
 * out[i] = (t_i << n) | m_i.
 */
template <typename T>
static void unpack_trit_block(int n, T in, uint8_t *out)
{
   assert(n <= 6); /* else output will overflow uint8_t */

   /* Low n bits of each of the five values. */
   const uint8_t low_mask = (1 << n) - 1;
   const uint8_t low0 = in & low_mask;
   const uint8_t low1 = (in >> (n + 2)) & low_mask;
   const uint8_t low2 = (in >> (2 * n + 4)) & low_mask;
   const uint8_t low3 = (in >> (3 * n + 5)) & low_mask;
   const uint8_t low4 = (in >> (4 * n + 7)) & low_mask;

   /* Gather the scattered bits T0..T7 into one byte, T0 in bit 0. */
   const uint8_t packed = ((in >> n) & 0x3) |
                          (((in >> (2 * n + 2)) & 0x3) << 2) |
                          (((in >> (3 * n + 4)) & 0x1) << 4) |
                          (((in >> (4 * n + 5)) & 0x3) << 5) |
                          (((in >> (5 * n + 7)) & 0x1) << 7);

   const uint8_t t_765 = packed >> 5;         /* T7 T6 T5 */
   const uint8_t t_65 = t_765 & 0x3;          /* T6 T5 */
   const uint8_t t_7 = packed >> 7;           /* T7 */

   /* First stage: split off the two upper trits and build the 5-bit code C
    * that describes the three lower ones.
    */
   uint8_t code, trit3, trit4;
   if (((packed >> 2) & 0x7) == 0x7) {
      code = (t_765 << 2) | (packed & 0x3);   /* { T7 T6 T5 T1 T0 } */
      trit4 = 2;
      trit3 = 2;
   } else {
      code = packed & 0x1f;                   /* { T4 T3 T2 T1 T0 } */
      if (t_65 == 0x3) {
         trit4 = 2;
         trit3 = t_7;
      } else {
         trit4 = t_7;
         trit3 = t_65;
      }
   }

   /* Second stage: decode C into the three lower trits. */
   const uint8_t code_bit4 = (code >> 4) & 0x1;
   uint8_t trit0, trit1, trit2;
   if ((code & 0x3) == 0x3) {
      trit2 = 2;
      trit1 = code_bit4;
      /* { C3, C2 & ~C3 }: 2 when C3 is set, otherwise C2 */
      trit0 = (code & 0x8) ? 2 : ((code >> 2) & 0x1);
   } else if (((code >> 2) & 0x3) == 0x3) {
      trit2 = 2;
      trit1 = 2;
      trit0 = code & 0x3;
   } else {
      trit2 = code_bit4;
      trit1 = (code >> 2) & 0x3;
      /* { C1, C0 & ~C1 }: 2 when C1 is set, otherwise C0 */
      trit0 = (code & 0x2) ? 2 : (code & 0x1);
   }

   out[0] = (trit0 << n) | low0;
   out[1] = (trit1 << n) | low1;
   out[2] = (trit2 << n) | low2;
   out[3] = (trit3 << n) | low3;
   out[4] = (trit4 << n) | low4;
}
161bf215546Sopenharmony_ci
/**
 * Unpack 3n+7 bits from 'in' into 3 output values
 *
 * Bit layout of 'in', least significant first (each m is n bits wide):
 *
 *    m0 | Q0 Q1 Q2 | m1 | Q3 Q4 | m2 | Q5 Q6
 *
 * Q6..Q0 together encode three quints q0..q2 (each 0..4); the outputs are
 * out[i] = (q_i << n) | m_i.
 */
static void unpack_quint_block(int n, uint32_t in, uint8_t *out)
{
   assert(n <= 5); /* else output will overflow uint8_t */

   /* Low n bits of each of the three values. */
   const uint8_t low_mask = (1 << n) - 1;
   const uint8_t low0 = in & low_mask;
   const uint8_t low1 = (in >> (n + 3)) & low_mask;
   const uint8_t low2 = (in >> (2 * n + 5)) & low_mask;

   /* Gather the scattered bits Q0..Q6 into one byte, Q0 in bit 0. */
   const uint8_t packed = ((in >> n) & 0x7) |
                          (((in >> (2 * n + 3)) & 0x3) << 3) |
                          (((in >> (3 * n + 5)) & 0x3) << 5);

   const uint8_t q_0 = packed & 0x1;          /* Q0 */
   const uint8_t q_21 = (packed >> 1) & 0x3;  /* Q2 Q1 */
   const uint8_t q_43 = (packed >> 3) & 0x3;  /* Q4 Q3 */
   const uint8_t q_65 = (packed >> 5) & 0x3;  /* Q6 Q5 */

   uint8_t quint0, quint1, quint2;
   if (q_21 == 0x3 && q_65 == 0x0) {
      /* { Q0, Q4 & ~Q0, Q3 & ~Q0 }: 4 when Q0 is set, otherwise Q4 Q3 */
      quint2 = q_0 ? 4 : q_43;
      quint1 = 4;
      quint0 = 4;
   } else {
      uint8_t code;
      if (q_21 == 0x3) {
         quint2 = 4;
         code = (q_43 << 3) | ((~q_65 & 0x3) << 1) | q_0;
      } else {
         quint2 = q_65;
         code = packed & 0x1f;                /* { Q4 Q3 Q2 Q1 Q0 } */
      }

      if ((code & 0x7) == 0x5) {
         quint1 = 4;
         quint0 = (code >> 3) & 0x3;
      } else {
         quint1 = (code >> 3) & 0x3;
         quint0 = code & 0x7;
      }
   }

   out[0] = (quint0 << n) | low0;
   out[1] = (quint1 << n) | low1;
   out[2] = (quint2 << n) | low2;
}
207bf215546Sopenharmony_ci
208bf215546Sopenharmony_ci
209bf215546Sopenharmony_cistruct uint8x4_t
210bf215546Sopenharmony_ci{
211bf215546Sopenharmony_ci   uint8_t v[4];
212bf215546Sopenharmony_ci
213bf215546Sopenharmony_ci   uint8x4_t() { }
214bf215546Sopenharmony_ci
215bf215546Sopenharmony_ci   uint8x4_t(int a, int b, int c, int d)
216bf215546Sopenharmony_ci   {
217bf215546Sopenharmony_ci      assert(0 <= a && a <= 255);
218bf215546Sopenharmony_ci      assert(0 <= b && b <= 255);
219bf215546Sopenharmony_ci      assert(0 <= c && c <= 255);
220bf215546Sopenharmony_ci      assert(0 <= d && d <= 255);
221bf215546Sopenharmony_ci      v[0] = a;
222bf215546Sopenharmony_ci      v[1] = b;
223bf215546Sopenharmony_ci      v[2] = c;
224bf215546Sopenharmony_ci      v[3] = d;
225bf215546Sopenharmony_ci   }
226bf215546Sopenharmony_ci
227bf215546Sopenharmony_ci   static uint8x4_t clamped(int a, int b, int c, int d)
228bf215546Sopenharmony_ci   {
229bf215546Sopenharmony_ci      uint8x4_t r;
230bf215546Sopenharmony_ci      r.v[0] = MAX2(0, MIN2(255, a));
231bf215546Sopenharmony_ci      r.v[1] = MAX2(0, MIN2(255, b));
232bf215546Sopenharmony_ci      r.v[2] = MAX2(0, MIN2(255, c));
233bf215546Sopenharmony_ci      r.v[3] = MAX2(0, MIN2(255, d));
234bf215546Sopenharmony_ci      return r;
235bf215546Sopenharmony_ci   }
236bf215546Sopenharmony_ci};
237bf215546Sopenharmony_ci
238bf215546Sopenharmony_cistatic uint8x4_t blue_contract(int r, int g, int b, int a)
239bf215546Sopenharmony_ci{
240bf215546Sopenharmony_ci   return uint8x4_t((r+b) >> 1, (g+b) >> 1, b, a);
241bf215546Sopenharmony_ci}
242bf215546Sopenharmony_ci
243bf215546Sopenharmony_cistatic uint8x4_t blue_contract_clamped(int r, int g, int b, int a)
244bf215546Sopenharmony_ci{
245bf215546Sopenharmony_ci   return uint8x4_t::clamped((r+b) >> 1, (g+b) >> 1, b, a);
246bf215546Sopenharmony_ci}
247bf215546Sopenharmony_ci
/**
 * Move the top bit of the byte in 'a' into 'b' and turn the rest of 'a'
 * into a signed 6-bit value.
 *
 * On return, b is its old value shifted right by one with bit 7 taken from
 * a, and a holds old bits 1..6 of a, sign-extended from bit 5 (so it lies
 * in -32..31).
 */
static void bit_transfer_signed(int &a, int &b)
{
   b >>= 1;
   b |= (a & 0x80);

   a = (a >> 1) & 0x3f;
   /* Sign-extend the 6-bit field: subtracts 0x40 exactly when bit 5 is set. */
   a = (a ^ 0x20) - 0x20;
}
257bf215546Sopenharmony_ci
/**
 * 32-bit integer mixing function used by select_partition().
 *
 * Each "p op= p << k" step of the usual shift/add formulation is written
 * here as the equivalent multiplication modulo 2^32; the xor-shift steps
 * are unchanged.
 */
static uint32_t hash52(uint32_t p)
{
   p ^= p >> 15;
   p *= 0xFFFE0001u; /* p -= p << 17  ==  p * (1 - 2^17) */
   p *= 129u;        /* p += p << 7   ==  p * (1 + 2^7)  */
   p *= 17u;         /* p += p << 4   ==  p * (1 + 2^4)  */
   p ^= p >> 5;
   p *= 0x10001u;    /* p += p << 16  ==  p * (1 + 2^16) */
   p ^= p >> 7;
   p ^= p >> 3;
   p ^= p << 6;
   p ^= p >> 17;
   return p;
}
272bf215546Sopenharmony_ci
273bf215546Sopenharmony_cistatic int select_partition(int seed, int x, int y, int z, int partitioncount,
274bf215546Sopenharmony_ci                            int small_block)
275bf215546Sopenharmony_ci{
276bf215546Sopenharmony_ci   if (small_block) {
277bf215546Sopenharmony_ci      x <<= 1;
278bf215546Sopenharmony_ci      y <<= 1;
279bf215546Sopenharmony_ci      z <<= 1;
280bf215546Sopenharmony_ci   }
281bf215546Sopenharmony_ci   seed += (partitioncount - 1) * 1024;
282bf215546Sopenharmony_ci   uint32_t rnum = hash52(seed);
283bf215546Sopenharmony_ci   uint8_t seed1 = rnum & 0xF;
284bf215546Sopenharmony_ci   uint8_t seed2 = (rnum >> 4) & 0xF;
285bf215546Sopenharmony_ci   uint8_t seed3 = (rnum >> 8) & 0xF;
286bf215546Sopenharmony_ci   uint8_t seed4 = (rnum >> 12) & 0xF;
287bf215546Sopenharmony_ci   uint8_t seed5 = (rnum >> 16) & 0xF;
288bf215546Sopenharmony_ci   uint8_t seed6 = (rnum >> 20) & 0xF;
289bf215546Sopenharmony_ci   uint8_t seed7 = (rnum >> 24) & 0xF;
290bf215546Sopenharmony_ci   uint8_t seed8 = (rnum >> 28) & 0xF;
291bf215546Sopenharmony_ci   uint8_t seed9 = (rnum >> 18) & 0xF;
292bf215546Sopenharmony_ci   uint8_t seed10 = (rnum >> 22) & 0xF;
293bf215546Sopenharmony_ci   uint8_t seed11 = (rnum >> 26) & 0xF;
294bf215546Sopenharmony_ci   uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
295bf215546Sopenharmony_ci
296bf215546Sopenharmony_ci   seed1 *= seed1;
297bf215546Sopenharmony_ci   seed2 *= seed2;
298bf215546Sopenharmony_ci   seed3 *= seed3;
299bf215546Sopenharmony_ci   seed4 *= seed4;
300bf215546Sopenharmony_ci   seed5 *= seed5;
301bf215546Sopenharmony_ci   seed6 *= seed6;
302bf215546Sopenharmony_ci   seed7 *= seed7;
303bf215546Sopenharmony_ci   seed8 *= seed8;
304bf215546Sopenharmony_ci   seed9 *= seed9;
305bf215546Sopenharmony_ci   seed10 *= seed10;
306bf215546Sopenharmony_ci   seed11 *= seed11;
307bf215546Sopenharmony_ci   seed12 *= seed12;
308bf215546Sopenharmony_ci
309bf215546Sopenharmony_ci   int sh1, sh2, sh3;
310bf215546Sopenharmony_ci   if (seed & 1) {
311bf215546Sopenharmony_ci      sh1 = (seed & 2 ? 4 : 5);
312bf215546Sopenharmony_ci      sh2 = (partitioncount == 3 ? 6 : 5);
313bf215546Sopenharmony_ci   } else {
314bf215546Sopenharmony_ci      sh1 = (partitioncount == 3 ? 6 : 5);
315bf215546Sopenharmony_ci      sh2 = (seed & 2 ? 4 : 5);
316bf215546Sopenharmony_ci   }
317bf215546Sopenharmony_ci   sh3 = (seed & 0x10) ? sh1 : sh2;
318bf215546Sopenharmony_ci
319bf215546Sopenharmony_ci   seed1 >>= sh1;
320bf215546Sopenharmony_ci   seed2 >>= sh2;
321bf215546Sopenharmony_ci   seed3 >>= sh1;
322bf215546Sopenharmony_ci   seed4 >>= sh2;
323bf215546Sopenharmony_ci   seed5 >>= sh1;
324bf215546Sopenharmony_ci   seed6 >>= sh2;
325bf215546Sopenharmony_ci   seed7 >>= sh1;
326bf215546Sopenharmony_ci   seed8 >>= sh2;
327bf215546Sopenharmony_ci   seed9 >>= sh3;
328bf215546Sopenharmony_ci   seed10 >>= sh3;
329bf215546Sopenharmony_ci   seed11 >>= sh3;
330bf215546Sopenharmony_ci   seed12 >>= sh3;
331bf215546Sopenharmony_ci
332bf215546Sopenharmony_ci   int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
333bf215546Sopenharmony_ci   int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
334bf215546Sopenharmony_ci   int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
335bf215546Sopenharmony_ci   int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
336bf215546Sopenharmony_ci
337bf215546Sopenharmony_ci   a &= 0x3F;
338bf215546Sopenharmony_ci   b &= 0x3F;
339bf215546Sopenharmony_ci   c &= 0x3F;
340bf215546Sopenharmony_ci   d &= 0x3F;
341bf215546Sopenharmony_ci
342bf215546Sopenharmony_ci   if (partitioncount < 4)
343bf215546Sopenharmony_ci      d = 0;
344bf215546Sopenharmony_ci   if (partitioncount < 3)
345bf215546Sopenharmony_ci      c = 0;
346bf215546Sopenharmony_ci
347bf215546Sopenharmony_ci   if (a >= b && a >= c && a >= d)
348bf215546Sopenharmony_ci      return 0;
349bf215546Sopenharmony_ci   else if (b >= c && b >= d)
350bf215546Sopenharmony_ci      return 1;
351bf215546Sopenharmony_ci   else if (c >= d)
352bf215546Sopenharmony_ci      return 2;
353bf215546Sopenharmony_ci   else
354bf215546Sopenharmony_ci      return 3;
355bf215546Sopenharmony_ci}
356bf215546Sopenharmony_ci
357bf215546Sopenharmony_ci
/**
 * A 128-bit block viewed as a little-endian bit string held in four 32-bit
 * words: bit 0 is the least significant bit of data[0], bit 127 the most
 * significant bit of data[3].
 *
 * All accessors expect a non-negative offset.  Bits at positions >= 128
 * read as zero.
 */
struct InputBitVector
{
   uint32_t data[4];

   /**
    * Debug helper: print the 128 bits MSB-first, showing '0'/'1' for the
    * 'count' bits starting at 'offset' and '.' for every other position,
    * followed by a space, the printf-style message and a newline.
    *
    * Positions outside 0..127 are skipped rather than written, so an
    * oversized range cannot overrun the local buffer or data[].
    */
   void printf_bits(int offset, int count, const char *fmt = "", ...) const
   {
      char out[129];
      memset(out, '.', 128);
      out[128] = '\0';
      for (int i = 0; i < count; ++i) {
         const int idx = offset + i;
         if (idx < 0 || idx >= 128)
            continue;
         out[127 - idx] = ((data[idx >> 5] >> (idx & 31)) & 1) ? '1' : '0';
      }
      printf("%s ", out);
      va_list ap;
      va_start(ap, fmt);
      vprintf(fmt, ap);
      va_end(ap);
      printf("\n");
   }

   /**
    * Return 'count' (0..31) bits starting at bit 'offset', with the bit at
    * 'offset' in bit 0 of the result.
    */
   uint32_t get_bits(int offset, int count) const
   {
      assert(count >= 0 && count < 32);

      /* Each word contributes either its upper part (shifted down) or its
       * lower part (shifted up), depending on where 'offset' falls.  The
       * conditions keep every shift count in 0..31.
       */
      uint32_t out = 0;
      if (offset < 32)
         out |= data[0] >> offset;

      if (0 < offset && offset <= 32)
         out |= data[1] << (32 - offset);
      if (32 < offset && offset < 64)
         out |= data[1] >> (offset - 32);

      if (32 < offset && offset <= 64)
         out |= data[2] << (64 - offset);
      if (64 < offset && offset < 96)
         out |= data[2] >> (offset - 64);

      if (64 < offset && offset <= 96)
         out |= data[3] << (96 - offset);
      if (96 < offset && offset < 128)
         out |= data[3] >> (offset - 96);

      /* Unsigned literal: with count == 31 a signed (1 << 31) - 1 would
       * overflow int.
       */
      out &= ((uint32_t)1 << count) - 1;
      return out;
   }

   /**
    * 64-bit variant of get_bits(): return 'count' (0..63) bits starting at
    * bit 'offset'.
    */
   uint64_t get_bits64(int offset, int count) const
   {
      assert(count >= 0 && count < 64);

      /* Same scheme as get_bits(), but up to three words can contribute.
       * The conditions keep every shift count in 0..63.
       */
      uint64_t out = 0;
      if (offset < 32)
         out |= data[0] >> offset;

      if (offset <= 32)
         out |= (uint64_t)data[1] << (32 - offset);
      if (32 < offset && offset < 64)
         out |= data[1] >> (offset - 32);

      if (0 < offset && offset <= 64)
         out |= (uint64_t)data[2] << (64 - offset);
      if (64 < offset && offset < 96)
         out |= data[2] >> (offset - 64);

      if (32 < offset && offset <= 96)
         out |= (uint64_t)data[3] << (96 - offset);
      if (96 < offset && offset < 128)
         out |= data[3] >> (offset - 96);

      out &= ((uint64_t)1 << count) - 1;
      return out;
   }

   /**
    * Return the 'count' bits that end just below bit 'offset' (i.e. bits
    * offset-count .. offset-1) in reversed order: bit offset-1 lands in
    * bit 0 of the result.
    */
   uint32_t get_bits_rev(int offset, int count) const
   {
      assert(offset >= count);
      uint32_t tmp = get_bits(offset - count, count);
      uint32_t out = 0;
      for (int i = 0; i < count; ++i)
         out |= ((tmp >> i) & 1) << (count - 1 - i);
      return out;
   }
};
444bf215546Sopenharmony_ci
445bf215546Sopenharmony_cistruct OutputBitVector
446bf215546Sopenharmony_ci{
447bf215546Sopenharmony_ci   uint32_t data[4];
448bf215546Sopenharmony_ci   int offset;
449bf215546Sopenharmony_ci
450bf215546Sopenharmony_ci   OutputBitVector()
451bf215546Sopenharmony_ci      : offset(0)
452bf215546Sopenharmony_ci   {
453bf215546Sopenharmony_ci      memset(data, 0, sizeof(data));
454bf215546Sopenharmony_ci   }
455bf215546Sopenharmony_ci
456bf215546Sopenharmony_ci   void append(uint32_t value, int size)
457bf215546Sopenharmony_ci   {
458bf215546Sopenharmony_ci      if (VERBOSE_WRITE)
459bf215546Sopenharmony_ci         printf("append offset=%d size=%d values=0x%x\n", offset, size, value);
460bf215546Sopenharmony_ci
461bf215546Sopenharmony_ci      assert(offset + size <= 128);
462bf215546Sopenharmony_ci
463bf215546Sopenharmony_ci      assert(size <= 32);
464bf215546Sopenharmony_ci      if (size < 32)
465bf215546Sopenharmony_ci         assert((value >> size) == 0);
466bf215546Sopenharmony_ci
467bf215546Sopenharmony_ci      while (size) {
468bf215546Sopenharmony_ci         int c = MIN2(size, 32 - (offset & 31));
469bf215546Sopenharmony_ci         data[offset >> 5] |= (value << (offset & 31));
470bf215546Sopenharmony_ci         offset += c;
471bf215546Sopenharmony_ci         size -= c;
472bf215546Sopenharmony_ci         value >>= c;
473bf215546Sopenharmony_ci      }
474bf215546Sopenharmony_ci   }
475bf215546Sopenharmony_ci
476bf215546Sopenharmony_ci   void append64(uint64_t value, int size)
477bf215546Sopenharmony_ci   {
478bf215546Sopenharmony_ci      if (VERBOSE_WRITE)
479bf215546Sopenharmony_ci         printf("append offset=%d size=%d values=0x%llx\n", offset, size, (unsigned long long)value);
480bf215546Sopenharmony_ci
481bf215546Sopenharmony_ci      assert(offset + size <= 128);
482bf215546Sopenharmony_ci
483bf215546Sopenharmony_ci      assert(size <= 64);
484bf215546Sopenharmony_ci      if (size < 64)
485bf215546Sopenharmony_ci         assert((value >> size) == 0);
486bf215546Sopenharmony_ci
487bf215546Sopenharmony_ci      while (size) {
488bf215546Sopenharmony_ci         int c = MIN2(size, 32 - (offset & 31));
489bf215546Sopenharmony_ci         data[offset >> 5] |= (value << (offset & 31));
490bf215546Sopenharmony_ci         offset += c;
491bf215546Sopenharmony_ci         size -= c;
492bf215546Sopenharmony_ci         value >>= c;
493bf215546Sopenharmony_ci      }
494bf215546Sopenharmony_ci   }
495bf215546Sopenharmony_ci
496bf215546Sopenharmony_ci   void append(OutputBitVector &v, int size)
497bf215546Sopenharmony_ci   {
498bf215546Sopenharmony_ci      if (VERBOSE_WRITE)
499bf215546Sopenharmony_ci         printf("append vector offset=%d size=%d\n", offset, size);
500bf215546Sopenharmony_ci
501bf215546Sopenharmony_ci      assert(offset + size <= 128);
502bf215546Sopenharmony_ci      int i = 0;
503bf215546Sopenharmony_ci      while (size >= 32) {
504bf215546Sopenharmony_ci         append(v.data[i++], 32);
505bf215546Sopenharmony_ci         size -= 32;
506bf215546Sopenharmony_ci      }
507bf215546Sopenharmony_ci      if (size > 0)
508bf215546Sopenharmony_ci         append(v.data[i] & ((1 << size) - 1), size);
509bf215546Sopenharmony_ci   }
510bf215546Sopenharmony_ci
511bf215546Sopenharmony_ci   void append_end(OutputBitVector &v, int size)
512bf215546Sopenharmony_ci   {
513bf215546Sopenharmony_ci      for (int i = 0; i < size; ++i)
514bf215546Sopenharmony_ci         data[(127 - i) >> 5] |= ((v.data[i >> 5] >> (i & 31)) & 1) << ((127 - i) & 31);
515bf215546Sopenharmony_ci   }
516bf215546Sopenharmony_ci
517bf215546Sopenharmony_ci   /* Insert the given number of '1' bits. (We could use 0s instead, but 1s are
518bf215546Sopenharmony_ci    * more likely to flush out bugs where we accidentally read undefined bits.)
519bf215546Sopenharmony_ci    */
520bf215546Sopenharmony_ci   void skip(int size)
521bf215546Sopenharmony_ci   {
522bf215546Sopenharmony_ci      if (VERBOSE_WRITE)
523bf215546Sopenharmony_ci         printf("skip offset=%d size=%d\n", offset, size);
524bf215546Sopenharmony_ci
525bf215546Sopenharmony_ci      assert(offset + size <= 128);
526bf215546Sopenharmony_ci      while (size >= 32) {
527bf215546Sopenharmony_ci         append(0xffffffff, 32);
528bf215546Sopenharmony_ci         size -= 32;
529bf215546Sopenharmony_ci      }
530bf215546Sopenharmony_ci      if (size > 0)
531bf215546Sopenharmony_ci         append(0xffffffff >> (32 - size), size);
532bf215546Sopenharmony_ci   }
533bf215546Sopenharmony_ci};
534bf215546Sopenharmony_ci
535bf215546Sopenharmony_ci
536bf215546Sopenharmony_ciclass Decoder
537bf215546Sopenharmony_ci{
538bf215546Sopenharmony_cipublic:
539bf215546Sopenharmony_ci   Decoder(int block_w, int block_h, int block_d, bool srgb, bool output_unorm8)
540bf215546Sopenharmony_ci      : block_w(block_w), block_h(block_h), block_d(block_d), srgb(srgb),
541bf215546Sopenharmony_ci        output_unorm8(output_unorm8) {}
542bf215546Sopenharmony_ci
543bf215546Sopenharmony_ci   decode_error::type decode(const uint8_t *in, uint16_t *output) const;
544bf215546Sopenharmony_ci
545bf215546Sopenharmony_ci   int block_w, block_h, block_d;
546bf215546Sopenharmony_ci   bool srgb, output_unorm8;
547bf215546Sopenharmony_ci};
548bf215546Sopenharmony_ci
549bf215546Sopenharmony_cistruct Block
550bf215546Sopenharmony_ci{
551bf215546Sopenharmony_ci   bool is_error;
552bf215546Sopenharmony_ci   bool bogus_colour_endpoints;
553bf215546Sopenharmony_ci   bool bogus_weights;
554bf215546Sopenharmony_ci
555bf215546Sopenharmony_ci   int high_prec;
556bf215546Sopenharmony_ci   int dual_plane;
557bf215546Sopenharmony_ci   int colour_component_selector;
558bf215546Sopenharmony_ci   int wt_range;
559bf215546Sopenharmony_ci   int wt_w, wt_h, wt_d;
560bf215546Sopenharmony_ci   int num_parts;
561bf215546Sopenharmony_ci   int partition_index;
562bf215546Sopenharmony_ci
563bf215546Sopenharmony_ci   bool is_void_extent;
564bf215546Sopenharmony_ci   int void_extent_d;
565bf215546Sopenharmony_ci   int void_extent_min_s;
566bf215546Sopenharmony_ci   int void_extent_max_s;
567bf215546Sopenharmony_ci   int void_extent_min_t;
568bf215546Sopenharmony_ci   int void_extent_max_t;
569bf215546Sopenharmony_ci   uint16_t void_extent_colour_r;
570bf215546Sopenharmony_ci   uint16_t void_extent_colour_g;
571bf215546Sopenharmony_ci   uint16_t void_extent_colour_b;
572bf215546Sopenharmony_ci   uint16_t void_extent_colour_a;
573bf215546Sopenharmony_ci
574bf215546Sopenharmony_ci   bool is_multi_cem;
575bf215546Sopenharmony_ci   int num_extra_cem_bits;
576bf215546Sopenharmony_ci   int colour_endpoint_data_offset;
577bf215546Sopenharmony_ci   int extra_cem_bits;
578bf215546Sopenharmony_ci   int cem_base_class;
579bf215546Sopenharmony_ci   int cems[4];
580bf215546Sopenharmony_ci
581bf215546Sopenharmony_ci   int num_cem_values;
582bf215546Sopenharmony_ci
583bf215546Sopenharmony_ci   /* Calculated by unpack_weights(): */
584bf215546Sopenharmony_ci   uint8_t weights_quant[64 + 4]; /* max 64 values, plus padding for overflows in trit parsing */
585bf215546Sopenharmony_ci
586bf215546Sopenharmony_ci   /* Calculated by unquantise_weights(): */
587bf215546Sopenharmony_ci   uint8_t weights[64 + 18]; /* max 64 values, plus padding for the infill interpolation */
588bf215546Sopenharmony_ci
589bf215546Sopenharmony_ci   /* Calculated by unpack_colour_endpoints(): */
590bf215546Sopenharmony_ci   uint8_t colour_endpoints_quant[18 + 4]; /* max 18 values, plus padding for overflows in trit parsing */
591bf215546Sopenharmony_ci
592bf215546Sopenharmony_ci   /* Calculated by unquantise_colour_endpoints(): */
593bf215546Sopenharmony_ci   uint8_t colour_endpoints[18];
594bf215546Sopenharmony_ci
595bf215546Sopenharmony_ci   /* Calculated by calculate_from_weights(): */
596bf215546Sopenharmony_ci   int wt_trits;
597bf215546Sopenharmony_ci   int wt_quints;
598bf215546Sopenharmony_ci   int wt_bits;
599bf215546Sopenharmony_ci   int wt_max;
600bf215546Sopenharmony_ci   int num_weights;
601bf215546Sopenharmony_ci   int weight_bits;
602bf215546Sopenharmony_ci
603bf215546Sopenharmony_ci   /* Calculated by calculate_remaining_bits(): */
604bf215546Sopenharmony_ci   int remaining_bits;
605bf215546Sopenharmony_ci
606bf215546Sopenharmony_ci   /* Calculated by calculate_colour_endpoints_size(): */
607bf215546Sopenharmony_ci   int colour_endpoint_bits;
608bf215546Sopenharmony_ci   int ce_max;
609bf215546Sopenharmony_ci   int ce_trits;
610bf215546Sopenharmony_ci   int ce_quints;
611bf215546Sopenharmony_ci   int ce_bits;
612bf215546Sopenharmony_ci
613bf215546Sopenharmony_ci   /* Calculated by compute_infill_weights(); */
614bf215546Sopenharmony_ci   uint8_t infill_weights[2][216]; /* large enough for 6x6x6 */
615bf215546Sopenharmony_ci
616bf215546Sopenharmony_ci   /* Calculated by decode_colour_endpoints(); */
617bf215546Sopenharmony_ci   uint8x4_t endpoints_decoded[2][4];
618bf215546Sopenharmony_ci
619bf215546Sopenharmony_ci   void calculate_from_weights();
620bf215546Sopenharmony_ci   void calculate_remaining_bits();
621bf215546Sopenharmony_ci   decode_error::type calculate_colour_endpoints_size();
622bf215546Sopenharmony_ci
623bf215546Sopenharmony_ci   void unquantise_weights();
624bf215546Sopenharmony_ci   void unquantise_colour_endpoints();
625bf215546Sopenharmony_ci
626bf215546Sopenharmony_ci   decode_error::type decode(const Decoder &decoder, InputBitVector in);
627bf215546Sopenharmony_ci
628bf215546Sopenharmony_ci   decode_error::type decode_block_mode(InputBitVector in);
629bf215546Sopenharmony_ci   decode_error::type decode_void_extent(InputBitVector in);
630bf215546Sopenharmony_ci   void decode_cem(InputBitVector in);
631bf215546Sopenharmony_ci   void unpack_colour_endpoints(InputBitVector in);
632bf215546Sopenharmony_ci   void decode_colour_endpoints();
633bf215546Sopenharmony_ci   void unpack_weights(InputBitVector in);
634bf215546Sopenharmony_ci   void compute_infill_weights(int block_w, int block_h, int block_d);
635bf215546Sopenharmony_ci
636bf215546Sopenharmony_ci   void write_decoded(const Decoder &decoder, uint16_t *output);
637bf215546Sopenharmony_ci};
638bf215546Sopenharmony_ci
639bf215546Sopenharmony_ci
640bf215546Sopenharmony_cidecode_error::type Decoder::decode(const uint8_t *in, uint16_t *output) const
641bf215546Sopenharmony_ci{
642bf215546Sopenharmony_ci   Block blk;
643bf215546Sopenharmony_ci   InputBitVector in_vec;
644bf215546Sopenharmony_ci   memcpy(&in_vec.data, in, 16);
645bf215546Sopenharmony_ci   decode_error::type err = blk.decode(*this, in_vec);
646bf215546Sopenharmony_ci   if (err == decode_error::ok) {
647bf215546Sopenharmony_ci      blk.write_decoded(*this, output);
648bf215546Sopenharmony_ci   } else {
649bf215546Sopenharmony_ci      /* Fill output with the error colour */
650bf215546Sopenharmony_ci      for (int i = 0; i < block_w * block_h * block_d; ++i) {
651bf215546Sopenharmony_ci         if (output_unorm8) {
652bf215546Sopenharmony_ci            output[i*4+0] = 0xff;
653bf215546Sopenharmony_ci            output[i*4+1] = 0;
654bf215546Sopenharmony_ci            output[i*4+2] = 0xff;
655bf215546Sopenharmony_ci            output[i*4+3] = 0xff;
656bf215546Sopenharmony_ci         } else {
657bf215546Sopenharmony_ci            assert(!srgb); /* srgb must use unorm8 */
658bf215546Sopenharmony_ci
659bf215546Sopenharmony_ci            output[i*4+0] = FP16_ONE;
660bf215546Sopenharmony_ci            output[i*4+1] = FP16_ZERO;
661bf215546Sopenharmony_ci            output[i*4+2] = FP16_ONE;
662bf215546Sopenharmony_ci            output[i*4+3] = FP16_ONE;
663bf215546Sopenharmony_ci         }
664bf215546Sopenharmony_ci      }
665bf215546Sopenharmony_ci   }
666bf215546Sopenharmony_ci   return err;
667bf215546Sopenharmony_ci}
668bf215546Sopenharmony_ci
669bf215546Sopenharmony_ci
670bf215546Sopenharmony_cidecode_error::type Block::decode_void_extent(InputBitVector block)
671bf215546Sopenharmony_ci{
672bf215546Sopenharmony_ci   /* TODO: 3D */
673bf215546Sopenharmony_ci
674bf215546Sopenharmony_ci   is_void_extent = true;
675bf215546Sopenharmony_ci   void_extent_d = block.get_bits(9, 1);
676bf215546Sopenharmony_ci   void_extent_min_s = block.get_bits(12, 13);
677bf215546Sopenharmony_ci   void_extent_max_s = block.get_bits(25, 13);
678bf215546Sopenharmony_ci   void_extent_min_t = block.get_bits(38, 13);
679bf215546Sopenharmony_ci   void_extent_max_t = block.get_bits(51, 13);
680bf215546Sopenharmony_ci   void_extent_colour_r = block.get_bits(64, 16);
681bf215546Sopenharmony_ci   void_extent_colour_g = block.get_bits(80, 16);
682bf215546Sopenharmony_ci   void_extent_colour_b = block.get_bits(96, 16);
683bf215546Sopenharmony_ci   void_extent_colour_a = block.get_bits(112, 16);
684bf215546Sopenharmony_ci
685bf215546Sopenharmony_ci   /* TODO: maybe we should do something useful with the extent coordinates? */
686bf215546Sopenharmony_ci
687bf215546Sopenharmony_ci   if (void_extent_d) {
688bf215546Sopenharmony_ci      return decode_error::unsupported_hdr_void_extent;
689bf215546Sopenharmony_ci   }
690bf215546Sopenharmony_ci
691bf215546Sopenharmony_ci   if (void_extent_min_s == 0x1fff && void_extent_max_s == 0x1fff
692bf215546Sopenharmony_ci       && void_extent_min_t == 0x1fff && void_extent_max_t == 0x1fff) {
693bf215546Sopenharmony_ci
694bf215546Sopenharmony_ci      /* No extents */
695bf215546Sopenharmony_ci
696bf215546Sopenharmony_ci   } else {
697bf215546Sopenharmony_ci
698bf215546Sopenharmony_ci      /* Check for illegal encoding */
699bf215546Sopenharmony_ci      if (void_extent_min_s >= void_extent_max_s || void_extent_min_t >= void_extent_max_t) {
700bf215546Sopenharmony_ci         return decode_error::invalid_range_in_void_extent;
701bf215546Sopenharmony_ci      }
702bf215546Sopenharmony_ci   }
703bf215546Sopenharmony_ci
704bf215546Sopenharmony_ci   return decode_error::ok;
705bf215546Sopenharmony_ci}
706bf215546Sopenharmony_ci
707bf215546Sopenharmony_cidecode_error::type Block::decode_block_mode(InputBitVector in)
708bf215546Sopenharmony_ci{
709bf215546Sopenharmony_ci   dual_plane = in.get_bits(10, 1);
710bf215546Sopenharmony_ci   high_prec = in.get_bits(9, 1);
711bf215546Sopenharmony_ci
712bf215546Sopenharmony_ci   if (in.get_bits(0, 2) != 0x0) {
713bf215546Sopenharmony_ci      wt_range = (in.get_bits(0, 2) << 1) | in.get_bits(4, 1);
714bf215546Sopenharmony_ci      int a = in.get_bits(5, 2);
715bf215546Sopenharmony_ci      int b = in.get_bits(7, 2);
716bf215546Sopenharmony_ci      switch (in.get_bits(2, 2)) {
717bf215546Sopenharmony_ci      case 0x0:
718bf215546Sopenharmony_ci         if (VERBOSE_DECODE)
719bf215546Sopenharmony_ci            in.printf_bits(0, 11, "DHBBAAR00RR");
720bf215546Sopenharmony_ci         wt_w = b + 4;
721bf215546Sopenharmony_ci         wt_h = a + 2;
722bf215546Sopenharmony_ci         break;
723bf215546Sopenharmony_ci      case 0x1:
724bf215546Sopenharmony_ci         if (VERBOSE_DECODE)
725bf215546Sopenharmony_ci            in.printf_bits(0, 11, "DHBBAAR01RR");
726bf215546Sopenharmony_ci         wt_w = b + 8;
727bf215546Sopenharmony_ci         wt_h = a + 2;
728bf215546Sopenharmony_ci         break;
729bf215546Sopenharmony_ci      case 0x2:
730bf215546Sopenharmony_ci         if (VERBOSE_DECODE)
731bf215546Sopenharmony_ci            in.printf_bits(0, 11, "DHBBAAR10RR");
732bf215546Sopenharmony_ci         wt_w = a + 2;
733bf215546Sopenharmony_ci         wt_h = b + 8;
734bf215546Sopenharmony_ci         break;
735bf215546Sopenharmony_ci      case 0x3:
736bf215546Sopenharmony_ci         if ((b & 0x2) == 0) {
737bf215546Sopenharmony_ci            if (VERBOSE_DECODE)
738bf215546Sopenharmony_ci               in.printf_bits(0, 11, "DH0BAAR11RR");
739bf215546Sopenharmony_ci            wt_w = a + 2;
740bf215546Sopenharmony_ci            wt_h = b + 6;
741bf215546Sopenharmony_ci         } else {
742bf215546Sopenharmony_ci            if (VERBOSE_DECODE)
743bf215546Sopenharmony_ci               in.printf_bits(0, 11, "DH1BAAR11RR");
744bf215546Sopenharmony_ci            wt_w = (b & 0x1) + 2;
745bf215546Sopenharmony_ci            wt_h = a + 2;
746bf215546Sopenharmony_ci         }
747bf215546Sopenharmony_ci         break;
748bf215546Sopenharmony_ci      }
749bf215546Sopenharmony_ci   } else {
750bf215546Sopenharmony_ci      if (in.get_bits(6, 3) == 0x7) {
751bf215546Sopenharmony_ci         if (in.get_bits(0, 9) == 0x1fc) {
752bf215546Sopenharmony_ci            if (VERBOSE_DECODE)
753bf215546Sopenharmony_ci               in.printf_bits(0, 11, "xx111111100 (void extent)");
754bf215546Sopenharmony_ci            return decode_void_extent(in);
755bf215546Sopenharmony_ci         } else {
756bf215546Sopenharmony_ci            if (VERBOSE_DECODE)
757bf215546Sopenharmony_ci               in.printf_bits(0, 11, "xx111xxxx00");
758bf215546Sopenharmony_ci            return decode_error::reserved_block_mode_1;
759bf215546Sopenharmony_ci         }
760bf215546Sopenharmony_ci      }
761bf215546Sopenharmony_ci      if (in.get_bits(0, 4) == 0x0) {
762bf215546Sopenharmony_ci         if (VERBOSE_DECODE)
763bf215546Sopenharmony_ci            in.printf_bits(0, 11, "xxxxxxx0000");
764bf215546Sopenharmony_ci         return decode_error::reserved_block_mode_2;
765bf215546Sopenharmony_ci      }
766bf215546Sopenharmony_ci
767bf215546Sopenharmony_ci      wt_range = in.get_bits(1, 3) | in.get_bits(4, 1);
768bf215546Sopenharmony_ci      int a = in.get_bits(5, 2);
769bf215546Sopenharmony_ci      int b;
770bf215546Sopenharmony_ci
771bf215546Sopenharmony_ci      switch (in.get_bits(7, 2)) {
772bf215546Sopenharmony_ci      case 0x0:
773bf215546Sopenharmony_ci         if (VERBOSE_DECODE)
774bf215546Sopenharmony_ci            in.printf_bits(0, 11, "DH00AARRR00");
775bf215546Sopenharmony_ci         wt_w = 12;
776bf215546Sopenharmony_ci         wt_h = a + 2;
777bf215546Sopenharmony_ci         break;
778bf215546Sopenharmony_ci      case 0x1:
779bf215546Sopenharmony_ci         if (VERBOSE_DECODE)
780bf215546Sopenharmony_ci            in.printf_bits(0, 11, "DH01AARRR00");
781bf215546Sopenharmony_ci         wt_w = a + 2;
782bf215546Sopenharmony_ci         wt_h = 12;
783bf215546Sopenharmony_ci         break;
784bf215546Sopenharmony_ci      case 0x3:
785bf215546Sopenharmony_ci         if (in.get_bits(5, 1) == 0) {
786bf215546Sopenharmony_ci            if (VERBOSE_DECODE)
787bf215546Sopenharmony_ci               in.printf_bits(0, 11, "DH1100RRR00");
788bf215546Sopenharmony_ci            wt_w = 6;
789bf215546Sopenharmony_ci            wt_h = 10;
790bf215546Sopenharmony_ci         } else {
791bf215546Sopenharmony_ci            if (VERBOSE_DECODE)
792bf215546Sopenharmony_ci               in.printf_bits(0, 11, "DH1101RRR00");
793bf215546Sopenharmony_ci            wt_w = 10;
794bf215546Sopenharmony_ci            wt_h = 6;
795bf215546Sopenharmony_ci         }
796bf215546Sopenharmony_ci         break;
797bf215546Sopenharmony_ci      case 0x2:
798bf215546Sopenharmony_ci         if (VERBOSE_DECODE)
799bf215546Sopenharmony_ci            in.printf_bits(0, 11, "BB10AARRR00");
800bf215546Sopenharmony_ci         b = in.get_bits(9, 2);
801bf215546Sopenharmony_ci         wt_w = a + 6;
802bf215546Sopenharmony_ci         wt_h = b + 6;
803bf215546Sopenharmony_ci         dual_plane = 0;
804bf215546Sopenharmony_ci         high_prec = 0;
805bf215546Sopenharmony_ci         break;
806bf215546Sopenharmony_ci      }
807bf215546Sopenharmony_ci   }
808bf215546Sopenharmony_ci   return decode_error::ok;
809bf215546Sopenharmony_ci}
810bf215546Sopenharmony_ci
811bf215546Sopenharmony_civoid Block::decode_cem(InputBitVector in)
812bf215546Sopenharmony_ci{
813bf215546Sopenharmony_ci   cems[0] = cems[1] = cems[2] = cems[3] = -1;
814bf215546Sopenharmony_ci
815bf215546Sopenharmony_ci   num_extra_cem_bits = 0;
816bf215546Sopenharmony_ci   extra_cem_bits = 0;
817bf215546Sopenharmony_ci
818bf215546Sopenharmony_ci   if (num_parts > 1) {
819bf215546Sopenharmony_ci
820bf215546Sopenharmony_ci      partition_index = in.get_bits(13, 10);
821bf215546Sopenharmony_ci      if (VERBOSE_DECODE)
822bf215546Sopenharmony_ci         in.printf_bits(13, 10, "partition ID (%d)", partition_index);
823bf215546Sopenharmony_ci
824bf215546Sopenharmony_ci      uint32_t cem = in.get_bits(23, 6);
825bf215546Sopenharmony_ci
826bf215546Sopenharmony_ci      if ((cem & 0x3) == 0x0) {
827bf215546Sopenharmony_ci         cem >>= 2;
828bf215546Sopenharmony_ci         cem_base_class = cem >> 2;
829bf215546Sopenharmony_ci         is_multi_cem = false;
830bf215546Sopenharmony_ci
831bf215546Sopenharmony_ci         for (int i = 0; i < num_parts; ++i)
832bf215546Sopenharmony_ci            cems[i] = cem;
833bf215546Sopenharmony_ci
834bf215546Sopenharmony_ci         if (VERBOSE_DECODE)
835bf215546Sopenharmony_ci            in.printf_bits(23, 6, "CEM (single, %d)", cem);
836bf215546Sopenharmony_ci      } else {
837bf215546Sopenharmony_ci
838bf215546Sopenharmony_ci         cem_base_class = (cem & 0x3) - 1;
839bf215546Sopenharmony_ci         is_multi_cem = true;
840bf215546Sopenharmony_ci
841bf215546Sopenharmony_ci         if (VERBOSE_DECODE)
842bf215546Sopenharmony_ci            in.printf_bits(23, 6, "CEM (multi, base class %d)", cem_base_class);
843bf215546Sopenharmony_ci
844bf215546Sopenharmony_ci         int offset = 128 - weight_bits;
845bf215546Sopenharmony_ci
846bf215546Sopenharmony_ci         if (num_parts == 2) {
847bf215546Sopenharmony_ci            if (VERBOSE_DECODE) {
848bf215546Sopenharmony_ci               in.printf_bits(25, 4, "M0M0 C1 C0");
849bf215546Sopenharmony_ci               in.printf_bits(offset - 2, 2, "M1M1");
850bf215546Sopenharmony_ci            }
851bf215546Sopenharmony_ci
852bf215546Sopenharmony_ci            uint32_t c0 = in.get_bits(25, 1);
853bf215546Sopenharmony_ci            uint32_t c1 = in.get_bits(26, 1);
854bf215546Sopenharmony_ci
855bf215546Sopenharmony_ci            extra_cem_bits = c0 + c1;
856bf215546Sopenharmony_ci
857bf215546Sopenharmony_ci            num_extra_cem_bits = 2;
858bf215546Sopenharmony_ci
859bf215546Sopenharmony_ci            uint32_t m0 = in.get_bits(27, 2);
860bf215546Sopenharmony_ci            uint32_t m1 = in.get_bits(offset - 2, 2);
861bf215546Sopenharmony_ci
862bf215546Sopenharmony_ci            cems[0] = ((cem_base_class + c0) << 2) | m0;
863bf215546Sopenharmony_ci            cems[1] = ((cem_base_class + c1) << 2) | m1;
864bf215546Sopenharmony_ci
865bf215546Sopenharmony_ci         } else if (num_parts == 3) {
866bf215546Sopenharmony_ci            if (VERBOSE_DECODE) {
867bf215546Sopenharmony_ci               in.printf_bits(25, 4, "M0 C2 C1 C0");
868bf215546Sopenharmony_ci               in.printf_bits(offset - 5, 5, "M2M2 M1M1 M0");
869bf215546Sopenharmony_ci            }
870bf215546Sopenharmony_ci
871bf215546Sopenharmony_ci            uint32_t c0 = in.get_bits(25, 1);
872bf215546Sopenharmony_ci            uint32_t c1 = in.get_bits(26, 1);
873bf215546Sopenharmony_ci            uint32_t c2 = in.get_bits(27, 1);
874bf215546Sopenharmony_ci
875bf215546Sopenharmony_ci            extra_cem_bits = c0 + c1 + c2;
876bf215546Sopenharmony_ci
877bf215546Sopenharmony_ci            num_extra_cem_bits = 5;
878bf215546Sopenharmony_ci
879bf215546Sopenharmony_ci            uint32_t m0 = in.get_bits(28, 1) | (in.get_bits(128 - weight_bits - 5, 1) << 1);
880bf215546Sopenharmony_ci            uint32_t m1 = in.get_bits(offset - 4, 2);
881bf215546Sopenharmony_ci            uint32_t m2 = in.get_bits(offset - 2, 2);
882bf215546Sopenharmony_ci
883bf215546Sopenharmony_ci            cems[0] = ((cem_base_class + c0) << 2) | m0;
884bf215546Sopenharmony_ci            cems[1] = ((cem_base_class + c1) << 2) | m1;
885bf215546Sopenharmony_ci            cems[2] = ((cem_base_class + c2) << 2) | m2;
886bf215546Sopenharmony_ci
887bf215546Sopenharmony_ci         } else if (num_parts == 4) {
888bf215546Sopenharmony_ci            if (VERBOSE_DECODE) {
889bf215546Sopenharmony_ci               in.printf_bits(25, 4, "C3 C2 C1 C0");
890bf215546Sopenharmony_ci               in.printf_bits(offset - 8, 8, "M3M3 M2M2 M1M1 M0M0");
891bf215546Sopenharmony_ci            }
892bf215546Sopenharmony_ci
893bf215546Sopenharmony_ci            uint32_t c0 = in.get_bits(25, 1);
894bf215546Sopenharmony_ci            uint32_t c1 = in.get_bits(26, 1);
895bf215546Sopenharmony_ci            uint32_t c2 = in.get_bits(27, 1);
896bf215546Sopenharmony_ci            uint32_t c3 = in.get_bits(28, 1);
897bf215546Sopenharmony_ci
898bf215546Sopenharmony_ci            extra_cem_bits = c0 + c1 + c2 + c3;
899bf215546Sopenharmony_ci
900bf215546Sopenharmony_ci            num_extra_cem_bits = 8;
901bf215546Sopenharmony_ci
902bf215546Sopenharmony_ci            uint32_t m0 = in.get_bits(offset - 8, 2);
903bf215546Sopenharmony_ci            uint32_t m1 = in.get_bits(offset - 6, 2);
904bf215546Sopenharmony_ci            uint32_t m2 = in.get_bits(offset - 4, 2);
905bf215546Sopenharmony_ci            uint32_t m3 = in.get_bits(offset - 2, 2);
906bf215546Sopenharmony_ci
907bf215546Sopenharmony_ci            cems[0] = ((cem_base_class + c0) << 2) | m0;
908bf215546Sopenharmony_ci            cems[1] = ((cem_base_class + c1) << 2) | m1;
909bf215546Sopenharmony_ci            cems[2] = ((cem_base_class + c2) << 2) | m2;
910bf215546Sopenharmony_ci            cems[3] = ((cem_base_class + c3) << 2) | m3;
911bf215546Sopenharmony_ci         } else {
912bf215546Sopenharmony_ci            unreachable("");
913bf215546Sopenharmony_ci         }
914bf215546Sopenharmony_ci      }
915bf215546Sopenharmony_ci
916bf215546Sopenharmony_ci      colour_endpoint_data_offset = 29;
917bf215546Sopenharmony_ci
918bf215546Sopenharmony_ci   } else {
919bf215546Sopenharmony_ci      uint32_t cem = in.get_bits(13, 4);
920bf215546Sopenharmony_ci
921bf215546Sopenharmony_ci      cem_base_class = cem >> 2;
922bf215546Sopenharmony_ci      is_multi_cem = false;
923bf215546Sopenharmony_ci
924bf215546Sopenharmony_ci      cems[0] = cem;
925bf215546Sopenharmony_ci
926bf215546Sopenharmony_ci      partition_index = -1;
927bf215546Sopenharmony_ci
928bf215546Sopenharmony_ci      if (VERBOSE_DECODE)
929bf215546Sopenharmony_ci         in.printf_bits(13, 4, "CEM = %d (class %d)", cem, cem_base_class);
930bf215546Sopenharmony_ci
931bf215546Sopenharmony_ci      colour_endpoint_data_offset = 17;
932bf215546Sopenharmony_ci   }
933bf215546Sopenharmony_ci}
934bf215546Sopenharmony_ci
935bf215546Sopenharmony_civoid Block::unpack_colour_endpoints(InputBitVector in)
936bf215546Sopenharmony_ci{
937bf215546Sopenharmony_ci   if (ce_trits) {
938bf215546Sopenharmony_ci      int offset = colour_endpoint_data_offset;
939bf215546Sopenharmony_ci      int bits_left = colour_endpoint_bits;
940bf215546Sopenharmony_ci      for (int i = 0; i < num_cem_values; i += 5) {
941bf215546Sopenharmony_ci         int bits_to_read = MIN2(bits_left, 8 + ce_bits * 5);
942bf215546Sopenharmony_ci         /* If ce_trits then ce_bits <= 6, so bits_to_read <= 38 and we have to use uint64_t */
943bf215546Sopenharmony_ci         uint64_t raw = in.get_bits64(offset, bits_to_read);
944bf215546Sopenharmony_ci         unpack_trit_block(ce_bits, raw, &colour_endpoints_quant[i]);
945bf215546Sopenharmony_ci
946bf215546Sopenharmony_ci         if (VERBOSE_DECODE)
947bf215546Sopenharmony_ci            in.printf_bits(offset, bits_to_read,
948bf215546Sopenharmony_ci                           "trits [%d,%d,%d,%d,%d]",
949bf215546Sopenharmony_ci                           colour_endpoints_quant[i+0], colour_endpoints_quant[i+1],
950bf215546Sopenharmony_ci                  colour_endpoints_quant[i+2], colour_endpoints_quant[i+3],
951bf215546Sopenharmony_ci                  colour_endpoints_quant[i+4]);
952bf215546Sopenharmony_ci
953bf215546Sopenharmony_ci         offset += 8 + ce_bits * 5;
954bf215546Sopenharmony_ci         bits_left -= 8 + ce_bits * 5;
955bf215546Sopenharmony_ci      }
956bf215546Sopenharmony_ci   } else if (ce_quints) {
957bf215546Sopenharmony_ci      int offset = colour_endpoint_data_offset;
958bf215546Sopenharmony_ci      int bits_left = colour_endpoint_bits;
959bf215546Sopenharmony_ci      for (int i = 0; i < num_cem_values; i += 3) {
960bf215546Sopenharmony_ci         int bits_to_read = MIN2(bits_left, 7 + ce_bits * 3);
961bf215546Sopenharmony_ci         /* If ce_quints then ce_bits <= 5, so bits_to_read <= 22 and we can use uint32_t */
962bf215546Sopenharmony_ci         uint32_t raw = in.get_bits(offset, bits_to_read);
963bf215546Sopenharmony_ci         unpack_quint_block(ce_bits, raw, &colour_endpoints_quant[i]);
964bf215546Sopenharmony_ci
965bf215546Sopenharmony_ci         if (VERBOSE_DECODE)
966bf215546Sopenharmony_ci            in.printf_bits(offset, bits_to_read,
967bf215546Sopenharmony_ci                           "quints [%d,%d,%d]",
968bf215546Sopenharmony_ci                           colour_endpoints_quant[i], colour_endpoints_quant[i+1], colour_endpoints_quant[i+2]);
969bf215546Sopenharmony_ci
970bf215546Sopenharmony_ci         offset += 7 + ce_bits * 3;
971bf215546Sopenharmony_ci         bits_left -= 7 + ce_bits * 3;
972bf215546Sopenharmony_ci      }
973bf215546Sopenharmony_ci   } else {
974bf215546Sopenharmony_ci      assert((colour_endpoint_bits % ce_bits) == 0);
975bf215546Sopenharmony_ci      int offset = colour_endpoint_data_offset;
976bf215546Sopenharmony_ci      for (int i = 0; i < num_cem_values; i++) {
977bf215546Sopenharmony_ci         colour_endpoints_quant[i] = in.get_bits(offset, ce_bits);
978bf215546Sopenharmony_ci
979bf215546Sopenharmony_ci         if (VERBOSE_DECODE)
980bf215546Sopenharmony_ci            in.printf_bits(offset, ce_bits, "bits [%d]", colour_endpoints_quant[i]);
981bf215546Sopenharmony_ci
982bf215546Sopenharmony_ci         offset += ce_bits;
983bf215546Sopenharmony_ci      }
984bf215546Sopenharmony_ci   }
985bf215546Sopenharmony_ci}
986bf215546Sopenharmony_ci
987bf215546Sopenharmony_civoid Block::decode_colour_endpoints()
988bf215546Sopenharmony_ci{
989bf215546Sopenharmony_ci   int cem_values_idx = 0;
990bf215546Sopenharmony_ci   for (int part = 0; part < num_parts; ++part) {
991bf215546Sopenharmony_ci      uint8_t *v = &colour_endpoints[cem_values_idx];
992bf215546Sopenharmony_ci      int v0 = v[0];
993bf215546Sopenharmony_ci      int v1 = v[1];
994bf215546Sopenharmony_ci      int v2 = v[2];
995bf215546Sopenharmony_ci      int v3 = v[3];
996bf215546Sopenharmony_ci      int v4 = v[4];
997bf215546Sopenharmony_ci      int v5 = v[5];
998bf215546Sopenharmony_ci      int v6 = v[6];
999bf215546Sopenharmony_ci      int v7 = v[7];
1000bf215546Sopenharmony_ci      cem_values_idx += ((cems[part] >> 2) + 1) * 2;
1001bf215546Sopenharmony_ci
1002bf215546Sopenharmony_ci      uint8x4_t e0, e1;
1003bf215546Sopenharmony_ci      int s0, s1, L0, L1;
1004bf215546Sopenharmony_ci
1005bf215546Sopenharmony_ci      switch (cems[part])
1006bf215546Sopenharmony_ci      {
1007bf215546Sopenharmony_ci      case 0:
1008bf215546Sopenharmony_ci         e0 = uint8x4_t(v0, v0, v0, 0xff);
1009bf215546Sopenharmony_ci         e1 = uint8x4_t(v1, v1, v1, 0xff);
1010bf215546Sopenharmony_ci         break;
1011bf215546Sopenharmony_ci      case 1:
1012bf215546Sopenharmony_ci         L0 = (v0 >> 2) | (v1 & 0xc0);
1013bf215546Sopenharmony_ci         L1 = L0 + (v1 & 0x3f);
1014bf215546Sopenharmony_ci         if (L1 > 0xff)
1015bf215546Sopenharmony_ci            L1 = 0xff;
1016bf215546Sopenharmony_ci         e0 = uint8x4_t(L0, L0, L0, 0xff);
1017bf215546Sopenharmony_ci         e1 = uint8x4_t(L1, L1, L1, 0xff);
1018bf215546Sopenharmony_ci         break;
1019bf215546Sopenharmony_ci      case 4:
1020bf215546Sopenharmony_ci         e0 = uint8x4_t(v0, v0, v0, v2);
1021bf215546Sopenharmony_ci         e1 = uint8x4_t(v1, v1, v1, v3);
1022bf215546Sopenharmony_ci         break;
1023bf215546Sopenharmony_ci      case 5:
1024bf215546Sopenharmony_ci         bit_transfer_signed(v1, v0);
1025bf215546Sopenharmony_ci         bit_transfer_signed(v3, v2);
1026bf215546Sopenharmony_ci         e0 = uint8x4_t(v0, v0, v0, v2);
1027bf215546Sopenharmony_ci         e1 = uint8x4_t::clamped(v0+v1, v0+v1, v0+v1, v2+v3);
1028bf215546Sopenharmony_ci         break;
1029bf215546Sopenharmony_ci      case 6:
1030bf215546Sopenharmony_ci         e0 = uint8x4_t(v0*v3 >> 8, v1*v3 >> 8, v2*v3 >> 8, 0xff);
1031bf215546Sopenharmony_ci         e1 = uint8x4_t(v0, v1, v2, 0xff);
1032bf215546Sopenharmony_ci         break;
1033bf215546Sopenharmony_ci      case 8:
1034bf215546Sopenharmony_ci         s0 = v0 + v2 + v4;
1035bf215546Sopenharmony_ci         s1 = v1 + v3 + v5;
1036bf215546Sopenharmony_ci         if (s1 >= s0) {
1037bf215546Sopenharmony_ci            e0 = uint8x4_t(v0, v2, v4, 0xff);
1038bf215546Sopenharmony_ci            e1 = uint8x4_t(v1, v3, v5, 0xff);
1039bf215546Sopenharmony_ci         } else {
1040bf215546Sopenharmony_ci            e0 = blue_contract(v1, v3, v5, 0xff);
1041bf215546Sopenharmony_ci            e1 = blue_contract(v0, v2, v4, 0xff);
1042bf215546Sopenharmony_ci         }
1043bf215546Sopenharmony_ci         break;
1044bf215546Sopenharmony_ci      case 9:
1045bf215546Sopenharmony_ci         bit_transfer_signed(v1, v0);
1046bf215546Sopenharmony_ci         bit_transfer_signed(v3, v2);
1047bf215546Sopenharmony_ci         bit_transfer_signed(v5, v4);
1048bf215546Sopenharmony_ci         if (v1 + v3 + v5 >= 0) {
1049bf215546Sopenharmony_ci            e0 = uint8x4_t(v0, v2, v4, 0xff);
1050bf215546Sopenharmony_ci            e1 = uint8x4_t::clamped(v0+v1, v2+v3, v4+v5, 0xff);
1051bf215546Sopenharmony_ci         } else {
1052bf215546Sopenharmony_ci            e0 = blue_contract_clamped(v0+v1, v2+v3, v4+v5, 0xff);
1053bf215546Sopenharmony_ci            e1 = blue_contract(v0, v2, v4, 0xff);
1054bf215546Sopenharmony_ci         }
1055bf215546Sopenharmony_ci         break;
1056bf215546Sopenharmony_ci      case 10:
1057bf215546Sopenharmony_ci         e0 = uint8x4_t(v0*v3 >> 8, v1*v3 >> 8, v2*v3 >> 8, v4);
1058bf215546Sopenharmony_ci         e1 = uint8x4_t(v0, v1, v2, v5);
1059bf215546Sopenharmony_ci         break;
1060bf215546Sopenharmony_ci      case 12:
1061bf215546Sopenharmony_ci         s0 = v0 + v2 + v4;
1062bf215546Sopenharmony_ci         s1 = v1 + v3 + v5;
1063bf215546Sopenharmony_ci         if (s1 >= s0) {
1064bf215546Sopenharmony_ci            e0 = uint8x4_t(v0, v2, v4, v6);
1065bf215546Sopenharmony_ci            e1 = uint8x4_t(v1, v3, v5, v7);
1066bf215546Sopenharmony_ci         } else {
1067bf215546Sopenharmony_ci            e0 = blue_contract(v1, v3, v5, v7);
1068bf215546Sopenharmony_ci            e1 = blue_contract(v0, v2, v4, v6);
1069bf215546Sopenharmony_ci         }
1070bf215546Sopenharmony_ci         break;
1071bf215546Sopenharmony_ci      case 13:
1072bf215546Sopenharmony_ci         bit_transfer_signed(v1, v0);
1073bf215546Sopenharmony_ci         bit_transfer_signed(v3, v2);
1074bf215546Sopenharmony_ci         bit_transfer_signed(v5, v4);
1075bf215546Sopenharmony_ci         bit_transfer_signed(v7, v6);
1076bf215546Sopenharmony_ci         if (v1 + v3 + v5 >= 0) {
1077bf215546Sopenharmony_ci            e0 = uint8x4_t(v0, v2, v4, v6);
1078bf215546Sopenharmony_ci            e1 = uint8x4_t::clamped(v0+v1, v2+v3, v4+v5, v6+v7);
1079bf215546Sopenharmony_ci         } else {
1080bf215546Sopenharmony_ci            e0 = blue_contract_clamped(v0+v1, v2+v3, v4+v5, v6+v7);
1081bf215546Sopenharmony_ci            e1 = blue_contract(v0, v2, v4, v6);
1082bf215546Sopenharmony_ci         }
1083bf215546Sopenharmony_ci         break;
1084bf215546Sopenharmony_ci      default:
1085bf215546Sopenharmony_ci         /* HDR endpoints not supported; return error colour */
1086bf215546Sopenharmony_ci         e0 = uint8x4_t(255, 0, 255, 255);
1087bf215546Sopenharmony_ci         e1 = uint8x4_t(255, 0, 255, 255);
1088bf215546Sopenharmony_ci         break;
1089bf215546Sopenharmony_ci      }
1090bf215546Sopenharmony_ci
1091bf215546Sopenharmony_ci      endpoints_decoded[0][part] = e0;
1092bf215546Sopenharmony_ci      endpoints_decoded[1][part] = e1;
1093bf215546Sopenharmony_ci
1094bf215546Sopenharmony_ci      if (VERBOSE_DECODE) {
1095bf215546Sopenharmony_ci         printf("cems[%d]=%d v=[", part, cems[part]);
1096bf215546Sopenharmony_ci         for (int i = 0; i < (cems[part] >> 2) + 1; ++i) {
1097bf215546Sopenharmony_ci            if (i)
1098bf215546Sopenharmony_ci               printf(", ");
1099bf215546Sopenharmony_ci            printf("%3d", v[i]);
1100bf215546Sopenharmony_ci         }
1101bf215546Sopenharmony_ci         printf("] e0=[%3d,%4d,%4d,%4d] e1=[%3d,%4d,%4d,%4d]\n",
1102bf215546Sopenharmony_ci                e0.v[0], e0.v[1], e0.v[2], e0.v[3],
1103bf215546Sopenharmony_ci               e1.v[0], e1.v[1], e1.v[2], e1.v[3]);
1104bf215546Sopenharmony_ci      }
1105bf215546Sopenharmony_ci   }
1106bf215546Sopenharmony_ci}
1107bf215546Sopenharmony_ci
1108bf215546Sopenharmony_civoid Block::unpack_weights(InputBitVector in)
1109bf215546Sopenharmony_ci{
1110bf215546Sopenharmony_ci   if (wt_trits) {
1111bf215546Sopenharmony_ci      int offset = 128;
1112bf215546Sopenharmony_ci      int bits_left = weight_bits;
1113bf215546Sopenharmony_ci      for (int i = 0; i < num_weights; i += 5) {
1114bf215546Sopenharmony_ci         int bits_to_read = MIN2(bits_left, 8 + 5*wt_bits);
1115bf215546Sopenharmony_ci         /* If wt_trits then wt_bits <= 3, so bits_to_read <= 23 and we can use uint32_t */
1116bf215546Sopenharmony_ci         uint32_t raw = in.get_bits_rev(offset, bits_to_read);
1117bf215546Sopenharmony_ci         unpack_trit_block(wt_bits, raw, &weights_quant[i]);
1118bf215546Sopenharmony_ci
1119bf215546Sopenharmony_ci         if (VERBOSE_DECODE)
1120bf215546Sopenharmony_ci            in.printf_bits(offset - bits_to_read, bits_to_read, "weight trits [%d,%d,%d,%d,%d]",
1121bf215546Sopenharmony_ci                           weights_quant[i+0], weights_quant[i+1],
1122bf215546Sopenharmony_ci                  weights_quant[i+2], weights_quant[i+3],
1123bf215546Sopenharmony_ci                  weights_quant[i+4]);
1124bf215546Sopenharmony_ci
1125bf215546Sopenharmony_ci         offset -= 8 + wt_bits * 5;
1126bf215546Sopenharmony_ci         bits_left -= 8 + wt_bits * 5;
1127bf215546Sopenharmony_ci      }
1128bf215546Sopenharmony_ci
1129bf215546Sopenharmony_ci   } else if (wt_quints) {
1130bf215546Sopenharmony_ci
1131bf215546Sopenharmony_ci      int offset = 128;
1132bf215546Sopenharmony_ci      int bits_left = weight_bits;
1133bf215546Sopenharmony_ci      for (int i = 0; i < num_weights; i += 3) {
1134bf215546Sopenharmony_ci         int bits_to_read = MIN2(bits_left, 7 + 3*wt_bits);
1135bf215546Sopenharmony_ci         /* If wt_quints then wt_bits <= 2, so bits_to_read <= 13 and we can use uint32_t */
1136bf215546Sopenharmony_ci         uint32_t raw = in.get_bits_rev(offset, bits_to_read);
1137bf215546Sopenharmony_ci         unpack_quint_block(wt_bits, raw, &weights_quant[i]);
1138bf215546Sopenharmony_ci
1139bf215546Sopenharmony_ci         if (VERBOSE_DECODE)
1140bf215546Sopenharmony_ci            in.printf_bits(offset - bits_to_read, bits_to_read, "weight quints [%d,%d,%d]",
1141bf215546Sopenharmony_ci                           weights_quant[i], weights_quant[i+1], weights_quant[i+2]);
1142bf215546Sopenharmony_ci
1143bf215546Sopenharmony_ci         offset -= 7 + wt_bits * 3;
1144bf215546Sopenharmony_ci         bits_left -= 7 + wt_bits * 3;
1145bf215546Sopenharmony_ci      }
1146bf215546Sopenharmony_ci
1147bf215546Sopenharmony_ci   } else {
1148bf215546Sopenharmony_ci      int offset = 128;
1149bf215546Sopenharmony_ci      assert((weight_bits % wt_bits) == 0);
1150bf215546Sopenharmony_ci      for (int i = 0; i < num_weights; ++i) {
1151bf215546Sopenharmony_ci         weights_quant[i] = in.get_bits_rev(offset, wt_bits);
1152bf215546Sopenharmony_ci
1153bf215546Sopenharmony_ci         if (VERBOSE_DECODE)
1154bf215546Sopenharmony_ci            in.printf_bits(offset - wt_bits, wt_bits, "weight bits [%d]", weights_quant[i]);
1155bf215546Sopenharmony_ci
1156bf215546Sopenharmony_ci         offset -= wt_bits;
1157bf215546Sopenharmony_ci      }
1158bf215546Sopenharmony_ci   }
1159bf215546Sopenharmony_ci}
1160bf215546Sopenharmony_ci
1161bf215546Sopenharmony_civoid Block::unquantise_weights()
1162bf215546Sopenharmony_ci{
1163bf215546Sopenharmony_ci   assert(num_weights <= (int)ARRAY_SIZE(weights_quant));
1164bf215546Sopenharmony_ci   assert(num_weights <= (int)ARRAY_SIZE(weights));
1165bf215546Sopenharmony_ci
1166bf215546Sopenharmony_ci   memset(weights, 0, sizeof(weights));
1167bf215546Sopenharmony_ci
1168bf215546Sopenharmony_ci   for (int i = 0; i < num_weights; ++i) {
1169bf215546Sopenharmony_ci
1170bf215546Sopenharmony_ci      uint8_t v = weights_quant[i];
1171bf215546Sopenharmony_ci      uint8_t w;
1172bf215546Sopenharmony_ci
1173bf215546Sopenharmony_ci      if (wt_trits) {
1174bf215546Sopenharmony_ci
1175bf215546Sopenharmony_ci         if (wt_bits == 0) {
1176bf215546Sopenharmony_ci            w = v * 32;
1177bf215546Sopenharmony_ci         } else {
1178bf215546Sopenharmony_ci            uint8_t A, B, C, D;
1179bf215546Sopenharmony_ci            A = (v & 0x1) ? 0x7F : 0x00;
1180bf215546Sopenharmony_ci            switch (wt_bits) {
1181bf215546Sopenharmony_ci            case 1:
1182bf215546Sopenharmony_ci               B = 0;
1183bf215546Sopenharmony_ci               C = 50;
1184bf215546Sopenharmony_ci               D = v >> 1;
1185bf215546Sopenharmony_ci               break;
1186bf215546Sopenharmony_ci            case 2:
1187bf215546Sopenharmony_ci               B = (v & 0x2) ? 0x45 : 0x00;
1188bf215546Sopenharmony_ci               C = 23;
1189bf215546Sopenharmony_ci               D = v >> 2;
1190bf215546Sopenharmony_ci               break;
1191bf215546Sopenharmony_ci            case 3:
1192bf215546Sopenharmony_ci               B = ((v & 0x6) >> 1) | ((v & 0x6) << 4);
1193bf215546Sopenharmony_ci               C = 11;
1194bf215546Sopenharmony_ci               D = v >> 3;
1195bf215546Sopenharmony_ci               break;
1196bf215546Sopenharmony_ci            default:
1197bf215546Sopenharmony_ci               unreachable("");
1198bf215546Sopenharmony_ci            }
1199bf215546Sopenharmony_ci            uint16_t T = D * C + B;
1200bf215546Sopenharmony_ci            T = T ^ A;
1201bf215546Sopenharmony_ci            T = (A & 0x20) | (T >> 2);
1202bf215546Sopenharmony_ci            assert(T < 64);
1203bf215546Sopenharmony_ci            if (T > 32)
1204bf215546Sopenharmony_ci               T++;
1205bf215546Sopenharmony_ci            w = T;
1206bf215546Sopenharmony_ci         }
1207bf215546Sopenharmony_ci
1208bf215546Sopenharmony_ci      } else if (wt_quints) {
1209bf215546Sopenharmony_ci
1210bf215546Sopenharmony_ci         if (wt_bits == 0) {
1211bf215546Sopenharmony_ci            w = v * 16;
1212bf215546Sopenharmony_ci         } else {
1213bf215546Sopenharmony_ci            uint8_t A, B, C, D;
1214bf215546Sopenharmony_ci            A = (v & 0x1) ? 0x7F : 0x00;
1215bf215546Sopenharmony_ci            switch (wt_bits) {
1216bf215546Sopenharmony_ci            case 1:
1217bf215546Sopenharmony_ci               B = 0;
1218bf215546Sopenharmony_ci               C = 28;
1219bf215546Sopenharmony_ci               D = v >> 1;
1220bf215546Sopenharmony_ci               break;
1221bf215546Sopenharmony_ci            case 2:
1222bf215546Sopenharmony_ci               B = (v & 0x2) ? 0x42 : 0x00;
1223bf215546Sopenharmony_ci               C = 13;
1224bf215546Sopenharmony_ci               D = v >> 2;
1225bf215546Sopenharmony_ci               break;
1226bf215546Sopenharmony_ci            default:
1227bf215546Sopenharmony_ci               unreachable("");
1228bf215546Sopenharmony_ci            }
1229bf215546Sopenharmony_ci            uint16_t T = D * C + B;
1230bf215546Sopenharmony_ci            T = T ^ A;
1231bf215546Sopenharmony_ci            T = (A & 0x20) | (T >> 2);
1232bf215546Sopenharmony_ci            assert(T < 64);
1233bf215546Sopenharmony_ci            if (T > 32)
1234bf215546Sopenharmony_ci               T++;
1235bf215546Sopenharmony_ci            w = T;
1236bf215546Sopenharmony_ci         }
1237bf215546Sopenharmony_ci         weights[i] = w;
1238bf215546Sopenharmony_ci
1239bf215546Sopenharmony_ci      } else {
1240bf215546Sopenharmony_ci
1241bf215546Sopenharmony_ci         switch (wt_bits) {
1242bf215546Sopenharmony_ci         case 1: w = v ? 0x3F : 0x00; break;
1243bf215546Sopenharmony_ci         case 2: w = v | (v << 2) | (v << 4); break;
1244bf215546Sopenharmony_ci         case 3: w = v | (v << 3); break;
1245bf215546Sopenharmony_ci         case 4: w = (v >> 2) | (v << 2); break;
1246bf215546Sopenharmony_ci         case 5: w = (v >> 4) | (v << 1); break;
1247bf215546Sopenharmony_ci         default: unreachable("");
1248bf215546Sopenharmony_ci         }
1249bf215546Sopenharmony_ci         assert(w < 64);
1250bf215546Sopenharmony_ci         if (w > 32)
1251bf215546Sopenharmony_ci            w++;
1252bf215546Sopenharmony_ci      }
1253bf215546Sopenharmony_ci      weights[i] = w;
1254bf215546Sopenharmony_ci   }
1255bf215546Sopenharmony_ci}
1256bf215546Sopenharmony_ci
1257bf215546Sopenharmony_civoid Block::compute_infill_weights(int block_w, int block_h, int block_d)
1258bf215546Sopenharmony_ci{
1259bf215546Sopenharmony_ci   int Ds = block_w <= 1 ? 0 : (1024 + block_w / 2) / (block_w - 1);
1260bf215546Sopenharmony_ci   int Dt = block_h <= 1 ? 0 : (1024 + block_h / 2) / (block_h - 1);
1261bf215546Sopenharmony_ci   int Dr = block_d <= 1 ? 0 : (1024 + block_d / 2) / (block_d - 1);
1262bf215546Sopenharmony_ci   for (int r = 0; r < block_d; ++r) {
1263bf215546Sopenharmony_ci      for (int t = 0; t < block_h; ++t) {
1264bf215546Sopenharmony_ci         for (int s = 0; s < block_w; ++s) {
1265bf215546Sopenharmony_ci            int cs = Ds * s;
1266bf215546Sopenharmony_ci            int ct = Dt * t;
1267bf215546Sopenharmony_ci            int cr = Dr * r;
1268bf215546Sopenharmony_ci            int gs = (cs * (wt_w - 1) + 32) >> 6;
1269bf215546Sopenharmony_ci            int gt = (ct * (wt_h - 1) + 32) >> 6;
1270bf215546Sopenharmony_ci            int gr = (cr * (wt_d - 1) + 32) >> 6;
1271bf215546Sopenharmony_ci            assert(gs >= 0 && gs <= 176);
1272bf215546Sopenharmony_ci            assert(gt >= 0 && gt <= 176);
1273bf215546Sopenharmony_ci            assert(gr >= 0 && gr <= 176);
1274bf215546Sopenharmony_ci            int js = gs >> 4;
1275bf215546Sopenharmony_ci            int fs = gs & 0xf;
1276bf215546Sopenharmony_ci            int jt = gt >> 4;
1277bf215546Sopenharmony_ci            int ft = gt & 0xf;
1278bf215546Sopenharmony_ci            int jr = gr >> 4;
1279bf215546Sopenharmony_ci            int fr = gr & 0xf;
1280bf215546Sopenharmony_ci
1281bf215546Sopenharmony_ci            /* TODO: 3D */
1282bf215546Sopenharmony_ci            (void)jr;
1283bf215546Sopenharmony_ci            (void)fr;
1284bf215546Sopenharmony_ci
1285bf215546Sopenharmony_ci            int w11 = (fs * ft + 8) >> 4;
1286bf215546Sopenharmony_ci            int w10 = ft - w11;
1287bf215546Sopenharmony_ci            int w01 = fs - w11;
1288bf215546Sopenharmony_ci            int w00 = 16 - fs - ft + w11;
1289bf215546Sopenharmony_ci
1290bf215546Sopenharmony_ci            if (dual_plane) {
1291bf215546Sopenharmony_ci               int p00, p01, p10, p11, i0, i1;
1292bf215546Sopenharmony_ci               int v0 = js + jt * wt_w;
1293bf215546Sopenharmony_ci               p00 = weights[(v0) * 2];
1294bf215546Sopenharmony_ci               p01 = weights[(v0 + 1) * 2];
1295bf215546Sopenharmony_ci               p10 = weights[(v0 + wt_w) * 2];
1296bf215546Sopenharmony_ci               p11 = weights[(v0 + wt_w + 1) * 2];
1297bf215546Sopenharmony_ci               i0 = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4;
1298bf215546Sopenharmony_ci               p00 = weights[(v0) * 2 + 1];
1299bf215546Sopenharmony_ci               p01 = weights[(v0 + 1) * 2 + 1];
1300bf215546Sopenharmony_ci               p10 = weights[(v0 + wt_w) * 2 + 1];
1301bf215546Sopenharmony_ci               p11 = weights[(v0 + wt_w + 1) * 2 + 1];
1302bf215546Sopenharmony_ci               assert((v0 + wt_w + 1) * 2 + 1 < (int)ARRAY_SIZE(weights));
1303bf215546Sopenharmony_ci               i1 = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4;
1304bf215546Sopenharmony_ci               assert(0 <= i0 && i0 <= 64);
1305bf215546Sopenharmony_ci               infill_weights[0][s + t*block_w + r*block_w*block_h] = i0;
1306bf215546Sopenharmony_ci               infill_weights[1][s + t*block_w + r*block_w*block_h] = i1;
1307bf215546Sopenharmony_ci            } else {
1308bf215546Sopenharmony_ci               int p00, p01, p10, p11, i;
1309bf215546Sopenharmony_ci               int v0 = js + jt * wt_w;
1310bf215546Sopenharmony_ci               p00 = weights[v0];
1311bf215546Sopenharmony_ci               p01 = weights[v0 + 1];
1312bf215546Sopenharmony_ci               p10 = weights[v0 + wt_w];
1313bf215546Sopenharmony_ci               p11 = weights[v0 + wt_w + 1];
1314bf215546Sopenharmony_ci               assert(v0 + wt_w + 1 < (int)ARRAY_SIZE(weights));
1315bf215546Sopenharmony_ci               i = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4;
1316bf215546Sopenharmony_ci               assert(0 <= i && i <= 64);
1317bf215546Sopenharmony_ci               infill_weights[0][s + t*block_w + r*block_w*block_h] = i;
1318bf215546Sopenharmony_ci            }
1319bf215546Sopenharmony_ci         }
1320bf215546Sopenharmony_ci      }
1321bf215546Sopenharmony_ci   }
1322bf215546Sopenharmony_ci}
1323bf215546Sopenharmony_ci
1324bf215546Sopenharmony_civoid Block::unquantise_colour_endpoints()
1325bf215546Sopenharmony_ci{
1326bf215546Sopenharmony_ci   assert(num_cem_values <= (int)ARRAY_SIZE(colour_endpoints_quant));
1327bf215546Sopenharmony_ci   assert(num_cem_values <= (int)ARRAY_SIZE(colour_endpoints));
1328bf215546Sopenharmony_ci
1329bf215546Sopenharmony_ci   for (int i = 0; i < num_cem_values; ++i) {
1330bf215546Sopenharmony_ci      uint8_t v = colour_endpoints_quant[i];
1331bf215546Sopenharmony_ci
1332bf215546Sopenharmony_ci      if (ce_trits) {
1333bf215546Sopenharmony_ci         uint16_t A, B, C, D;
1334bf215546Sopenharmony_ci         uint16_t t;
1335bf215546Sopenharmony_ci         A = (v & 0x1) ? 0x1FF : 0x000;
1336bf215546Sopenharmony_ci         switch (ce_bits) {
1337bf215546Sopenharmony_ci         case 1:
1338bf215546Sopenharmony_ci            B = 0;
1339bf215546Sopenharmony_ci            C = 204;
1340bf215546Sopenharmony_ci            D = v >> 1;
1341bf215546Sopenharmony_ci            break;
1342bf215546Sopenharmony_ci         case 2:
1343bf215546Sopenharmony_ci            B = (v & 0x2) ? 0x116 : 0x000;
1344bf215546Sopenharmony_ci            C = 93;
1345bf215546Sopenharmony_ci            D = v >> 2;
1346bf215546Sopenharmony_ci            break;
1347bf215546Sopenharmony_ci         case 3:
1348bf215546Sopenharmony_ci            t = ((v >> 1) & 0x3);
1349bf215546Sopenharmony_ci            B = t | (t << 2) | (t << 7);
1350bf215546Sopenharmony_ci            C = 44;
1351bf215546Sopenharmony_ci            D = v >> 3;
1352bf215546Sopenharmony_ci            break;
1353bf215546Sopenharmony_ci         case 4:
1354bf215546Sopenharmony_ci            t = ((v >> 1) & 0x7);
1355bf215546Sopenharmony_ci            B = t | (t << 6);
1356bf215546Sopenharmony_ci            C = 22;
1357bf215546Sopenharmony_ci            D = v >> 4;
1358bf215546Sopenharmony_ci            break;
1359bf215546Sopenharmony_ci         case 5:
1360bf215546Sopenharmony_ci            t = ((v >> 1) & 0xF);
1361bf215546Sopenharmony_ci            B = (t >> 2) | (t << 5);
1362bf215546Sopenharmony_ci            C = 11;
1363bf215546Sopenharmony_ci            D = v >> 5;
1364bf215546Sopenharmony_ci            break;
1365bf215546Sopenharmony_ci         case 6:
1366bf215546Sopenharmony_ci            B = ((v & 0x3E) << 3) | ((v >> 5) & 0x1);
1367bf215546Sopenharmony_ci            C = 5;
1368bf215546Sopenharmony_ci            D = v >> 6;
1369bf215546Sopenharmony_ci            break;
1370bf215546Sopenharmony_ci         default:
1371bf215546Sopenharmony_ci            unreachable("");
1372bf215546Sopenharmony_ci         }
1373bf215546Sopenharmony_ci         uint16_t T = D * C + B;
1374bf215546Sopenharmony_ci         T = T ^ A;
1375bf215546Sopenharmony_ci         T = (A & 0x80) | (T >> 2);
1376bf215546Sopenharmony_ci         assert(T < 256);
1377bf215546Sopenharmony_ci         colour_endpoints[i] = T;
1378bf215546Sopenharmony_ci      } else if (ce_quints) {
1379bf215546Sopenharmony_ci         uint16_t A, B, C, D;
1380bf215546Sopenharmony_ci         uint16_t t;
1381bf215546Sopenharmony_ci         A = (v & 0x1) ? 0x1FF : 0x000;
1382bf215546Sopenharmony_ci         switch (ce_bits) {
1383bf215546Sopenharmony_ci         case 1:
1384bf215546Sopenharmony_ci            B = 0;
1385bf215546Sopenharmony_ci            C = 113;
1386bf215546Sopenharmony_ci            D = v >> 1;
1387bf215546Sopenharmony_ci            break;
1388bf215546Sopenharmony_ci         case 2:
1389bf215546Sopenharmony_ci            B = (v & 0x2) ? 0x10C : 0x000;
1390bf215546Sopenharmony_ci            C = 54;
1391bf215546Sopenharmony_ci            D = v >> 2;
1392bf215546Sopenharmony_ci            break;
1393bf215546Sopenharmony_ci         case 3:
1394bf215546Sopenharmony_ci            t = ((v >> 1) & 0x3);
1395bf215546Sopenharmony_ci            B = (t >> 1) | (t << 1) | (t << 7);
1396bf215546Sopenharmony_ci            C = 26;
1397bf215546Sopenharmony_ci            D = v >> 3;
1398bf215546Sopenharmony_ci            break;
1399bf215546Sopenharmony_ci         case 4:
1400bf215546Sopenharmony_ci            t = ((v >> 1) & 0x7);
1401bf215546Sopenharmony_ci            B = (t >> 1) | (t << 6);
1402bf215546Sopenharmony_ci            C = 13;
1403bf215546Sopenharmony_ci            D = v >> 4;
1404bf215546Sopenharmony_ci            break;
1405bf215546Sopenharmony_ci         case 5:
1406bf215546Sopenharmony_ci            t = ((v >> 1) & 0xF);
1407bf215546Sopenharmony_ci            B = (t >> 4) | (t << 5);
1408bf215546Sopenharmony_ci            C = 6;
1409bf215546Sopenharmony_ci            D = v >> 5;
1410bf215546Sopenharmony_ci            break;
1411bf215546Sopenharmony_ci         default:
1412bf215546Sopenharmony_ci            unreachable("");
1413bf215546Sopenharmony_ci         }
1414bf215546Sopenharmony_ci         uint16_t T = D * C + B;
1415bf215546Sopenharmony_ci         T = T ^ A;
1416bf215546Sopenharmony_ci         T = (A & 0x80) | (T >> 2);
1417bf215546Sopenharmony_ci         assert(T < 256);
1418bf215546Sopenharmony_ci         colour_endpoints[i] = T;
1419bf215546Sopenharmony_ci      } else {
1420bf215546Sopenharmony_ci         switch (ce_bits) {
1421bf215546Sopenharmony_ci         case 1: v = v ? 0xFF : 0x00; break;
1422bf215546Sopenharmony_ci         case 2: v = (v << 6) | (v << 4) | (v << 2) | v; break;
1423bf215546Sopenharmony_ci         case 3: v = (v << 5) | (v << 2) | (v >> 1); break;
1424bf215546Sopenharmony_ci         case 4: v = (v << 4) | v; break;
1425bf215546Sopenharmony_ci         case 5: v = (v << 3) | (v >> 2); break;
1426bf215546Sopenharmony_ci         case 6: v = (v << 2) | (v >> 4); break;
1427bf215546Sopenharmony_ci         case 7: v = (v << 1) | (v >> 6); break;
1428bf215546Sopenharmony_ci         case 8: break;
1429bf215546Sopenharmony_ci         default: unreachable("");
1430bf215546Sopenharmony_ci         }
1431bf215546Sopenharmony_ci         colour_endpoints[i] = v;
1432bf215546Sopenharmony_ci      }
1433bf215546Sopenharmony_ci   }
1434bf215546Sopenharmony_ci}
1435bf215546Sopenharmony_ci
1436bf215546Sopenharmony_cidecode_error::type Block::decode(const Decoder &decoder, InputBitVector in)
1437bf215546Sopenharmony_ci{
1438bf215546Sopenharmony_ci   decode_error::type err;
1439bf215546Sopenharmony_ci
1440bf215546Sopenharmony_ci   is_error = false;
1441bf215546Sopenharmony_ci   bogus_colour_endpoints = false;
1442bf215546Sopenharmony_ci   bogus_weights = false;
1443bf215546Sopenharmony_ci   is_void_extent = false;
1444bf215546Sopenharmony_ci
1445bf215546Sopenharmony_ci   wt_d = 1;
1446bf215546Sopenharmony_ci   /* TODO: 3D */
1447bf215546Sopenharmony_ci
1448bf215546Sopenharmony_ci   /* TODO: test for all the illegal encodings */
1449bf215546Sopenharmony_ci
1450bf215546Sopenharmony_ci   if (VERBOSE_DECODE)
1451bf215546Sopenharmony_ci      in.printf_bits(0, 128);
1452bf215546Sopenharmony_ci
1453bf215546Sopenharmony_ci   err = decode_block_mode(in);
1454bf215546Sopenharmony_ci   if (err != decode_error::ok)
1455bf215546Sopenharmony_ci      return err;
1456bf215546Sopenharmony_ci
1457bf215546Sopenharmony_ci   if (is_void_extent)
1458bf215546Sopenharmony_ci      return decode_error::ok;
1459bf215546Sopenharmony_ci
1460bf215546Sopenharmony_ci   /* TODO: 3D */
1461bf215546Sopenharmony_ci
1462bf215546Sopenharmony_ci   calculate_from_weights();
1463bf215546Sopenharmony_ci
1464bf215546Sopenharmony_ci   if (VERBOSE_DECODE)
1465bf215546Sopenharmony_ci      printf("weights_grid=%dx%dx%d dual_plane=%d num_weights=%d high_prec=%d r=%d range=0..%d (%dt %dq %db) weight_bits=%d\n",
1466bf215546Sopenharmony_ci             wt_w, wt_h, wt_d, dual_plane, num_weights, high_prec, wt_range, wt_max, wt_trits, wt_quints, wt_bits, weight_bits);
1467bf215546Sopenharmony_ci
1468bf215546Sopenharmony_ci   if (wt_w > decoder.block_w || wt_h > decoder.block_h || wt_d > decoder.block_d)
1469bf215546Sopenharmony_ci      return decode_error::weight_grid_exceeds_block_size;
1470bf215546Sopenharmony_ci
1471bf215546Sopenharmony_ci   num_parts = in.get_bits(11, 2) + 1;
1472bf215546Sopenharmony_ci
1473bf215546Sopenharmony_ci   if (VERBOSE_DECODE)
1474bf215546Sopenharmony_ci      in.printf_bits(11, 2, "partitions = %d", num_parts);
1475bf215546Sopenharmony_ci
1476bf215546Sopenharmony_ci   if (dual_plane && num_parts > 3)
1477bf215546Sopenharmony_ci      return decode_error::dual_plane_and_too_many_partitions;
1478bf215546Sopenharmony_ci
1479bf215546Sopenharmony_ci   decode_cem(in);
1480bf215546Sopenharmony_ci
1481bf215546Sopenharmony_ci   if (VERBOSE_DECODE)
1482bf215546Sopenharmony_ci      printf("cem=[%d,%d,%d,%d] base_cem_class=%d\n", cems[0], cems[1], cems[2], cems[3], cem_base_class);
1483bf215546Sopenharmony_ci
1484bf215546Sopenharmony_ci   int num_cem_pairs = (cem_base_class + 1) * num_parts + extra_cem_bits;
1485bf215546Sopenharmony_ci   num_cem_values = num_cem_pairs * 2;
1486bf215546Sopenharmony_ci
1487bf215546Sopenharmony_ci   calculate_remaining_bits();
1488bf215546Sopenharmony_ci   err = calculate_colour_endpoints_size();
1489bf215546Sopenharmony_ci   if (err != decode_error::ok)
1490bf215546Sopenharmony_ci      return err;
1491bf215546Sopenharmony_ci
1492bf215546Sopenharmony_ci   if (VERBOSE_DECODE)
1493bf215546Sopenharmony_ci      in.printf_bits(colour_endpoint_data_offset, colour_endpoint_bits,
1494bf215546Sopenharmony_ci                     "endpoint data (%d bits, %d vals, %dt %dq %db)",
1495bf215546Sopenharmony_ci                     colour_endpoint_bits, num_cem_values, ce_trits, ce_quints, ce_bits);
1496bf215546Sopenharmony_ci
1497bf215546Sopenharmony_ci   unpack_colour_endpoints(in);
1498bf215546Sopenharmony_ci
1499bf215546Sopenharmony_ci   if (VERBOSE_DECODE) {
1500bf215546Sopenharmony_ci      printf("cem values raw =[");
1501bf215546Sopenharmony_ci      for (int i = 0; i < num_cem_values; i++) {
1502bf215546Sopenharmony_ci         if (i)
1503bf215546Sopenharmony_ci            printf(", ");
1504bf215546Sopenharmony_ci         printf("%3d", colour_endpoints_quant[i]);
1505bf215546Sopenharmony_ci      }
1506bf215546Sopenharmony_ci      printf("]\n");
1507bf215546Sopenharmony_ci   }
1508bf215546Sopenharmony_ci
1509bf215546Sopenharmony_ci   if (num_cem_values > 18)
1510bf215546Sopenharmony_ci      return decode_error::invalid_colour_endpoints_count;
1511bf215546Sopenharmony_ci
1512bf215546Sopenharmony_ci   unquantise_colour_endpoints();
1513bf215546Sopenharmony_ci
1514bf215546Sopenharmony_ci   if (VERBOSE_DECODE) {
1515bf215546Sopenharmony_ci      printf("cem values norm=[");
1516bf215546Sopenharmony_ci      for (int i = 0; i < num_cem_values; i++) {
1517bf215546Sopenharmony_ci         if (i)
1518bf215546Sopenharmony_ci            printf(", ");
1519bf215546Sopenharmony_ci         printf("%3d", colour_endpoints[i]);
1520bf215546Sopenharmony_ci      }
1521bf215546Sopenharmony_ci      printf("]\n");
1522bf215546Sopenharmony_ci   }
1523bf215546Sopenharmony_ci
1524bf215546Sopenharmony_ci   decode_colour_endpoints();
1525bf215546Sopenharmony_ci
1526bf215546Sopenharmony_ci   if (dual_plane) {
1527bf215546Sopenharmony_ci      int ccs_offset = 128 - weight_bits - num_extra_cem_bits - 2;
1528bf215546Sopenharmony_ci      colour_component_selector = in.get_bits(ccs_offset, 2);
1529bf215546Sopenharmony_ci
1530bf215546Sopenharmony_ci      if (VERBOSE_DECODE)
1531bf215546Sopenharmony_ci         in.printf_bits(ccs_offset, 2, "colour component selector = %d", colour_component_selector);
1532bf215546Sopenharmony_ci   } else {
1533bf215546Sopenharmony_ci      colour_component_selector = 0;
1534bf215546Sopenharmony_ci   }
1535bf215546Sopenharmony_ci
1536bf215546Sopenharmony_ci
1537bf215546Sopenharmony_ci   if (VERBOSE_DECODE)
1538bf215546Sopenharmony_ci      in.printf_bits(128 - weight_bits, weight_bits, "weights (%d bits)", weight_bits);
1539bf215546Sopenharmony_ci
1540bf215546Sopenharmony_ci   if (num_weights > 64)
1541bf215546Sopenharmony_ci      return decode_error::invalid_num_weights;
1542bf215546Sopenharmony_ci
1543bf215546Sopenharmony_ci   if (weight_bits < 24 || weight_bits > 96)
1544bf215546Sopenharmony_ci      return decode_error::invalid_weight_bits;
1545bf215546Sopenharmony_ci
1546bf215546Sopenharmony_ci   unpack_weights(in);
1547bf215546Sopenharmony_ci
1548bf215546Sopenharmony_ci   unquantise_weights();
1549bf215546Sopenharmony_ci
1550bf215546Sopenharmony_ci   if (VERBOSE_DECODE) {
1551bf215546Sopenharmony_ci      printf("weights=[");
1552bf215546Sopenharmony_ci      for (int i = 0; i < num_weights; ++i) {
1553bf215546Sopenharmony_ci         if (i)
1554bf215546Sopenharmony_ci            printf(", ");
1555bf215546Sopenharmony_ci         printf("%d", weights[i]);
1556bf215546Sopenharmony_ci      }
1557bf215546Sopenharmony_ci      printf("]\n");
1558bf215546Sopenharmony_ci
1559bf215546Sopenharmony_ci      for (int plane = 0; plane <= dual_plane; ++plane) {
1560bf215546Sopenharmony_ci         printf("weights (plane %d):\n", plane);
1561bf215546Sopenharmony_ci         int i = 0;
1562bf215546Sopenharmony_ci         (void)i;
1563bf215546Sopenharmony_ci
1564bf215546Sopenharmony_ci         for (int r = 0; r < wt_d; ++r) {
1565bf215546Sopenharmony_ci            for (int t = 0; t < wt_h; ++t) {
1566bf215546Sopenharmony_ci               for (int s = 0; s < wt_w; ++s) {
1567bf215546Sopenharmony_ci                  printf("%3d", weights[i++ * (1 + dual_plane) + plane]);
1568bf215546Sopenharmony_ci               }
1569bf215546Sopenharmony_ci               printf("\n");
1570bf215546Sopenharmony_ci            }
1571bf215546Sopenharmony_ci            if (r < wt_d - 1)
1572bf215546Sopenharmony_ci               printf("\n");
1573bf215546Sopenharmony_ci         }
1574bf215546Sopenharmony_ci      }
1575bf215546Sopenharmony_ci   }
1576bf215546Sopenharmony_ci
1577bf215546Sopenharmony_ci   compute_infill_weights(decoder.block_w, decoder.block_h, decoder.block_d);
1578bf215546Sopenharmony_ci
1579bf215546Sopenharmony_ci   if (VERBOSE_DECODE) {
1580bf215546Sopenharmony_ci      for (int plane = 0; plane <= dual_plane; ++plane) {
1581bf215546Sopenharmony_ci         printf("infilled weights (plane %d):\n", plane);
1582bf215546Sopenharmony_ci         int i = 0;
1583bf215546Sopenharmony_ci         (void)i;
1584bf215546Sopenharmony_ci
1585bf215546Sopenharmony_ci         for (int r = 0; r < decoder.block_d; ++r) {
1586bf215546Sopenharmony_ci            for (int t = 0; t < decoder.block_h; ++t) {
1587bf215546Sopenharmony_ci               for (int s = 0; s < decoder.block_w; ++s) {
1588bf215546Sopenharmony_ci                  printf("%3d", infill_weights[plane][i++]);
1589bf215546Sopenharmony_ci               }
1590bf215546Sopenharmony_ci               printf("\n");
1591bf215546Sopenharmony_ci            }
1592bf215546Sopenharmony_ci            if (r < decoder.block_d - 1)
1593bf215546Sopenharmony_ci               printf("\n");
1594bf215546Sopenharmony_ci         }
1595bf215546Sopenharmony_ci      }
1596bf215546Sopenharmony_ci   }
1597bf215546Sopenharmony_ci   if (VERBOSE_DECODE)
1598bf215546Sopenharmony_ci      printf("\n");
1599bf215546Sopenharmony_ci
1600bf215546Sopenharmony_ci   return decode_error::ok;
1601bf215546Sopenharmony_ci}
1602bf215546Sopenharmony_ci
1603bf215546Sopenharmony_civoid Block::write_decoded(const Decoder &decoder, uint16_t *output)
1604bf215546Sopenharmony_ci{
1605bf215546Sopenharmony_ci   /* sRGB can only be stored as unorm8. */
1606bf215546Sopenharmony_ci   assert(!decoder.srgb || decoder.output_unorm8);
1607bf215546Sopenharmony_ci
1608bf215546Sopenharmony_ci   if (is_void_extent) {
1609bf215546Sopenharmony_ci      for (int idx = 0; idx < decoder.block_w*decoder.block_h*decoder.block_d; ++idx) {
1610bf215546Sopenharmony_ci         if (decoder.output_unorm8) {
1611bf215546Sopenharmony_ci            output[idx*4+0] = void_extent_colour_r >> 8;
1612bf215546Sopenharmony_ci            output[idx*4+1] = void_extent_colour_g >> 8;
1613bf215546Sopenharmony_ci            output[idx*4+2] = void_extent_colour_b >> 8;
1614bf215546Sopenharmony_ci            output[idx*4+3] = void_extent_colour_a >> 8;
1615bf215546Sopenharmony_ci         } else {
1616bf215546Sopenharmony_ci            /* Store the color as FP16. */
1617bf215546Sopenharmony_ci            output[idx*4+0] = _mesa_uint16_div_64k_to_half(void_extent_colour_r);
1618bf215546Sopenharmony_ci            output[idx*4+1] = _mesa_uint16_div_64k_to_half(void_extent_colour_g);
1619bf215546Sopenharmony_ci            output[idx*4+2] = _mesa_uint16_div_64k_to_half(void_extent_colour_b);
1620bf215546Sopenharmony_ci            output[idx*4+3] = _mesa_uint16_div_64k_to_half(void_extent_colour_a);
1621bf215546Sopenharmony_ci         }
1622bf215546Sopenharmony_ci      }
1623bf215546Sopenharmony_ci      return;
1624bf215546Sopenharmony_ci   }
1625bf215546Sopenharmony_ci
1626bf215546Sopenharmony_ci   int small_block = (decoder.block_w * decoder.block_h * decoder.block_d) < 31;
1627bf215546Sopenharmony_ci
1628bf215546Sopenharmony_ci   int idx = 0;
1629bf215546Sopenharmony_ci   for (int z = 0; z < decoder.block_d; ++z) {
1630bf215546Sopenharmony_ci      for (int y = 0; y < decoder.block_h; ++y) {
1631bf215546Sopenharmony_ci         for (int x = 0; x < decoder.block_w; ++x) {
1632bf215546Sopenharmony_ci
1633bf215546Sopenharmony_ci            int partition;
1634bf215546Sopenharmony_ci            if (num_parts > 1) {
1635bf215546Sopenharmony_ci               partition = select_partition(partition_index, x, y, z, num_parts, small_block);
1636bf215546Sopenharmony_ci               assert(partition < num_parts);
1637bf215546Sopenharmony_ci            } else {
1638bf215546Sopenharmony_ci               partition = 0;
1639bf215546Sopenharmony_ci            }
1640bf215546Sopenharmony_ci
1641bf215546Sopenharmony_ci            /* TODO: HDR */
1642bf215546Sopenharmony_ci
1643bf215546Sopenharmony_ci            uint8x4_t e0 = endpoints_decoded[0][partition];
1644bf215546Sopenharmony_ci            uint8x4_t e1 = endpoints_decoded[1][partition];
1645bf215546Sopenharmony_ci            uint16_t c0[4], c1[4];
1646bf215546Sopenharmony_ci
1647bf215546Sopenharmony_ci            /* Expand to 16 bits. */
1648bf215546Sopenharmony_ci            if (decoder.srgb) {
1649bf215546Sopenharmony_ci               c0[0] = (uint16_t)((e0.v[0] << 8) | 0x80);
1650bf215546Sopenharmony_ci               c0[1] = (uint16_t)((e0.v[1] << 8) | 0x80);
1651bf215546Sopenharmony_ci               c0[2] = (uint16_t)((e0.v[2] << 8) | 0x80);
1652bf215546Sopenharmony_ci               c0[3] = (uint16_t)((e0.v[3] << 8) | 0x80);
1653bf215546Sopenharmony_ci
1654bf215546Sopenharmony_ci               c1[0] = (uint16_t)((e1.v[0] << 8) | 0x80);
1655bf215546Sopenharmony_ci               c1[1] = (uint16_t)((e1.v[1] << 8) | 0x80);
1656bf215546Sopenharmony_ci               c1[2] = (uint16_t)((e1.v[2] << 8) | 0x80);
1657bf215546Sopenharmony_ci               c1[3] = (uint16_t)((e1.v[3] << 8) | 0x80);
1658bf215546Sopenharmony_ci            } else {
1659bf215546Sopenharmony_ci               c0[0] = (uint16_t)((e0.v[0] << 8) | e0.v[0]);
1660bf215546Sopenharmony_ci               c0[1] = (uint16_t)((e0.v[1] << 8) | e0.v[1]);
1661bf215546Sopenharmony_ci               c0[2] = (uint16_t)((e0.v[2] << 8) | e0.v[2]);
1662bf215546Sopenharmony_ci               c0[3] = (uint16_t)((e0.v[3] << 8) | e0.v[3]);
1663bf215546Sopenharmony_ci
1664bf215546Sopenharmony_ci               c1[0] = (uint16_t)((e1.v[0] << 8) | e1.v[0]);
1665bf215546Sopenharmony_ci               c1[1] = (uint16_t)((e1.v[1] << 8) | e1.v[1]);
1666bf215546Sopenharmony_ci               c1[2] = (uint16_t)((e1.v[2] << 8) | e1.v[2]);
1667bf215546Sopenharmony_ci               c1[3] = (uint16_t)((e1.v[3] << 8) | e1.v[3]);
1668bf215546Sopenharmony_ci            }
1669bf215546Sopenharmony_ci
1670bf215546Sopenharmony_ci            int w[4];
1671bf215546Sopenharmony_ci            if (dual_plane) {
1672bf215546Sopenharmony_ci               int w0 = infill_weights[0][idx];
1673bf215546Sopenharmony_ci               int w1 = infill_weights[1][idx];
1674bf215546Sopenharmony_ci               w[0] = w[1] = w[2] = w[3] = w0;
1675bf215546Sopenharmony_ci               w[colour_component_selector] = w1;
1676bf215546Sopenharmony_ci            } else {
1677bf215546Sopenharmony_ci               int w0 = infill_weights[0][idx];
1678bf215546Sopenharmony_ci               w[0] = w[1] = w[2] = w[3] = w0;
1679bf215546Sopenharmony_ci            }
1680bf215546Sopenharmony_ci
1681bf215546Sopenharmony_ci            /* Interpolate to produce UNORM16, applying weights. */
1682bf215546Sopenharmony_ci            uint16_t c[4] = {
1683bf215546Sopenharmony_ci               (uint16_t)((c0[0] * (64 - w[0]) + c1[0] * w[0] + 32) >> 6),
1684bf215546Sopenharmony_ci               (uint16_t)((c0[1] * (64 - w[1]) + c1[1] * w[1] + 32) >> 6),
1685bf215546Sopenharmony_ci               (uint16_t)((c0[2] * (64 - w[2]) + c1[2] * w[2] + 32) >> 6),
1686bf215546Sopenharmony_ci               (uint16_t)((c0[3] * (64 - w[3]) + c1[3] * w[3] + 32) >> 6),
1687bf215546Sopenharmony_ci            };
1688bf215546Sopenharmony_ci
1689bf215546Sopenharmony_ci            if (decoder.output_unorm8) {
1690bf215546Sopenharmony_ci               output[idx*4+0] = c[0] >> 8;
1691bf215546Sopenharmony_ci               output[idx*4+1] = c[1] >> 8;
1692bf215546Sopenharmony_ci               output[idx*4+2] = c[2] >> 8;
1693bf215546Sopenharmony_ci               output[idx*4+3] = c[3] >> 8;
1694bf215546Sopenharmony_ci            } else {
1695bf215546Sopenharmony_ci               /* Store the color as FP16. */
1696bf215546Sopenharmony_ci               output[idx*4+0] = c[0] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[0]);
1697bf215546Sopenharmony_ci               output[idx*4+1] = c[1] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[1]);
1698bf215546Sopenharmony_ci               output[idx*4+2] = c[2] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[2]);
1699bf215546Sopenharmony_ci               output[idx*4+3] = c[3] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[3]);
1700bf215546Sopenharmony_ci            }
1701bf215546Sopenharmony_ci
1702bf215546Sopenharmony_ci            idx++;
1703bf215546Sopenharmony_ci         }
1704bf215546Sopenharmony_ci      }
1705bf215546Sopenharmony_ci   }
1706bf215546Sopenharmony_ci}
1707bf215546Sopenharmony_ci
1708bf215546Sopenharmony_civoid Block::calculate_from_weights()
1709bf215546Sopenharmony_ci{
1710bf215546Sopenharmony_ci   wt_trits = 0;
1711bf215546Sopenharmony_ci   wt_quints = 0;
1712bf215546Sopenharmony_ci   wt_bits = 0;
1713bf215546Sopenharmony_ci   switch (high_prec) {
1714bf215546Sopenharmony_ci   case 0:
1715bf215546Sopenharmony_ci      switch (wt_range) {
1716bf215546Sopenharmony_ci      case 0x2: wt_max = 1; wt_bits = 1; break;
1717bf215546Sopenharmony_ci      case 0x3: wt_max = 2; wt_trits = 1; break;
1718bf215546Sopenharmony_ci      case 0x4: wt_max = 3; wt_bits = 2; break;
1719bf215546Sopenharmony_ci      case 0x5: wt_max = 4; wt_quints = 1; break;
1720bf215546Sopenharmony_ci      case 0x6: wt_max = 5; wt_trits = 1; wt_bits = 1; break;
1721bf215546Sopenharmony_ci      case 0x7: wt_max = 7; wt_bits = 3; break;
1722bf215546Sopenharmony_ci      default: abort();
1723bf215546Sopenharmony_ci      }
1724bf215546Sopenharmony_ci      break;
1725bf215546Sopenharmony_ci   case 1:
1726bf215546Sopenharmony_ci      switch (wt_range) {
1727bf215546Sopenharmony_ci      case 0x2: wt_max = 9; wt_quints = 1; wt_bits = 1; break;
1728bf215546Sopenharmony_ci      case 0x3: wt_max = 11; wt_trits = 1; wt_bits = 2; break;
1729bf215546Sopenharmony_ci      case 0x4: wt_max = 15; wt_bits = 4; break;
1730bf215546Sopenharmony_ci      case 0x5: wt_max = 19; wt_quints = 1; wt_bits = 2; break;
1731bf215546Sopenharmony_ci      case 0x6: wt_max = 23; wt_trits = 1; wt_bits = 3; break;
1732bf215546Sopenharmony_ci      case 0x7: wt_max = 31; wt_bits = 5; break;
1733bf215546Sopenharmony_ci      default: abort();
1734bf215546Sopenharmony_ci      }
1735bf215546Sopenharmony_ci      break;
1736bf215546Sopenharmony_ci   }
1737bf215546Sopenharmony_ci
1738bf215546Sopenharmony_ci   assert(wt_trits || wt_quints || wt_bits);
1739bf215546Sopenharmony_ci
1740bf215546Sopenharmony_ci   num_weights = wt_w * wt_h * wt_d;
1741bf215546Sopenharmony_ci
1742bf215546Sopenharmony_ci   if (dual_plane)
1743bf215546Sopenharmony_ci      num_weights *= 2;
1744bf215546Sopenharmony_ci
1745bf215546Sopenharmony_ci   weight_bits =
1746bf215546Sopenharmony_ci         (num_weights * 8 * wt_trits + 4) / 5
1747bf215546Sopenharmony_ci         + (num_weights * 7 * wt_quints + 2) / 3
1748bf215546Sopenharmony_ci         +  num_weights * wt_bits;
1749bf215546Sopenharmony_ci}
1750bf215546Sopenharmony_ci
1751bf215546Sopenharmony_civoid Block::calculate_remaining_bits()
1752bf215546Sopenharmony_ci{
1753bf215546Sopenharmony_ci   int config_bits;
1754bf215546Sopenharmony_ci   if (num_parts > 1) {
1755bf215546Sopenharmony_ci      if (!is_multi_cem)
1756bf215546Sopenharmony_ci         config_bits = 29;
1757bf215546Sopenharmony_ci      else
1758bf215546Sopenharmony_ci         config_bits = 25 + 3 * num_parts;
1759bf215546Sopenharmony_ci   } else {
1760bf215546Sopenharmony_ci      config_bits = 17;
1761bf215546Sopenharmony_ci   }
1762bf215546Sopenharmony_ci
1763bf215546Sopenharmony_ci   if (dual_plane)
1764bf215546Sopenharmony_ci      config_bits += 2;
1765bf215546Sopenharmony_ci
1766bf215546Sopenharmony_ci   remaining_bits = 128 - config_bits - weight_bits;
1767bf215546Sopenharmony_ci}
1768bf215546Sopenharmony_ci
1769bf215546Sopenharmony_cidecode_error::type Block::calculate_colour_endpoints_size()
1770bf215546Sopenharmony_ci{
1771bf215546Sopenharmony_ci   /* Specified as illegal */
1772bf215546Sopenharmony_ci   if (remaining_bits < (13 * num_cem_values + 4) / 5) {
1773bf215546Sopenharmony_ci      colour_endpoint_bits = ce_max = ce_trits = ce_quints = ce_bits = 0;
1774bf215546Sopenharmony_ci      return decode_error::invalid_colour_endpoints_size;
1775bf215546Sopenharmony_ci   }
1776bf215546Sopenharmony_ci
1777bf215546Sopenharmony_ci   /* Find the largest cem_ranges that fits within remaining_bits */
1778bf215546Sopenharmony_ci   for (int i = ARRAY_SIZE(cem_ranges)-1; i >= 0; --i) {
1779bf215546Sopenharmony_ci      int cem_bits;
1780bf215546Sopenharmony_ci      cem_bits = (num_cem_values * 8 * cem_ranges[i].t + 4) / 5
1781bf215546Sopenharmony_ci                 + (num_cem_values * 7 * cem_ranges[i].q + 2) / 3
1782bf215546Sopenharmony_ci                 +  num_cem_values * cem_ranges[i].b;
1783bf215546Sopenharmony_ci
1784bf215546Sopenharmony_ci      if (cem_bits <= remaining_bits)
1785bf215546Sopenharmony_ci      {
1786bf215546Sopenharmony_ci         colour_endpoint_bits = cem_bits;
1787bf215546Sopenharmony_ci         ce_max = cem_ranges[i].max;
1788bf215546Sopenharmony_ci         ce_trits = cem_ranges[i].t;
1789bf215546Sopenharmony_ci         ce_quints = cem_ranges[i].q;
1790bf215546Sopenharmony_ci         ce_bits = cem_ranges[i].b;
1791bf215546Sopenharmony_ci         return decode_error::ok;
1792bf215546Sopenharmony_ci      }
1793bf215546Sopenharmony_ci   }
1794bf215546Sopenharmony_ci
1795bf215546Sopenharmony_ci   assert(0);
1796bf215546Sopenharmony_ci   return decode_error::invalid_colour_endpoints_size;
1797bf215546Sopenharmony_ci}
1798bf215546Sopenharmony_ci
1799bf215546Sopenharmony_ci/**
1800bf215546Sopenharmony_ci * Decode ASTC 2D LDR texture data.
1801bf215546Sopenharmony_ci *
1802bf215546Sopenharmony_ci * \param src_width in pixels
1803bf215546Sopenharmony_ci * \param src_height in pixels
1804bf215546Sopenharmony_ci * \param dst_stride in bytes
1805bf215546Sopenharmony_ci */
1806bf215546Sopenharmony_ciextern "C" void
1807bf215546Sopenharmony_ci_mesa_unpack_astc_2d_ldr(uint8_t *dst_row,
1808bf215546Sopenharmony_ci                         unsigned dst_stride,
1809bf215546Sopenharmony_ci                         const uint8_t *src_row,
1810bf215546Sopenharmony_ci                         unsigned src_stride,
1811bf215546Sopenharmony_ci                         unsigned src_width,
1812bf215546Sopenharmony_ci                         unsigned src_height,
1813bf215546Sopenharmony_ci                         mesa_format format)
1814bf215546Sopenharmony_ci{
1815bf215546Sopenharmony_ci   assert(_mesa_is_format_astc_2d(format));
1816bf215546Sopenharmony_ci   bool srgb = _mesa_is_format_srgb(format);
1817bf215546Sopenharmony_ci
1818bf215546Sopenharmony_ci   unsigned blk_w, blk_h;
1819bf215546Sopenharmony_ci   _mesa_get_format_block_size(format, &blk_w, &blk_h);
1820bf215546Sopenharmony_ci
1821bf215546Sopenharmony_ci   const unsigned block_size = 16;
1822bf215546Sopenharmony_ci   unsigned x_blocks = (src_width + blk_w - 1) / blk_w;
1823bf215546Sopenharmony_ci   unsigned y_blocks = (src_height + blk_h - 1) / blk_h;
1824bf215546Sopenharmony_ci
1825bf215546Sopenharmony_ci   Decoder dec(blk_w, blk_h, 1, srgb, true);
1826bf215546Sopenharmony_ci
1827bf215546Sopenharmony_ci   for (unsigned y = 0; y < y_blocks; ++y) {
1828bf215546Sopenharmony_ci      for (unsigned x = 0; x < x_blocks; ++x) {
1829bf215546Sopenharmony_ci         /* Same size as the largest block. */
1830bf215546Sopenharmony_ci         uint16_t block_out[12 * 12 * 4];
1831bf215546Sopenharmony_ci
1832bf215546Sopenharmony_ci         dec.decode(src_row + x * block_size, block_out);
1833bf215546Sopenharmony_ci
1834bf215546Sopenharmony_ci         /* This can be smaller with NPOT dimensions. */
1835bf215546Sopenharmony_ci         unsigned dst_blk_w = MIN2(blk_w, src_width  - x*blk_w);
1836bf215546Sopenharmony_ci         unsigned dst_blk_h = MIN2(blk_h, src_height - y*blk_h);
1837bf215546Sopenharmony_ci
1838bf215546Sopenharmony_ci         for (unsigned sub_y = 0; sub_y < dst_blk_h; ++sub_y) {
1839bf215546Sopenharmony_ci            for (unsigned sub_x = 0; sub_x < dst_blk_w; ++sub_x) {
1840bf215546Sopenharmony_ci               uint8_t *dst = dst_row + sub_y * dst_stride +
1841bf215546Sopenharmony_ci                              (x * blk_w + sub_x) * 4;
1842bf215546Sopenharmony_ci               const uint16_t *src = &block_out[(sub_y * blk_w + sub_x) * 4];
1843bf215546Sopenharmony_ci
1844bf215546Sopenharmony_ci               dst[0] = src[0];
1845bf215546Sopenharmony_ci               dst[1] = src[1];
1846bf215546Sopenharmony_ci               dst[2] = src[2];
1847bf215546Sopenharmony_ci               dst[3] = src[3];
1848bf215546Sopenharmony_ci            }
1849bf215546Sopenharmony_ci         }
1850bf215546Sopenharmony_ci      }
1851bf215546Sopenharmony_ci      src_row += src_stride;
1852bf215546Sopenharmony_ci      dst_row += dst_stride * blk_h;
1853bf215546Sopenharmony_ci   }
1854bf215546Sopenharmony_ci}
1855