1cc1dc7a3Sopenharmony_ci// SPDX-License-Identifier: Apache-2.0 2cc1dc7a3Sopenharmony_ci// ---------------------------------------------------------------------------- 3cc1dc7a3Sopenharmony_ci// Copyright 2011-2024 Arm Limited 4cc1dc7a3Sopenharmony_ci// 5cc1dc7a3Sopenharmony_ci// Licensed under the Apache License, Version 2.0 (the "License"); you may not 6cc1dc7a3Sopenharmony_ci// use this file except in compliance with the License. You may obtain a copy 7cc1dc7a3Sopenharmony_ci// of the License at: 8cc1dc7a3Sopenharmony_ci// 9cc1dc7a3Sopenharmony_ci// http://www.apache.org/licenses/LICENSE-2.0 10cc1dc7a3Sopenharmony_ci// 11cc1dc7a3Sopenharmony_ci// Unless required by applicable law or agreed to in writing, software 12cc1dc7a3Sopenharmony_ci// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 13cc1dc7a3Sopenharmony_ci// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 14cc1dc7a3Sopenharmony_ci// License for the specific language governing permissions and limitations 15cc1dc7a3Sopenharmony_ci// under the License. 16cc1dc7a3Sopenharmony_ci// ---------------------------------------------------------------------------- 17cc1dc7a3Sopenharmony_ci 18cc1dc7a3Sopenharmony_ci#if !defined(ASTCENC_DECOMPRESS_ONLY) 19cc1dc7a3Sopenharmony_ci 20cc1dc7a3Sopenharmony_ci/** 21cc1dc7a3Sopenharmony_ci * @brief Functions for angular-sum algorithm for weight alignment. 22cc1dc7a3Sopenharmony_ci * 23cc1dc7a3Sopenharmony_ci * This algorithm works as follows: 24cc1dc7a3Sopenharmony_ci * - we compute a complex number P as (cos s*i, sin s*i) for each weight, 25cc1dc7a3Sopenharmony_ci * where i is the input value and s is a scaling factor based on the spacing between the weights. 26cc1dc7a3Sopenharmony_ci * - we then add together complex numbers for all the weights. 27cc1dc7a3Sopenharmony_ci * - we then compute the length and angle of the resulting sum. 28cc1dc7a3Sopenharmony_ci * 29cc1dc7a3Sopenharmony_ci * This should produce the following results: 30cc1dc7a3Sopenharmony_ci * - perfect alignment results in a vector whose length is equal to the sum of lengths of all inputs 31cc1dc7a3Sopenharmony_ci * - even distribution results in a vector of length 0. 32cc1dc7a3Sopenharmony_ci * - all samples identical results in perfect alignment for every scaling. 33cc1dc7a3Sopenharmony_ci * 34cc1dc7a3Sopenharmony_ci * For each scaling factor within a given set, we compute an alignment factor from 0 to 1. This 35cc1dc7a3Sopenharmony_ci * should then result in some scalings standing out as having particularly good alignment factors; 36cc1dc7a3Sopenharmony_ci * we can use this to produce a set of candidate scale/shift values for various quantization levels; 37cc1dc7a3Sopenharmony_ci * we should then actually try them and see what happens. 38cc1dc7a3Sopenharmony_ci */ 39cc1dc7a3Sopenharmony_ci 40cc1dc7a3Sopenharmony_ci#include "astcenc_internal.h" 41cc1dc7a3Sopenharmony_ci#include "astcenc_vecmathlib.h" 42cc1dc7a3Sopenharmony_ci 43cc1dc7a3Sopenharmony_ci#include <stdio.h> 44cc1dc7a3Sopenharmony_ci#include <cassert> 45cc1dc7a3Sopenharmony_ci#include <cstring> 46cc1dc7a3Sopenharmony_ci 47cc1dc7a3Sopenharmony_cistatic constexpr unsigned int ANGULAR_STEPS { 32 }; 48cc1dc7a3Sopenharmony_ci 49cc1dc7a3Sopenharmony_cistatic_assert((ANGULAR_STEPS % ASTCENC_SIMD_WIDTH) == 0, 50cc1dc7a3Sopenharmony_ci "ANGULAR_STEPS must be multiple of ASTCENC_SIMD_WIDTH"); 51cc1dc7a3Sopenharmony_ci 52cc1dc7a3Sopenharmony_cistatic_assert(ANGULAR_STEPS >= 32, 53cc1dc7a3Sopenharmony_ci "ANGULAR_STEPS must be at least max(steps_for_quant_level)"); 54cc1dc7a3Sopenharmony_ci 55cc1dc7a3Sopenharmony_ci// Store a reduced sin/cos table for 64 possible weight values; this causes 56cc1dc7a3Sopenharmony_ci// slight quality loss compared to using sin() and cos() directly. Must be 2^N. 57cc1dc7a3Sopenharmony_cistatic constexpr unsigned int SINCOS_STEPS { 64 }; 58cc1dc7a3Sopenharmony_ci 59cc1dc7a3Sopenharmony_cistatic const uint8_t steps_for_quant_level[12] { 60cc1dc7a3Sopenharmony_ci 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24, 32 61cc1dc7a3Sopenharmony_ci}; 62cc1dc7a3Sopenharmony_ci 63cc1dc7a3Sopenharmony_ciASTCENC_ALIGNAS static float sin_table[SINCOS_STEPS][ANGULAR_STEPS]; 64cc1dc7a3Sopenharmony_ciASTCENC_ALIGNAS static float cos_table[SINCOS_STEPS][ANGULAR_STEPS]; 65cc1dc7a3Sopenharmony_ci 66cc1dc7a3Sopenharmony_ci#if defined(ASTCENC_DIAGNOSTICS) 67cc1dc7a3Sopenharmony_ci static bool print_once { true }; 68cc1dc7a3Sopenharmony_ci#endif 69cc1dc7a3Sopenharmony_ci 70cc1dc7a3Sopenharmony_ci/* See header for documentation. */ 71cc1dc7a3Sopenharmony_civoid prepare_angular_tables() 72cc1dc7a3Sopenharmony_ci{ 73cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < ANGULAR_STEPS; i++) 74cc1dc7a3Sopenharmony_ci { 75cc1dc7a3Sopenharmony_ci float angle_step = static_cast<float>(i + 1); 76cc1dc7a3Sopenharmony_ci 77cc1dc7a3Sopenharmony_ci for (unsigned int j = 0; j < SINCOS_STEPS; j++) 78cc1dc7a3Sopenharmony_ci { 79cc1dc7a3Sopenharmony_ci sin_table[j][i] = static_cast<float>(sinf((2.0f * astc::PI / (SINCOS_STEPS - 1.0f)) * angle_step * static_cast<float>(j))); 80cc1dc7a3Sopenharmony_ci cos_table[j][i] = static_cast<float>(cosf((2.0f * astc::PI / (SINCOS_STEPS - 1.0f)) * angle_step * static_cast<float>(j))); 81cc1dc7a3Sopenharmony_ci } 82cc1dc7a3Sopenharmony_ci } 83cc1dc7a3Sopenharmony_ci} 84cc1dc7a3Sopenharmony_ci 85cc1dc7a3Sopenharmony_ci/** 86cc1dc7a3Sopenharmony_ci * @brief Compute the angular alignment factors and offsets. 87cc1dc7a3Sopenharmony_ci * 88cc1dc7a3Sopenharmony_ci * @param weight_count The number of (decimated) weights. 89cc1dc7a3Sopenharmony_ci * @param dec_weight_ideal_value The ideal decimated unquantized weight values. 90cc1dc7a3Sopenharmony_ci * @param max_angular_steps The maximum number of steps to be tested. 91cc1dc7a3Sopenharmony_ci * @param[out] offsets The output angular offsets array. 92cc1dc7a3Sopenharmony_ci */ 93cc1dc7a3Sopenharmony_cistatic void compute_angular_offsets( 94cc1dc7a3Sopenharmony_ci unsigned int weight_count, 95cc1dc7a3Sopenharmony_ci const float* dec_weight_ideal_value, 96cc1dc7a3Sopenharmony_ci unsigned int max_angular_steps, 97cc1dc7a3Sopenharmony_ci float* offsets 98cc1dc7a3Sopenharmony_ci) { 99cc1dc7a3Sopenharmony_ci promise(weight_count > 0); 100cc1dc7a3Sopenharmony_ci promise(max_angular_steps > 0); 101cc1dc7a3Sopenharmony_ci 102cc1dc7a3Sopenharmony_ci ASTCENC_ALIGNAS int isamplev[BLOCK_MAX_WEIGHTS]; 103cc1dc7a3Sopenharmony_ci 104cc1dc7a3Sopenharmony_ci // Precompute isample; arrays are always allocated 64 elements long 105cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < weight_count; i += ASTCENC_SIMD_WIDTH) 106cc1dc7a3Sopenharmony_ci { 107cc1dc7a3Sopenharmony_ci // Add 2^23 and interpreting bits extracts round-to-nearest int 108cc1dc7a3Sopenharmony_ci vfloat sample = loada(dec_weight_ideal_value + i) * (SINCOS_STEPS - 1.0f) + vfloat(12582912.0f); 109cc1dc7a3Sopenharmony_ci vint isample = float_as_int(sample) & vint((SINCOS_STEPS - 1)); 110cc1dc7a3Sopenharmony_ci storea(isample, isamplev + i); 111cc1dc7a3Sopenharmony_ci } 112cc1dc7a3Sopenharmony_ci 113cc1dc7a3Sopenharmony_ci // Arrays are multiple of SIMD width (ANGULAR_STEPS), safe to overshoot max 114cc1dc7a3Sopenharmony_ci vfloat mult = vfloat(1.0f / (2.0f * astc::PI)); 115cc1dc7a3Sopenharmony_ci 116cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < max_angular_steps; i += ASTCENC_SIMD_WIDTH) 117cc1dc7a3Sopenharmony_ci { 118cc1dc7a3Sopenharmony_ci vfloat anglesum_x = vfloat::zero(); 119cc1dc7a3Sopenharmony_ci vfloat anglesum_y = vfloat::zero(); 120cc1dc7a3Sopenharmony_ci 121cc1dc7a3Sopenharmony_ci for (unsigned int j = 0; j < weight_count; j++) 122cc1dc7a3Sopenharmony_ci { 123cc1dc7a3Sopenharmony_ci int isample = isamplev[j]; 124cc1dc7a3Sopenharmony_ci anglesum_x += loada(cos_table[isample] + i); 125cc1dc7a3Sopenharmony_ci anglesum_y += loada(sin_table[isample] + i); 126cc1dc7a3Sopenharmony_ci } 127cc1dc7a3Sopenharmony_ci 128cc1dc7a3Sopenharmony_ci vfloat angle = atan2(anglesum_y, anglesum_x); 129cc1dc7a3Sopenharmony_ci vfloat ofs = angle * mult; 130cc1dc7a3Sopenharmony_ci storea(ofs, offsets + i); 131cc1dc7a3Sopenharmony_ci } 132cc1dc7a3Sopenharmony_ci} 133cc1dc7a3Sopenharmony_ci 134cc1dc7a3Sopenharmony_ci/** 135cc1dc7a3Sopenharmony_ci * @brief For a given step size compute the lowest and highest weight. 136cc1dc7a3Sopenharmony_ci * 137cc1dc7a3Sopenharmony_ci * Compute the lowest and highest weight that results from quantizing using the given stepsize and 138cc1dc7a3Sopenharmony_ci * offset, and then compute the resulting error. The cut errors indicate the error that results from 139cc1dc7a3Sopenharmony_ci * forcing samples that should have had one weight value one step up or down. 140cc1dc7a3Sopenharmony_ci * 141cc1dc7a3Sopenharmony_ci * @param weight_count The number of (decimated) weights. 142cc1dc7a3Sopenharmony_ci * @param dec_weight_ideal_value The ideal decimated unquantized weight values. 143cc1dc7a3Sopenharmony_ci * @param max_angular_steps The maximum number of steps to be tested. 144cc1dc7a3Sopenharmony_ci * @param max_quant_steps The maximum quantization level to be tested. 145cc1dc7a3Sopenharmony_ci * @param offsets The angular offsets array. 146cc1dc7a3Sopenharmony_ci * @param[out] lowest_weight Per angular step, the lowest weight. 147cc1dc7a3Sopenharmony_ci * @param[out] weight_span Per angular step, the span between lowest and highest weight. 148cc1dc7a3Sopenharmony_ci * @param[out] error Per angular step, the error. 149cc1dc7a3Sopenharmony_ci * @param[out] cut_low_weight_error Per angular step, the low weight cut error. 150cc1dc7a3Sopenharmony_ci * @param[out] cut_high_weight_error Per angular step, the high weight cut error. 151cc1dc7a3Sopenharmony_ci */ 152cc1dc7a3Sopenharmony_ci#if ASTCENC_NEON != 0 153cc1dc7a3Sopenharmony_cistatic void compute_lowest_and_highest_weight( 154cc1dc7a3Sopenharmony_ci QualityProfile privateProfile, 155cc1dc7a3Sopenharmony_ci unsigned int weight_count, 156cc1dc7a3Sopenharmony_ci const float* dec_weight_ideal_value, 157cc1dc7a3Sopenharmony_ci unsigned int max_angular_steps, 158cc1dc7a3Sopenharmony_ci unsigned int max_quant_steps, 159cc1dc7a3Sopenharmony_ci const float* offsets, 160cc1dc7a3Sopenharmony_ci float* lowest_weight, 161cc1dc7a3Sopenharmony_ci int* weight_span, 162cc1dc7a3Sopenharmony_ci float* error, 163cc1dc7a3Sopenharmony_ci float* cut_low_weight_error, 164cc1dc7a3Sopenharmony_ci float* cut_high_weight_error 165cc1dc7a3Sopenharmony_ci) { 166cc1dc7a3Sopenharmony_ci promise(weight_count > 0); 167cc1dc7a3Sopenharmony_ci promise(max_angular_steps > 0); 168cc1dc7a3Sopenharmony_ci 169cc1dc7a3Sopenharmony_ci vfloat rcp_stepsize = vfloat::lane_id() + vfloat(1.0f); 170cc1dc7a3Sopenharmony_ci 171cc1dc7a3Sopenharmony_ci float max_weight = 1.0f; 172cc1dc7a3Sopenharmony_ci float min_weight = 0.0f; 173cc1dc7a3Sopenharmony_ci // in HIGH_SPEED_PROFILE, max_weight is always equal to 1.0, and min_weight is always equal to 0 174cc1dc7a3Sopenharmony_ci if (privateProfile != HIGH_SPEED_PROFILE) 175cc1dc7a3Sopenharmony_ci { 176cc1dc7a3Sopenharmony_ci max_weight = dec_weight_ideal_value[0]; 177cc1dc7a3Sopenharmony_ci min_weight = dec_weight_ideal_value[0]; 178cc1dc7a3Sopenharmony_ci for (unsigned int j = 1; j < weight_count; j++) 179cc1dc7a3Sopenharmony_ci { 180cc1dc7a3Sopenharmony_ci float weight = dec_weight_ideal_value[j]; 181cc1dc7a3Sopenharmony_ci __asm__ volatile("fmax %s0, %s0, %s1" : "+w"(max_weight) : "w"(weight)); 182cc1dc7a3Sopenharmony_ci __asm__ volatile("fmin %s0, %s0, %s1" : "+w"(min_weight) : "w"(weight)); 183cc1dc7a3Sopenharmony_ci } 184cc1dc7a3Sopenharmony_ci } 185cc1dc7a3Sopenharmony_ci 186cc1dc7a3Sopenharmony_ci // Arrays are ANGULAR_STEPS long, so always safe to run full vectors 187cc1dc7a3Sopenharmony_ci for (unsigned int sp = 0; sp < max_angular_steps; sp += ASTCENC_SIMD_WIDTH) 188cc1dc7a3Sopenharmony_ci { 189cc1dc7a3Sopenharmony_ci vfloat errval = vfloat::zero(); 190cc1dc7a3Sopenharmony_ci vfloat cut_low_weight_err = vfloat::zero(); 191cc1dc7a3Sopenharmony_ci vfloat cut_high_weight_err = vfloat::zero(); 192cc1dc7a3Sopenharmony_ci vfloat offset = loada(offsets + sp); 193cc1dc7a3Sopenharmony_ci 194cc1dc7a3Sopenharmony_ci offset = (vfloat)vnegq_f32(offset.m); 195cc1dc7a3Sopenharmony_ci vfloat maxidx = vfloat::zero(); 196cc1dc7a3Sopenharmony_ci vfloat minidx = vfloat::zero(); 197cc1dc7a3Sopenharmony_ci 198cc1dc7a3Sopenharmony_ci if (privateProfile == HIGH_SPEED_PROFILE) 199cc1dc7a3Sopenharmony_ci { 200cc1dc7a3Sopenharmony_ci maxidx = round((vfloat)vaddq_f32(rcp_stepsize.m, offset.m)); 201cc1dc7a3Sopenharmony_ci minidx = round(offset); 202cc1dc7a3Sopenharmony_ci } 203cc1dc7a3Sopenharmony_ci else 204cc1dc7a3Sopenharmony_ci { 205cc1dc7a3Sopenharmony_ci maxidx = round((vfloat)vfmaq_n_f32(offset.m, rcp_stepsize.m, max_weight)); 206cc1dc7a3Sopenharmony_ci minidx = round((vfloat)vfmaq_n_f32(offset.m, rcp_stepsize.m, min_weight)); 207cc1dc7a3Sopenharmony_ci } 208cc1dc7a3Sopenharmony_ci 209cc1dc7a3Sopenharmony_ci for (unsigned int j = 0; j < weight_count; j++) 210cc1dc7a3Sopenharmony_ci { 211cc1dc7a3Sopenharmony_ci vfloat sval = (vfloat)vfmaq_n_f32(offset.m, rcp_stepsize.m, *(dec_weight_ideal_value + j)); 212cc1dc7a3Sopenharmony_ci vfloat svalrte = round(sval); 213cc1dc7a3Sopenharmony_ci vfloat diff = sval - svalrte; 214cc1dc7a3Sopenharmony_ci errval += diff * diff; 215cc1dc7a3Sopenharmony_ci 216cc1dc7a3Sopenharmony_ci // Accumulate on min hit 217cc1dc7a3Sopenharmony_ci vmask mask = svalrte == minidx; 218cc1dc7a3Sopenharmony_ci vfloat accum = cut_low_weight_err + vfloat(1.0f) - vfloat(2.0f) * diff; 219cc1dc7a3Sopenharmony_ci cut_low_weight_err = select(cut_low_weight_err, accum, mask); 220cc1dc7a3Sopenharmony_ci 221cc1dc7a3Sopenharmony_ci // Accumulate on max hit 222cc1dc7a3Sopenharmony_ci mask = svalrte == maxidx; 223cc1dc7a3Sopenharmony_ci accum = cut_high_weight_err + vfloat(1.0f) + vfloat(2.0f) * diff; 224cc1dc7a3Sopenharmony_ci cut_high_weight_err = select(cut_high_weight_err, accum, mask); 225cc1dc7a3Sopenharmony_ci } 226cc1dc7a3Sopenharmony_ci 227cc1dc7a3Sopenharmony_ci // Write out min weight and weight span; clamp span to a usable range 228cc1dc7a3Sopenharmony_ci vint span = float_to_int(maxidx - minidx + vfloat(1)); 229cc1dc7a3Sopenharmony_ci span = min(span, vint(max_quant_steps + 3)); 230cc1dc7a3Sopenharmony_ci span = max(span, vint(2)); 231cc1dc7a3Sopenharmony_ci storea(minidx, lowest_weight + sp); 232cc1dc7a3Sopenharmony_ci storea(span, weight_span + sp); 233cc1dc7a3Sopenharmony_ci 234cc1dc7a3Sopenharmony_ci // The cut_(lowest/highest)_weight_error indicate the error that results from forcing 235cc1dc7a3Sopenharmony_ci // samples that should have had the weight value one step (up/down). 236cc1dc7a3Sopenharmony_ci vfloat ssize = 1.0f / rcp_stepsize; 237cc1dc7a3Sopenharmony_ci vfloat errscale = ssize * ssize; 238cc1dc7a3Sopenharmony_ci storea(errval * errscale, error + sp); 239cc1dc7a3Sopenharmony_ci storea(cut_low_weight_err * errscale, cut_low_weight_error + sp); 240cc1dc7a3Sopenharmony_ci storea(cut_high_weight_err * errscale, cut_high_weight_error + sp); 241cc1dc7a3Sopenharmony_ci 242cc1dc7a3Sopenharmony_ci rcp_stepsize = rcp_stepsize + vfloat(ASTCENC_SIMD_WIDTH); 243cc1dc7a3Sopenharmony_ci } 244cc1dc7a3Sopenharmony_ci} 245cc1dc7a3Sopenharmony_ci#else 246cc1dc7a3Sopenharmony_cistatic void compute_lowest_and_highest_weight( 247cc1dc7a3Sopenharmony_ci QualityProfile privateProfile, 248cc1dc7a3Sopenharmony_ci unsigned int weight_count, 249cc1dc7a3Sopenharmony_ci const float* dec_weight_ideal_value, 250cc1dc7a3Sopenharmony_ci unsigned int max_angular_steps, 251cc1dc7a3Sopenharmony_ci unsigned int max_quant_steps, 252cc1dc7a3Sopenharmony_ci const float* offsets, 253cc1dc7a3Sopenharmony_ci float* lowest_weight, 254cc1dc7a3Sopenharmony_ci int* weight_span, 255cc1dc7a3Sopenharmony_ci float* error, 256cc1dc7a3Sopenharmony_ci float* cut_low_weight_error, 257cc1dc7a3Sopenharmony_ci float* cut_high_weight_error 258cc1dc7a3Sopenharmony_ci) { 259cc1dc7a3Sopenharmony_ci (void) privateProfile; 260cc1dc7a3Sopenharmony_ci promise(weight_count > 0); 261cc1dc7a3Sopenharmony_ci promise(max_angular_steps > 0); 262cc1dc7a3Sopenharmony_ci 263cc1dc7a3Sopenharmony_ci vfloat rcp_stepsize = vfloat::lane_id() + vfloat(1.0f); 264cc1dc7a3Sopenharmony_ci 265cc1dc7a3Sopenharmony_ci // Arrays are ANGULAR_STEPS long, so always safe to run full vectors 266cc1dc7a3Sopenharmony_ci for (unsigned int sp = 0; sp < max_angular_steps; sp += ASTCENC_SIMD_WIDTH) 267cc1dc7a3Sopenharmony_ci { 268cc1dc7a3Sopenharmony_ci vfloat minidx(128.0f); 269cc1dc7a3Sopenharmony_ci vfloat maxidx(-128.0f); 270cc1dc7a3Sopenharmony_ci vfloat errval = vfloat::zero(); 271cc1dc7a3Sopenharmony_ci vfloat cut_low_weight_err = vfloat::zero(); 272cc1dc7a3Sopenharmony_ci vfloat cut_high_weight_err = vfloat::zero(); 273cc1dc7a3Sopenharmony_ci vfloat offset = loada(offsets + sp); 274cc1dc7a3Sopenharmony_ci 275cc1dc7a3Sopenharmony_ci for (unsigned int j = 0; j < weight_count; j++) 276cc1dc7a3Sopenharmony_ci { 277cc1dc7a3Sopenharmony_ci vfloat sval = load1(dec_weight_ideal_value + j) * rcp_stepsize - offset; 278cc1dc7a3Sopenharmony_ci vfloat svalrte = round(sval); 279cc1dc7a3Sopenharmony_ci vfloat diff = sval - svalrte; 280cc1dc7a3Sopenharmony_ci errval += diff * diff; 281cc1dc7a3Sopenharmony_ci 282cc1dc7a3Sopenharmony_ci // Reset tracker on min hit 283cc1dc7a3Sopenharmony_ci vmask mask = svalrte < minidx; 284cc1dc7a3Sopenharmony_ci minidx = select(minidx, svalrte, mask); 285cc1dc7a3Sopenharmony_ci cut_low_weight_err = select(cut_low_weight_err, vfloat::zero(), mask); 286cc1dc7a3Sopenharmony_ci 287cc1dc7a3Sopenharmony_ci // Accumulate on min hit 288cc1dc7a3Sopenharmony_ci mask = svalrte == minidx; 289cc1dc7a3Sopenharmony_ci vfloat accum = cut_low_weight_err + vfloat(1.0f) - vfloat(2.0f) * diff; 290cc1dc7a3Sopenharmony_ci cut_low_weight_err = select(cut_low_weight_err, accum, mask); 291cc1dc7a3Sopenharmony_ci 292cc1dc7a3Sopenharmony_ci // Reset tracker on max hit 293cc1dc7a3Sopenharmony_ci mask = svalrte > maxidx; 294cc1dc7a3Sopenharmony_ci maxidx = select(maxidx, svalrte, mask); 295cc1dc7a3Sopenharmony_ci cut_high_weight_err = select(cut_high_weight_err, vfloat::zero(), mask); 296cc1dc7a3Sopenharmony_ci 297cc1dc7a3Sopenharmony_ci // Accumulate on max hit 298cc1dc7a3Sopenharmony_ci mask = svalrte == maxidx; 299cc1dc7a3Sopenharmony_ci accum = cut_high_weight_err + vfloat(1.0f) + vfloat(2.0f) * diff; 300cc1dc7a3Sopenharmony_ci cut_high_weight_err = select(cut_high_weight_err, accum, mask); 301cc1dc7a3Sopenharmony_ci } 302cc1dc7a3Sopenharmony_ci 303cc1dc7a3Sopenharmony_ci // Write out min weight and weight span; clamp span to a usable range 304cc1dc7a3Sopenharmony_ci vint span = float_to_int(maxidx - minidx + vfloat(1)); 305cc1dc7a3Sopenharmony_ci span = min(span, vint(max_quant_steps + 3)); 306cc1dc7a3Sopenharmony_ci span = max(span, vint(2)); 307cc1dc7a3Sopenharmony_ci storea(minidx, lowest_weight + sp); 308cc1dc7a3Sopenharmony_ci storea(span, weight_span + sp); 309cc1dc7a3Sopenharmony_ci 310cc1dc7a3Sopenharmony_ci // The cut_(lowest/highest)_weight_error indicate the error that results from forcing 311cc1dc7a3Sopenharmony_ci // samples that should have had the weight value one step (up/down). 312cc1dc7a3Sopenharmony_ci vfloat ssize = 1.0f / rcp_stepsize; 313cc1dc7a3Sopenharmony_ci vfloat errscale = ssize * ssize; 314cc1dc7a3Sopenharmony_ci storea(errval * errscale, error + sp); 315cc1dc7a3Sopenharmony_ci storea(cut_low_weight_err * errscale, cut_low_weight_error + sp); 316cc1dc7a3Sopenharmony_ci storea(cut_high_weight_err * errscale, cut_high_weight_error + sp); 317cc1dc7a3Sopenharmony_ci 318cc1dc7a3Sopenharmony_ci rcp_stepsize = rcp_stepsize + vfloat(ASTCENC_SIMD_WIDTH); 319cc1dc7a3Sopenharmony_ci } 320cc1dc7a3Sopenharmony_ci} 321cc1dc7a3Sopenharmony_ci#endif 322cc1dc7a3Sopenharmony_ci 323cc1dc7a3Sopenharmony_ci/** 324cc1dc7a3Sopenharmony_ci * @brief The main function for the angular algorithm. 325cc1dc7a3Sopenharmony_ci * 326cc1dc7a3Sopenharmony_ci * @param weight_count The number of (decimated) weights. 327cc1dc7a3Sopenharmony_ci * @param dec_weight_ideal_value The ideal decimated unquantized weight values. 328cc1dc7a3Sopenharmony_ci * @param max_quant_level The maximum quantization level to be tested. 329cc1dc7a3Sopenharmony_ci * @param[out] low_value Per angular step, the lowest weight value. 330cc1dc7a3Sopenharmony_ci * @param[out] high_value Per angular step, the highest weight value. 331cc1dc7a3Sopenharmony_ci */ 332cc1dc7a3Sopenharmony_cistatic void compute_angular_endpoints_for_quant_levels( 333cc1dc7a3Sopenharmony_ci QualityProfile privateProfile, 334cc1dc7a3Sopenharmony_ci unsigned int weight_count, 335cc1dc7a3Sopenharmony_ci const float* dec_weight_ideal_value, 336cc1dc7a3Sopenharmony_ci unsigned int max_quant_level, 337cc1dc7a3Sopenharmony_ci float low_value[TUNE_MAX_ANGULAR_QUANT + 1], 338cc1dc7a3Sopenharmony_ci float high_value[TUNE_MAX_ANGULAR_QUANT + 1] 339cc1dc7a3Sopenharmony_ci) { 340cc1dc7a3Sopenharmony_ci unsigned int max_quant_steps = steps_for_quant_level[max_quant_level]; 341cc1dc7a3Sopenharmony_ci unsigned int max_angular_steps = steps_for_quant_level[max_quant_level]; 342cc1dc7a3Sopenharmony_ci 343cc1dc7a3Sopenharmony_ci ASTCENC_ALIGNAS float angular_offsets[ANGULAR_STEPS]; 344cc1dc7a3Sopenharmony_ci 345cc1dc7a3Sopenharmony_ci compute_angular_offsets(weight_count, dec_weight_ideal_value, 346cc1dc7a3Sopenharmony_ci max_angular_steps, angular_offsets); 347cc1dc7a3Sopenharmony_ci 348cc1dc7a3Sopenharmony_ci ASTCENC_ALIGNAS float lowest_weight[ANGULAR_STEPS]; 349cc1dc7a3Sopenharmony_ci ASTCENC_ALIGNAS int32_t weight_span[ANGULAR_STEPS]; 350cc1dc7a3Sopenharmony_ci ASTCENC_ALIGNAS float error[ANGULAR_STEPS]; 351cc1dc7a3Sopenharmony_ci ASTCENC_ALIGNAS float cut_low_weight_error[ANGULAR_STEPS]; 352cc1dc7a3Sopenharmony_ci ASTCENC_ALIGNAS float cut_high_weight_error[ANGULAR_STEPS]; 353cc1dc7a3Sopenharmony_ci 354cc1dc7a3Sopenharmony_ci compute_lowest_and_highest_weight(privateProfile, weight_count, dec_weight_ideal_value, 355cc1dc7a3Sopenharmony_ci max_angular_steps, max_quant_steps, 356cc1dc7a3Sopenharmony_ci angular_offsets, lowest_weight, weight_span, error, 357cc1dc7a3Sopenharmony_ci cut_low_weight_error, cut_high_weight_error); 358cc1dc7a3Sopenharmony_ci 359cc1dc7a3Sopenharmony_ci // For each quantization level, find the best error terms. Use packed vectors so data-dependent 360cc1dc7a3Sopenharmony_ci // branches can become selects. This involves some integer to float casts, but the values are 361cc1dc7a3Sopenharmony_ci // small enough so they never round the wrong way. 362cc1dc7a3Sopenharmony_ci vfloat4 best_results[36]; 363cc1dc7a3Sopenharmony_ci 364cc1dc7a3Sopenharmony_ci // Initialize the array to some safe defaults 365cc1dc7a3Sopenharmony_ci promise(max_quant_steps > 0); 366cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < (max_quant_steps + 4); i++) 367cc1dc7a3Sopenharmony_ci { 368cc1dc7a3Sopenharmony_ci // Lane<0> = Best error 369cc1dc7a3Sopenharmony_ci // Lane<1> = Best scale; -1 indicates no solution found 370cc1dc7a3Sopenharmony_ci // Lane<2> = Cut low weight 371cc1dc7a3Sopenharmony_ci best_results[i] = vfloat4(ERROR_CALC_DEFAULT, -1.0f, 0.0f, 0.0f); 372cc1dc7a3Sopenharmony_ci } 373cc1dc7a3Sopenharmony_ci 374cc1dc7a3Sopenharmony_ci promise(max_angular_steps > 0); 375cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < max_angular_steps; i++) 376cc1dc7a3Sopenharmony_ci { 377cc1dc7a3Sopenharmony_ci float i_flt = static_cast<float>(i); 378cc1dc7a3Sopenharmony_ci 379cc1dc7a3Sopenharmony_ci int idx_span = weight_span[i]; 380cc1dc7a3Sopenharmony_ci 381cc1dc7a3Sopenharmony_ci float error_cut_low = error[i] + cut_low_weight_error[i]; 382cc1dc7a3Sopenharmony_ci float error_cut_high = error[i] + cut_high_weight_error[i]; 383cc1dc7a3Sopenharmony_ci float error_cut_low_high = error[i] + cut_low_weight_error[i] + cut_high_weight_error[i]; 384cc1dc7a3Sopenharmony_ci 385cc1dc7a3Sopenharmony_ci // Check best error against record N 386cc1dc7a3Sopenharmony_ci vfloat4 best_result = best_results[idx_span]; 387cc1dc7a3Sopenharmony_ci vfloat4 new_result = vfloat4(error[i], i_flt, 0.0f, 0.0f); 388cc1dc7a3Sopenharmony_ci vmask4 mask = vfloat4(best_result.lane<0>()) > vfloat4(error[i]); 389cc1dc7a3Sopenharmony_ci best_results[idx_span] = select(best_result, new_result, mask); 390cc1dc7a3Sopenharmony_ci 391cc1dc7a3Sopenharmony_ci // Check best error against record N-1 with either cut low or cut high 392cc1dc7a3Sopenharmony_ci best_result = best_results[idx_span - 1]; 393cc1dc7a3Sopenharmony_ci 394cc1dc7a3Sopenharmony_ci new_result = vfloat4(error_cut_low, i_flt, 1.0f, 0.0f); 395cc1dc7a3Sopenharmony_ci mask = vfloat4(best_result.lane<0>()) > vfloat4(error_cut_low); 396cc1dc7a3Sopenharmony_ci best_result = select(best_result, new_result, mask); 397cc1dc7a3Sopenharmony_ci 398cc1dc7a3Sopenharmony_ci new_result = vfloat4(error_cut_high, i_flt, 0.0f, 0.0f); 399cc1dc7a3Sopenharmony_ci mask = vfloat4(best_result.lane<0>()) > vfloat4(error_cut_high); 400cc1dc7a3Sopenharmony_ci best_results[idx_span - 1] = select(best_result, new_result, mask); 401cc1dc7a3Sopenharmony_ci 402cc1dc7a3Sopenharmony_ci // Check best error against record N-2 with both cut low and high 403cc1dc7a3Sopenharmony_ci best_result = best_results[idx_span - 2]; 404cc1dc7a3Sopenharmony_ci new_result = vfloat4(error_cut_low_high, i_flt, 1.0f, 0.0f); 405cc1dc7a3Sopenharmony_ci mask = vfloat4(best_result.lane<0>()) > vfloat4(error_cut_low_high); 406cc1dc7a3Sopenharmony_ci best_results[idx_span - 2] = select(best_result, new_result, mask); 407cc1dc7a3Sopenharmony_ci } 408cc1dc7a3Sopenharmony_ci 409cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i <= max_quant_level; i++) 410cc1dc7a3Sopenharmony_ci { 411cc1dc7a3Sopenharmony_ci unsigned int q = steps_for_quant_level[i]; 412cc1dc7a3Sopenharmony_ci int bsi = static_cast<int>(best_results[q].lane<1>()); 413cc1dc7a3Sopenharmony_ci 414cc1dc7a3Sopenharmony_ci // Did we find anything? 415cc1dc7a3Sopenharmony_ci#if defined(ASTCENC_DIAGNOSTICS) 416cc1dc7a3Sopenharmony_ci if ((bsi < 0) && print_once) 417cc1dc7a3Sopenharmony_ci { 418cc1dc7a3Sopenharmony_ci print_once = false; 419cc1dc7a3Sopenharmony_ci printf("INFO: Unable to find full encoding within search error limit.\n\n"); 420cc1dc7a3Sopenharmony_ci } 421cc1dc7a3Sopenharmony_ci#endif 422cc1dc7a3Sopenharmony_ci 423cc1dc7a3Sopenharmony_ci bsi = astc::max(0, bsi); 424cc1dc7a3Sopenharmony_ci 425cc1dc7a3Sopenharmony_ci float lwi = lowest_weight[bsi] + best_results[q].lane<2>(); 426cc1dc7a3Sopenharmony_ci float hwi = lwi + static_cast<float>(q) - 1.0f; 427cc1dc7a3Sopenharmony_ci 428cc1dc7a3Sopenharmony_ci float stepsize = 1.0f / (1.0f + static_cast<float>(bsi)); 429cc1dc7a3Sopenharmony_ci low_value[i] = (angular_offsets[bsi] + lwi) * stepsize; 430cc1dc7a3Sopenharmony_ci high_value[i] = (angular_offsets[bsi] + hwi) * stepsize; 431cc1dc7a3Sopenharmony_ci } 432cc1dc7a3Sopenharmony_ci} 433cc1dc7a3Sopenharmony_ci 434cc1dc7a3Sopenharmony_ci/* See header for documentation. */ 435cc1dc7a3Sopenharmony_civoid compute_angular_endpoints_1plane( 436cc1dc7a3Sopenharmony_ci QualityProfile privateProfile, 437cc1dc7a3Sopenharmony_ci bool only_always, 438cc1dc7a3Sopenharmony_ci const block_size_descriptor& bsd, 439cc1dc7a3Sopenharmony_ci const float* dec_weight_ideal_value, 440cc1dc7a3Sopenharmony_ci unsigned int max_weight_quant, 441cc1dc7a3Sopenharmony_ci compression_working_buffers& tmpbuf 442cc1dc7a3Sopenharmony_ci) { 443cc1dc7a3Sopenharmony_ci float (&low_value)[WEIGHTS_MAX_BLOCK_MODES] = tmpbuf.weight_low_value1; 444cc1dc7a3Sopenharmony_ci float (&high_value)[WEIGHTS_MAX_BLOCK_MODES] = tmpbuf.weight_high_value1; 445cc1dc7a3Sopenharmony_ci 446cc1dc7a3Sopenharmony_ci float (&low_values)[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1] = tmpbuf.weight_low_values1; 447cc1dc7a3Sopenharmony_ci float (&high_values)[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1] = tmpbuf.weight_high_values1; 448cc1dc7a3Sopenharmony_ci 449cc1dc7a3Sopenharmony_ci unsigned int max_decimation_modes = only_always ? bsd.decimation_mode_count_always 450cc1dc7a3Sopenharmony_ci : bsd.decimation_mode_count_selected; 451cc1dc7a3Sopenharmony_ci promise(max_decimation_modes > 0); 452cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < max_decimation_modes; i++) 453cc1dc7a3Sopenharmony_ci { 454cc1dc7a3Sopenharmony_ci const decimation_mode& dm = bsd.decimation_modes[i]; 455cc1dc7a3Sopenharmony_ci if (!dm.is_ref_1plane(static_cast<quant_method>(max_weight_quant))) 456cc1dc7a3Sopenharmony_ci { 457cc1dc7a3Sopenharmony_ci continue; 458cc1dc7a3Sopenharmony_ci } 459cc1dc7a3Sopenharmony_ci 460cc1dc7a3Sopenharmony_ci unsigned int weight_count = bsd.get_decimation_info(i).weight_count; 461cc1dc7a3Sopenharmony_ci 462cc1dc7a3Sopenharmony_ci unsigned int max_precision = dm.maxprec_1plane; 463cc1dc7a3Sopenharmony_ci if (max_precision > TUNE_MAX_ANGULAR_QUANT) 464cc1dc7a3Sopenharmony_ci { 465cc1dc7a3Sopenharmony_ci max_precision = TUNE_MAX_ANGULAR_QUANT; 466cc1dc7a3Sopenharmony_ci } 467cc1dc7a3Sopenharmony_ci 468cc1dc7a3Sopenharmony_ci if (max_precision > max_weight_quant) 469cc1dc7a3Sopenharmony_ci { 470cc1dc7a3Sopenharmony_ci max_precision = max_weight_quant; 471cc1dc7a3Sopenharmony_ci } 472cc1dc7a3Sopenharmony_ci 473cc1dc7a3Sopenharmony_ci compute_angular_endpoints_for_quant_levels( 474cc1dc7a3Sopenharmony_ci privateProfile, 475cc1dc7a3Sopenharmony_ci weight_count, 476cc1dc7a3Sopenharmony_ci dec_weight_ideal_value + i * BLOCK_MAX_WEIGHTS, 477cc1dc7a3Sopenharmony_ci max_precision, low_values[i], high_values[i]); 478cc1dc7a3Sopenharmony_ci } 479cc1dc7a3Sopenharmony_ci 480cc1dc7a3Sopenharmony_ci unsigned int max_block_modes = only_always ? bsd.block_mode_count_1plane_always 481cc1dc7a3Sopenharmony_ci : bsd.block_mode_count_1plane_selected; 482cc1dc7a3Sopenharmony_ci promise(max_block_modes > 0); 483cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < max_block_modes; i++) 484cc1dc7a3Sopenharmony_ci { 485cc1dc7a3Sopenharmony_ci const block_mode& bm = bsd.block_modes[i]; 486cc1dc7a3Sopenharmony_ci assert(!bm.is_dual_plane); 487cc1dc7a3Sopenharmony_ci 488cc1dc7a3Sopenharmony_ci unsigned int quant_mode = bm.quant_mode; 489cc1dc7a3Sopenharmony_ci unsigned int decim_mode = bm.decimation_mode; 490cc1dc7a3Sopenharmony_ci 491cc1dc7a3Sopenharmony_ci if (quant_mode <= TUNE_MAX_ANGULAR_QUANT) 492cc1dc7a3Sopenharmony_ci { 493cc1dc7a3Sopenharmony_ci low_value[i] = low_values[decim_mode][quant_mode]; 494cc1dc7a3Sopenharmony_ci high_value[i] = high_values[decim_mode][quant_mode]; 495cc1dc7a3Sopenharmony_ci } 496cc1dc7a3Sopenharmony_ci else 497cc1dc7a3Sopenharmony_ci { 498cc1dc7a3Sopenharmony_ci low_value[i] = 0.0f; 499cc1dc7a3Sopenharmony_ci high_value[i] = 1.0f; 500cc1dc7a3Sopenharmony_ci } 501cc1dc7a3Sopenharmony_ci } 502cc1dc7a3Sopenharmony_ci} 503cc1dc7a3Sopenharmony_ci 504cc1dc7a3Sopenharmony_ci/* See header for documentation. */ 505cc1dc7a3Sopenharmony_civoid compute_angular_endpoints_2planes( 506cc1dc7a3Sopenharmony_ci QualityProfile privateProfile, 507cc1dc7a3Sopenharmony_ci const block_size_descriptor& bsd, 508cc1dc7a3Sopenharmony_ci const float* dec_weight_ideal_value, 509cc1dc7a3Sopenharmony_ci unsigned int max_weight_quant, 510cc1dc7a3Sopenharmony_ci compression_working_buffers& tmpbuf 511cc1dc7a3Sopenharmony_ci) { 512cc1dc7a3Sopenharmony_ci float (&low_value1)[WEIGHTS_MAX_BLOCK_MODES] = tmpbuf.weight_low_value1; 513cc1dc7a3Sopenharmony_ci float (&high_value1)[WEIGHTS_MAX_BLOCK_MODES] = tmpbuf.weight_high_value1; 514cc1dc7a3Sopenharmony_ci float (&low_value2)[WEIGHTS_MAX_BLOCK_MODES] = tmpbuf.weight_low_value2; 515cc1dc7a3Sopenharmony_ci float (&high_value2)[WEIGHTS_MAX_BLOCK_MODES] = tmpbuf.weight_high_value2; 516cc1dc7a3Sopenharmony_ci 517cc1dc7a3Sopenharmony_ci float (&low_values1)[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1] = tmpbuf.weight_low_values1; 518cc1dc7a3Sopenharmony_ci float (&high_values1)[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1] = tmpbuf.weight_high_values1; 519cc1dc7a3Sopenharmony_ci float (&low_values2)[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1] = tmpbuf.weight_low_values2; 520cc1dc7a3Sopenharmony_ci float (&high_values2)[WEIGHTS_MAX_DECIMATION_MODES][TUNE_MAX_ANGULAR_QUANT + 1] = tmpbuf.weight_high_values2; 521cc1dc7a3Sopenharmony_ci 522cc1dc7a3Sopenharmony_ci promise(bsd.decimation_mode_count_selected > 0); 523cc1dc7a3Sopenharmony_ci for (unsigned int i = 0; i < bsd.decimation_mode_count_selected; i++) 524cc1dc7a3Sopenharmony_ci { 525cc1dc7a3Sopenharmony_ci const decimation_mode& dm = bsd.decimation_modes[i]; 526cc1dc7a3Sopenharmony_ci if (!dm.is_ref_2plane(static_cast<quant_method>(max_weight_quant))) 527cc1dc7a3Sopenharmony_ci { 528cc1dc7a3Sopenharmony_ci continue; 529cc1dc7a3Sopenharmony_ci } 530cc1dc7a3Sopenharmony_ci 531cc1dc7a3Sopenharmony_ci unsigned int weight_count = bsd.get_decimation_info(i).weight_count; 532cc1dc7a3Sopenharmony_ci 533cc1dc7a3Sopenharmony_ci unsigned int max_precision = dm.maxprec_2planes; 534cc1dc7a3Sopenharmony_ci if (max_precision > TUNE_MAX_ANGULAR_QUANT) 535cc1dc7a3Sopenharmony_ci { 536cc1dc7a3Sopenharmony_ci max_precision = TUNE_MAX_ANGULAR_QUANT; 537cc1dc7a3Sopenharmony_ci } 538cc1dc7a3Sopenharmony_ci 539cc1dc7a3Sopenharmony_ci if (max_precision > max_weight_quant) 540cc1dc7a3Sopenharmony_ci { 541cc1dc7a3Sopenharmony_ci max_precision = max_weight_quant; 542cc1dc7a3Sopenharmony_ci } 543cc1dc7a3Sopenharmony_ci 544cc1dc7a3Sopenharmony_ci compute_angular_endpoints_for_quant_levels( 545cc1dc7a3Sopenharmony_ci privateProfile, 546cc1dc7a3Sopenharmony_ci weight_count, 547cc1dc7a3Sopenharmony_ci dec_weight_ideal_value + i * BLOCK_MAX_WEIGHTS, 548cc1dc7a3Sopenharmony_ci max_precision, low_values1[i], high_values1[i]); 549cc1dc7a3Sopenharmony_ci 550cc1dc7a3Sopenharmony_ci compute_angular_endpoints_for_quant_levels( 551cc1dc7a3Sopenharmony_ci privateProfile, 552cc1dc7a3Sopenharmony_ci weight_count, 553cc1dc7a3Sopenharmony_ci dec_weight_ideal_value + i * BLOCK_MAX_WEIGHTS + WEIGHTS_PLANE2_OFFSET, 554cc1dc7a3Sopenharmony_ci max_precision, low_values2[i], high_values2[i]); 555cc1dc7a3Sopenharmony_ci } 556cc1dc7a3Sopenharmony_ci 557cc1dc7a3Sopenharmony_ci unsigned int start = bsd.block_mode_count_1plane_selected; 558cc1dc7a3Sopenharmony_ci unsigned int end = bsd.block_mode_count_1plane_2plane_selected; 559cc1dc7a3Sopenharmony_ci for (unsigned int i = start; i < end; i++) 560cc1dc7a3Sopenharmony_ci { 561cc1dc7a3Sopenharmony_ci const block_mode& bm = bsd.block_modes[i]; 562cc1dc7a3Sopenharmony_ci unsigned int quant_mode = bm.quant_mode; 563cc1dc7a3Sopenharmony_ci unsigned int decim_mode = bm.decimation_mode; 564cc1dc7a3Sopenharmony_ci 565cc1dc7a3Sopenharmony_ci if (quant_mode <= TUNE_MAX_ANGULAR_QUANT) 566cc1dc7a3Sopenharmony_ci { 567cc1dc7a3Sopenharmony_ci low_value1[i] = low_values1[decim_mode][quant_mode]; 568cc1dc7a3Sopenharmony_ci high_value1[i] = high_values1[decim_mode][quant_mode]; 569cc1dc7a3Sopenharmony_ci low_value2[i] = low_values2[decim_mode][quant_mode]; 570cc1dc7a3Sopenharmony_ci high_value2[i] = high_values2[decim_mode][quant_mode]; 571cc1dc7a3Sopenharmony_ci } 572cc1dc7a3Sopenharmony_ci else 573cc1dc7a3Sopenharmony_ci { 574cc1dc7a3Sopenharmony_ci low_value1[i] = 0.0f; 575cc1dc7a3Sopenharmony_ci high_value1[i] = 1.0f; 576cc1dc7a3Sopenharmony_ci low_value2[i] = 0.0f; 577cc1dc7a3Sopenharmony_ci high_value2[i] = 1.0f; 578cc1dc7a3Sopenharmony_ci } 579cc1dc7a3Sopenharmony_ci } 580cc1dc7a3Sopenharmony_ci} 581cc1dc7a3Sopenharmony_ci 582cc1dc7a3Sopenharmony_ci#endif 583