xref: /third_party/ffmpeg/libavcodec/sbcdsp.c (revision cabdff1a)
1cabdff1aSopenharmony_ci/*
2cabdff1aSopenharmony_ci * Bluetooth low-complexity, subband codec (SBC)
3cabdff1aSopenharmony_ci *
4cabdff1aSopenharmony_ci * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
5cabdff1aSopenharmony_ci * Copyright (C) 2012-2013  Intel Corporation
6cabdff1aSopenharmony_ci * Copyright (C) 2008-2010  Nokia Corporation
7cabdff1aSopenharmony_ci * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
8cabdff1aSopenharmony_ci * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
9cabdff1aSopenharmony_ci * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
10cabdff1aSopenharmony_ci *
11cabdff1aSopenharmony_ci * This file is part of FFmpeg.
12cabdff1aSopenharmony_ci *
13cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or
14cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public
15cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either
16cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version.
17cabdff1aSopenharmony_ci *
18cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful,
19cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of
20cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21cabdff1aSopenharmony_ci * Lesser General Public License for more details.
22cabdff1aSopenharmony_ci *
23cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public
24cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software
25cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26cabdff1aSopenharmony_ci */
27cabdff1aSopenharmony_ci
28cabdff1aSopenharmony_ci/**
29cabdff1aSopenharmony_ci * @file
30cabdff1aSopenharmony_ci * SBC basic "building bricks"
31cabdff1aSopenharmony_ci */
32cabdff1aSopenharmony_ci
33cabdff1aSopenharmony_ci#include <stdint.h>
34cabdff1aSopenharmony_ci#include <limits.h>
35cabdff1aSopenharmony_ci#include <string.h>
36cabdff1aSopenharmony_ci#include "libavutil/common.h"
37cabdff1aSopenharmony_ci#include "libavutil/intmath.h"
38cabdff1aSopenharmony_ci#include "libavutil/intreadwrite.h"
39cabdff1aSopenharmony_ci#include "sbc.h"
40cabdff1aSopenharmony_ci#include "sbcdsp.h"
41cabdff1aSopenharmony_ci#include "sbcdsp_data.h"
42cabdff1aSopenharmony_ci
43cabdff1aSopenharmony_ci/*
44cabdff1aSopenharmony_ci * A reference C code of analysis filter with SIMD-friendly tables
45cabdff1aSopenharmony_ci * reordering and code layout. This code can be used to develop platform
46cabdff1aSopenharmony_ci * specific SIMD optimizations. Also it may be used as some kind of test
47cabdff1aSopenharmony_ci * for compiler autovectorization capabilities (who knows, if the compiler
48cabdff1aSopenharmony_ci * is very good at this stuff, hand optimized assembly may be not strictly
49cabdff1aSopenharmony_ci * needed for some platform).
50cabdff1aSopenharmony_ci *
51cabdff1aSopenharmony_ci * Note: It is also possible to make a simple variant of analysis filter,
52cabdff1aSopenharmony_ci * which needs only a single constants table without taking care about
53cabdff1aSopenharmony_ci * even/odd cases. This simple variant of filter can be implemented without
54cabdff1aSopenharmony_ci * input data permutation. The only thing that would be lost is the
55cabdff1aSopenharmony_ci * possibility to use pairwise SIMD multiplications. But for some simple
56cabdff1aSopenharmony_ci * CPU cores without SIMD extensions it can be useful. If anybody is
57cabdff1aSopenharmony_ci * interested in implementing such variant of a filter, sourcecode from
58cabdff1aSopenharmony_ci * bluez versions 4.26/4.27 can be used as a reference and the history of
59cabdff1aSopenharmony_ci * the changes in git repository done around that time may be worth checking.
60cabdff1aSopenharmony_ci */
61cabdff1aSopenharmony_ci
62cabdff1aSopenharmony_cistatic av_always_inline void sbc_analyze_simd(const int16_t *in, int32_t *out,
63cabdff1aSopenharmony_ci                                              const int16_t *consts,
64cabdff1aSopenharmony_ci                                              unsigned subbands)
65cabdff1aSopenharmony_ci{
66cabdff1aSopenharmony_ci    int32_t t1[8];
67cabdff1aSopenharmony_ci    int16_t t2[8];
68cabdff1aSopenharmony_ci    int i, j, hop = 0;
69cabdff1aSopenharmony_ci
70cabdff1aSopenharmony_ci    /* rounding coefficient */
71cabdff1aSopenharmony_ci    for (i = 0; i < subbands; i++)
72cabdff1aSopenharmony_ci        t1[i] = 1 << (SBC_PROTO_FIXED_SCALE - 1);
73cabdff1aSopenharmony_ci
74cabdff1aSopenharmony_ci    /* low pass polyphase filter */
75cabdff1aSopenharmony_ci    for (hop = 0; hop < 10*subbands; hop += 2*subbands)
76cabdff1aSopenharmony_ci        for (i = 0; i < 2*subbands; i++)
77cabdff1aSopenharmony_ci            t1[i >> 1] += in[hop + i] * consts[hop + i];
78cabdff1aSopenharmony_ci
79cabdff1aSopenharmony_ci    /* scaling */
80cabdff1aSopenharmony_ci    for (i = 0; i < subbands; i++)
81cabdff1aSopenharmony_ci        t2[i] = t1[i] >> SBC_PROTO_FIXED_SCALE;
82cabdff1aSopenharmony_ci
83cabdff1aSopenharmony_ci    memset(t1, 0, sizeof(t1));
84cabdff1aSopenharmony_ci
85cabdff1aSopenharmony_ci    /* do the cos transform */
86cabdff1aSopenharmony_ci    for (i = 0; i < subbands/2; i++)
87cabdff1aSopenharmony_ci        for (j = 0; j < 2*subbands; j++)
88cabdff1aSopenharmony_ci            t1[j>>1] += t2[i * 2 + (j&1)] * consts[10*subbands + i*2*subbands + j];
89cabdff1aSopenharmony_ci
90cabdff1aSopenharmony_ci    for (i = 0; i < subbands; i++)
91cabdff1aSopenharmony_ci        out[i] = t1[i] >> (SBC_COS_TABLE_FIXED_SCALE - SCALE_OUT_BITS);
92cabdff1aSopenharmony_ci}
93cabdff1aSopenharmony_ci
/* 4-subband entry point: forwards to sbc_analyze_simd() with subbands = 4. */
static void sbc_analyze_4_simd(const int16_t *in, int32_t *out,
                               const int16_t *consts)
{
    const unsigned nb_subbands = 4;

    sbc_analyze_simd(in, out, consts, nb_subbands);
}
99cabdff1aSopenharmony_ci
/* 8-subband entry point: forwards to sbc_analyze_simd() with subbands = 8. */
static void sbc_analyze_8_simd(const int16_t *in, int32_t *out,
                               const int16_t *consts)
{
    const unsigned nb_subbands = 8;

    sbc_analyze_simd(in, out, consts, nb_subbands);
}
105cabdff1aSopenharmony_ci
106cabdff1aSopenharmony_cistatic inline void sbc_analyze_4b_4s_simd(SBCDSPContext *s,
107cabdff1aSopenharmony_ci                                          int16_t *x, int32_t *out, int out_stride)
108cabdff1aSopenharmony_ci{
109cabdff1aSopenharmony_ci    /* Analyze blocks */
110cabdff1aSopenharmony_ci    s->sbc_analyze_4(x + 12, out, ff_sbcdsp_analysis_consts_fixed4_simd_odd);
111cabdff1aSopenharmony_ci    out += out_stride;
112cabdff1aSopenharmony_ci    s->sbc_analyze_4(x + 8, out, ff_sbcdsp_analysis_consts_fixed4_simd_even);
113cabdff1aSopenharmony_ci    out += out_stride;
114cabdff1aSopenharmony_ci    s->sbc_analyze_4(x + 4, out, ff_sbcdsp_analysis_consts_fixed4_simd_odd);
115cabdff1aSopenharmony_ci    out += out_stride;
116cabdff1aSopenharmony_ci    s->sbc_analyze_4(x + 0, out, ff_sbcdsp_analysis_consts_fixed4_simd_even);
117cabdff1aSopenharmony_ci}
118cabdff1aSopenharmony_ci
119cabdff1aSopenharmony_cistatic inline void sbc_analyze_4b_8s_simd(SBCDSPContext *s,
120cabdff1aSopenharmony_ci                                          int16_t *x, int32_t *out, int out_stride)
121cabdff1aSopenharmony_ci{
122cabdff1aSopenharmony_ci    /* Analyze blocks */
123cabdff1aSopenharmony_ci    s->sbc_analyze_8(x + 24, out, ff_sbcdsp_analysis_consts_fixed8_simd_odd);
124cabdff1aSopenharmony_ci    out += out_stride;
125cabdff1aSopenharmony_ci    s->sbc_analyze_8(x + 16, out, ff_sbcdsp_analysis_consts_fixed8_simd_even);
126cabdff1aSopenharmony_ci    out += out_stride;
127cabdff1aSopenharmony_ci    s->sbc_analyze_8(x + 8, out, ff_sbcdsp_analysis_consts_fixed8_simd_odd);
128cabdff1aSopenharmony_ci    out += out_stride;
129cabdff1aSopenharmony_ci    s->sbc_analyze_8(x + 0, out, ff_sbcdsp_analysis_consts_fixed8_simd_even);
130cabdff1aSopenharmony_ci}
131cabdff1aSopenharmony_ci
132cabdff1aSopenharmony_cistatic inline void sbc_analyze_1b_8s_simd_even(SBCDSPContext *s,
133cabdff1aSopenharmony_ci                                               int16_t *x, int32_t *out,
134cabdff1aSopenharmony_ci                                               int out_stride);
135cabdff1aSopenharmony_ci
136cabdff1aSopenharmony_cistatic inline void sbc_analyze_1b_8s_simd_odd(SBCDSPContext *s,
137cabdff1aSopenharmony_ci                                              int16_t *x, int32_t *out,
138cabdff1aSopenharmony_ci                                              int out_stride)
139cabdff1aSopenharmony_ci{
140cabdff1aSopenharmony_ci    s->sbc_analyze_8(x, out, ff_sbcdsp_analysis_consts_fixed8_simd_odd);
141cabdff1aSopenharmony_ci    s->sbc_analyze_8s = sbc_analyze_1b_8s_simd_even;
142cabdff1aSopenharmony_ci}
143cabdff1aSopenharmony_ci
144cabdff1aSopenharmony_cistatic inline void sbc_analyze_1b_8s_simd_even(SBCDSPContext *s,
145cabdff1aSopenharmony_ci                                               int16_t *x, int32_t *out,
146cabdff1aSopenharmony_ci                                               int out_stride)
147cabdff1aSopenharmony_ci{
148cabdff1aSopenharmony_ci    s->sbc_analyze_8(x, out, ff_sbcdsp_analysis_consts_fixed8_simd_even);
149cabdff1aSopenharmony_ci    s->sbc_analyze_8s = sbc_analyze_1b_8s_simd_odd;
150cabdff1aSopenharmony_ci}
151cabdff1aSopenharmony_ci
/*
 * Input data processing functions. The data is endian converted if needed,
 * channels are deinterleaved and audio samples are reordered for use in
 * SIMD-friendly analysis filter function. The results are put into "X"
 * array, getting appended to the previous data (or it is better to say
 * prepended, as the buffer is filled from top to bottom). Old data is
 * discarded when needed, but availability of (10 * nrof_subbands)
 * contiguous samples is always guaranteed for the input to the analysis
 * filter. This is achieved by copying a sufficient part of old data
 * to the top of the buffer on buffer wraparound.
 */
163cabdff1aSopenharmony_ci
164cabdff1aSopenharmony_cistatic int sbc_enc_process_input_4s(int position, const uint8_t *pcm,
165cabdff1aSopenharmony_ci                                    int16_t X[2][SBC_X_BUFFER_SIZE],
166cabdff1aSopenharmony_ci                                    int nsamples, int nchannels)
167cabdff1aSopenharmony_ci{
168cabdff1aSopenharmony_ci    int c;
169cabdff1aSopenharmony_ci
170cabdff1aSopenharmony_ci    /* handle X buffer wraparound */
171cabdff1aSopenharmony_ci    if (position < nsamples) {
172cabdff1aSopenharmony_ci        for (c = 0; c < nchannels; c++)
173cabdff1aSopenharmony_ci            memcpy(&X[c][SBC_X_BUFFER_SIZE - 40], &X[c][position],
174cabdff1aSopenharmony_ci                            36 * sizeof(int16_t));
175cabdff1aSopenharmony_ci        position = SBC_X_BUFFER_SIZE - 40;
176cabdff1aSopenharmony_ci    }
177cabdff1aSopenharmony_ci
178cabdff1aSopenharmony_ci    /* copy/permutate audio samples */
179cabdff1aSopenharmony_ci    for (; nsamples >= 8; nsamples -= 8, pcm += 16 * nchannels) {
180cabdff1aSopenharmony_ci        position -= 8;
181cabdff1aSopenharmony_ci        for (c = 0; c < nchannels; c++) {
182cabdff1aSopenharmony_ci            int16_t *x = &X[c][position];
183cabdff1aSopenharmony_ci            x[0] = AV_RN16(pcm + 14*nchannels + 2*c);
184cabdff1aSopenharmony_ci            x[1] = AV_RN16(pcm +  6*nchannels + 2*c);
185cabdff1aSopenharmony_ci            x[2] = AV_RN16(pcm + 12*nchannels + 2*c);
186cabdff1aSopenharmony_ci            x[3] = AV_RN16(pcm +  8*nchannels + 2*c);
187cabdff1aSopenharmony_ci            x[4] = AV_RN16(pcm +  0*nchannels + 2*c);
188cabdff1aSopenharmony_ci            x[5] = AV_RN16(pcm +  4*nchannels + 2*c);
189cabdff1aSopenharmony_ci            x[6] = AV_RN16(pcm +  2*nchannels + 2*c);
190cabdff1aSopenharmony_ci            x[7] = AV_RN16(pcm + 10*nchannels + 2*c);
191cabdff1aSopenharmony_ci        }
192cabdff1aSopenharmony_ci    }
193cabdff1aSopenharmony_ci
194cabdff1aSopenharmony_ci    return position;
195cabdff1aSopenharmony_ci}
196cabdff1aSopenharmony_ci
197cabdff1aSopenharmony_cistatic int sbc_enc_process_input_8s(int position, const uint8_t *pcm,
198cabdff1aSopenharmony_ci                                    int16_t X[2][SBC_X_BUFFER_SIZE],
199cabdff1aSopenharmony_ci                                    int nsamples, int nchannels)
200cabdff1aSopenharmony_ci{
201cabdff1aSopenharmony_ci    int c;
202cabdff1aSopenharmony_ci
203cabdff1aSopenharmony_ci    /* handle X buffer wraparound */
204cabdff1aSopenharmony_ci    if (position < nsamples) {
205cabdff1aSopenharmony_ci        for (c = 0; c < nchannels; c++)
206cabdff1aSopenharmony_ci            memcpy(&X[c][SBC_X_BUFFER_SIZE - 72], &X[c][position],
207cabdff1aSopenharmony_ci                            72 * sizeof(int16_t));
208cabdff1aSopenharmony_ci        position = SBC_X_BUFFER_SIZE - 72;
209cabdff1aSopenharmony_ci    }
210cabdff1aSopenharmony_ci
211cabdff1aSopenharmony_ci    if (position % 16 == 8) {
212cabdff1aSopenharmony_ci        position -= 8;
213cabdff1aSopenharmony_ci        nsamples -= 8;
214cabdff1aSopenharmony_ci        for (c = 0; c < nchannels; c++) {
215cabdff1aSopenharmony_ci            int16_t *x = &X[c][position];
216cabdff1aSopenharmony_ci            x[0] = AV_RN16(pcm + 14*nchannels + 2*c);
217cabdff1aSopenharmony_ci            x[2] = AV_RN16(pcm + 12*nchannels + 2*c);
218cabdff1aSopenharmony_ci            x[3] = AV_RN16(pcm +  0*nchannels + 2*c);
219cabdff1aSopenharmony_ci            x[4] = AV_RN16(pcm + 10*nchannels + 2*c);
220cabdff1aSopenharmony_ci            x[5] = AV_RN16(pcm +  2*nchannels + 2*c);
221cabdff1aSopenharmony_ci            x[6] = AV_RN16(pcm +  8*nchannels + 2*c);
222cabdff1aSopenharmony_ci            x[7] = AV_RN16(pcm +  4*nchannels + 2*c);
223cabdff1aSopenharmony_ci            x[8] = AV_RN16(pcm +  6*nchannels + 2*c);
224cabdff1aSopenharmony_ci        }
225cabdff1aSopenharmony_ci        pcm += 16 * nchannels;
226cabdff1aSopenharmony_ci    }
227cabdff1aSopenharmony_ci
228cabdff1aSopenharmony_ci    /* copy/permutate audio samples */
229cabdff1aSopenharmony_ci    for (; nsamples >= 16; nsamples -= 16, pcm += 32 * nchannels) {
230cabdff1aSopenharmony_ci        position -= 16;
231cabdff1aSopenharmony_ci        for (c = 0; c < nchannels; c++) {
232cabdff1aSopenharmony_ci            int16_t *x = &X[c][position];
233cabdff1aSopenharmony_ci            x[0]  = AV_RN16(pcm + 30*nchannels + 2*c);
234cabdff1aSopenharmony_ci            x[1]  = AV_RN16(pcm + 14*nchannels + 2*c);
235cabdff1aSopenharmony_ci            x[2]  = AV_RN16(pcm + 28*nchannels + 2*c);
236cabdff1aSopenharmony_ci            x[3]  = AV_RN16(pcm + 16*nchannels + 2*c);
237cabdff1aSopenharmony_ci            x[4]  = AV_RN16(pcm + 26*nchannels + 2*c);
238cabdff1aSopenharmony_ci            x[5]  = AV_RN16(pcm + 18*nchannels + 2*c);
239cabdff1aSopenharmony_ci            x[6]  = AV_RN16(pcm + 24*nchannels + 2*c);
240cabdff1aSopenharmony_ci            x[7]  = AV_RN16(pcm + 20*nchannels + 2*c);
241cabdff1aSopenharmony_ci            x[8]  = AV_RN16(pcm + 22*nchannels + 2*c);
242cabdff1aSopenharmony_ci            x[9]  = AV_RN16(pcm +  6*nchannels + 2*c);
243cabdff1aSopenharmony_ci            x[10] = AV_RN16(pcm + 12*nchannels + 2*c);
244cabdff1aSopenharmony_ci            x[11] = AV_RN16(pcm +  0*nchannels + 2*c);
245cabdff1aSopenharmony_ci            x[12] = AV_RN16(pcm + 10*nchannels + 2*c);
246cabdff1aSopenharmony_ci            x[13] = AV_RN16(pcm +  2*nchannels + 2*c);
247cabdff1aSopenharmony_ci            x[14] = AV_RN16(pcm +  8*nchannels + 2*c);
248cabdff1aSopenharmony_ci            x[15] = AV_RN16(pcm +  4*nchannels + 2*c);
249cabdff1aSopenharmony_ci        }
250cabdff1aSopenharmony_ci    }
251cabdff1aSopenharmony_ci
252cabdff1aSopenharmony_ci    if (nsamples == 8) {
253cabdff1aSopenharmony_ci        position -= 8;
254cabdff1aSopenharmony_ci        for (c = 0; c < nchannels; c++) {
255cabdff1aSopenharmony_ci            int16_t *x = &X[c][position];
256cabdff1aSopenharmony_ci            x[-7] = AV_RN16(pcm + 14*nchannels + 2*c);
257cabdff1aSopenharmony_ci            x[1]  = AV_RN16(pcm +  6*nchannels + 2*c);
258cabdff1aSopenharmony_ci            x[2]  = AV_RN16(pcm + 12*nchannels + 2*c);
259cabdff1aSopenharmony_ci            x[3]  = AV_RN16(pcm +  0*nchannels + 2*c);
260cabdff1aSopenharmony_ci            x[4]  = AV_RN16(pcm + 10*nchannels + 2*c);
261cabdff1aSopenharmony_ci            x[5]  = AV_RN16(pcm +  2*nchannels + 2*c);
262cabdff1aSopenharmony_ci            x[6]  = AV_RN16(pcm +  8*nchannels + 2*c);
263cabdff1aSopenharmony_ci            x[7]  = AV_RN16(pcm +  4*nchannels + 2*c);
264cabdff1aSopenharmony_ci        }
265cabdff1aSopenharmony_ci    }
266cabdff1aSopenharmony_ci
267cabdff1aSopenharmony_ci    return position;
268cabdff1aSopenharmony_ci}
269cabdff1aSopenharmony_ci
270cabdff1aSopenharmony_cistatic void sbc_calc_scalefactors(int32_t sb_sample_f[16][2][8],
271cabdff1aSopenharmony_ci                                  uint32_t scale_factor[2][8],
272cabdff1aSopenharmony_ci                                  int blocks, int channels, int subbands)
273cabdff1aSopenharmony_ci{
274cabdff1aSopenharmony_ci    int ch, sb, blk;
275cabdff1aSopenharmony_ci    for (ch = 0; ch < channels; ch++) {
276cabdff1aSopenharmony_ci        for (sb = 0; sb < subbands; sb++) {
277cabdff1aSopenharmony_ci            uint32_t x = 1 << SCALE_OUT_BITS;
278cabdff1aSopenharmony_ci            for (blk = 0; blk < blocks; blk++) {
279cabdff1aSopenharmony_ci                int32_t tmp = FFABS(sb_sample_f[blk][ch][sb]);
280cabdff1aSopenharmony_ci                if (tmp != 0)
281cabdff1aSopenharmony_ci                    x |= tmp - 1;
282cabdff1aSopenharmony_ci            }
283cabdff1aSopenharmony_ci            scale_factor[ch][sb] = (31 - SCALE_OUT_BITS) - ff_clz(x);
284cabdff1aSopenharmony_ci        }
285cabdff1aSopenharmony_ci    }
286cabdff1aSopenharmony_ci}
287cabdff1aSopenharmony_ci
288cabdff1aSopenharmony_cistatic int sbc_calc_scalefactors_j(int32_t sb_sample_f[16][2][8],
289cabdff1aSopenharmony_ci                                   uint32_t scale_factor[2][8],
290cabdff1aSopenharmony_ci                                   int blocks, int subbands)
291cabdff1aSopenharmony_ci{
292cabdff1aSopenharmony_ci    int blk, joint = 0;
293cabdff1aSopenharmony_ci    int32_t tmp0, tmp1;
294cabdff1aSopenharmony_ci    uint32_t x, y;
295cabdff1aSopenharmony_ci
296cabdff1aSopenharmony_ci    /* last subband does not use joint stereo */
297cabdff1aSopenharmony_ci    int sb = subbands - 1;
298cabdff1aSopenharmony_ci    x = 1 << SCALE_OUT_BITS;
299cabdff1aSopenharmony_ci    y = 1 << SCALE_OUT_BITS;
300cabdff1aSopenharmony_ci    for (blk = 0; blk < blocks; blk++) {
301cabdff1aSopenharmony_ci        tmp0 = FFABS(sb_sample_f[blk][0][sb]);
302cabdff1aSopenharmony_ci        tmp1 = FFABS(sb_sample_f[blk][1][sb]);
303cabdff1aSopenharmony_ci        if (tmp0 != 0)
304cabdff1aSopenharmony_ci            x |= tmp0 - 1;
305cabdff1aSopenharmony_ci        if (tmp1 != 0)
306cabdff1aSopenharmony_ci            y |= tmp1 - 1;
307cabdff1aSopenharmony_ci    }
308cabdff1aSopenharmony_ci    scale_factor[0][sb] = (31 - SCALE_OUT_BITS) - ff_clz(x);
309cabdff1aSopenharmony_ci    scale_factor[1][sb] = (31 - SCALE_OUT_BITS) - ff_clz(y);
310cabdff1aSopenharmony_ci
311cabdff1aSopenharmony_ci    /* the rest of subbands can use joint stereo */
312cabdff1aSopenharmony_ci    while (--sb >= 0) {
313cabdff1aSopenharmony_ci        int32_t sb_sample_j[16][2];
314cabdff1aSopenharmony_ci        x = 1 << SCALE_OUT_BITS;
315cabdff1aSopenharmony_ci        y = 1 << SCALE_OUT_BITS;
316cabdff1aSopenharmony_ci        for (blk = 0; blk < blocks; blk++) {
317cabdff1aSopenharmony_ci            tmp0 = sb_sample_f[blk][0][sb];
318cabdff1aSopenharmony_ci            tmp1 = sb_sample_f[blk][1][sb];
319cabdff1aSopenharmony_ci            sb_sample_j[blk][0] = (tmp0 >> 1) + (tmp1 >> 1);
320cabdff1aSopenharmony_ci            sb_sample_j[blk][1] = (tmp0 >> 1) - (tmp1 >> 1);
321cabdff1aSopenharmony_ci            tmp0 = FFABS(tmp0);
322cabdff1aSopenharmony_ci            tmp1 = FFABS(tmp1);
323cabdff1aSopenharmony_ci            if (tmp0 != 0)
324cabdff1aSopenharmony_ci                x |= tmp0 - 1;
325cabdff1aSopenharmony_ci            if (tmp1 != 0)
326cabdff1aSopenharmony_ci                y |= tmp1 - 1;
327cabdff1aSopenharmony_ci        }
328cabdff1aSopenharmony_ci        scale_factor[0][sb] = (31 - SCALE_OUT_BITS) -
329cabdff1aSopenharmony_ci            ff_clz(x);
330cabdff1aSopenharmony_ci        scale_factor[1][sb] = (31 - SCALE_OUT_BITS) -
331cabdff1aSopenharmony_ci            ff_clz(y);
332cabdff1aSopenharmony_ci        x = 1 << SCALE_OUT_BITS;
333cabdff1aSopenharmony_ci        y = 1 << SCALE_OUT_BITS;
334cabdff1aSopenharmony_ci        for (blk = 0; blk < blocks; blk++) {
335cabdff1aSopenharmony_ci            tmp0 = FFABS(sb_sample_j[blk][0]);
336cabdff1aSopenharmony_ci            tmp1 = FFABS(sb_sample_j[blk][1]);
337cabdff1aSopenharmony_ci            if (tmp0 != 0)
338cabdff1aSopenharmony_ci                x |= tmp0 - 1;
339cabdff1aSopenharmony_ci            if (tmp1 != 0)
340cabdff1aSopenharmony_ci                y |= tmp1 - 1;
341cabdff1aSopenharmony_ci        }
342cabdff1aSopenharmony_ci        x = (31 - SCALE_OUT_BITS) - ff_clz(x);
343cabdff1aSopenharmony_ci        y = (31 - SCALE_OUT_BITS) - ff_clz(y);
344cabdff1aSopenharmony_ci
345cabdff1aSopenharmony_ci        /* decide whether to use joint stereo for this subband */
346cabdff1aSopenharmony_ci        if ((scale_factor[0][sb] + scale_factor[1][sb]) > x + y) {
347cabdff1aSopenharmony_ci            joint |= 1 << (subbands - 1 - sb);
348cabdff1aSopenharmony_ci            scale_factor[0][sb] = x;
349cabdff1aSopenharmony_ci            scale_factor[1][sb] = y;
350cabdff1aSopenharmony_ci            for (blk = 0; blk < blocks; blk++) {
351cabdff1aSopenharmony_ci                sb_sample_f[blk][0][sb] = sb_sample_j[blk][0];
352cabdff1aSopenharmony_ci                sb_sample_f[blk][1][sb] = sb_sample_j[blk][1];
353cabdff1aSopenharmony_ci            }
354cabdff1aSopenharmony_ci        }
355cabdff1aSopenharmony_ci    }
356cabdff1aSopenharmony_ci
357cabdff1aSopenharmony_ci    /* bitmask with the information about subbands using joint stereo */
358cabdff1aSopenharmony_ci    return joint;
359cabdff1aSopenharmony_ci}
360cabdff1aSopenharmony_ci
361cabdff1aSopenharmony_ci/*
362cabdff1aSopenharmony_ci * Detect CPU features and setup function pointers
363cabdff1aSopenharmony_ci */
364cabdff1aSopenharmony_ciav_cold void ff_sbcdsp_init(SBCDSPContext *s)
365cabdff1aSopenharmony_ci{
366cabdff1aSopenharmony_ci    /* Default implementation for analyze functions */
367cabdff1aSopenharmony_ci    s->sbc_analyze_4 = sbc_analyze_4_simd;
368cabdff1aSopenharmony_ci    s->sbc_analyze_8 = sbc_analyze_8_simd;
369cabdff1aSopenharmony_ci    s->sbc_analyze_4s = sbc_analyze_4b_4s_simd;
370cabdff1aSopenharmony_ci    if (s->increment == 1)
371cabdff1aSopenharmony_ci        s->sbc_analyze_8s = sbc_analyze_1b_8s_simd_odd;
372cabdff1aSopenharmony_ci    else
373cabdff1aSopenharmony_ci        s->sbc_analyze_8s = sbc_analyze_4b_8s_simd;
374cabdff1aSopenharmony_ci
375cabdff1aSopenharmony_ci    /* Default implementation for input reordering / deinterleaving */
376cabdff1aSopenharmony_ci    s->sbc_enc_process_input_4s = sbc_enc_process_input_4s;
377cabdff1aSopenharmony_ci    s->sbc_enc_process_input_8s = sbc_enc_process_input_8s;
378cabdff1aSopenharmony_ci
379cabdff1aSopenharmony_ci    /* Default implementation for scale factors calculation */
380cabdff1aSopenharmony_ci    s->sbc_calc_scalefactors = sbc_calc_scalefactors;
381cabdff1aSopenharmony_ci    s->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j;
382cabdff1aSopenharmony_ci
383cabdff1aSopenharmony_ci#if ARCH_ARM
384cabdff1aSopenharmony_ci    ff_sbcdsp_init_arm(s);
385cabdff1aSopenharmony_ci#elif ARCH_X86
386cabdff1aSopenharmony_ci    ff_sbcdsp_init_x86(s);
387cabdff1aSopenharmony_ci#endif
388cabdff1aSopenharmony_ci}
389