1/*
2 * Bluetooth low-complexity, subband codec (SBC)
3 *
4 * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
5 * Copyright (C) 2008-2010  Nokia Corporation
6 * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
7 * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
8 * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
9 *
10 * This file is part of FFmpeg.
11 *
12 * FFmpeg is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
16 *
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 * Lesser General Public License for more details.
21 *
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 */
26
27/**
28 * @file
29 * SBC ARMv6 optimization for some basic "building bricks"
30 */
31
32#include "libavutil/attributes.h"
33#include "libavutil/cpu.h"
34#include "libavutil/mem_internal.h"
35#include "libavutil/arm/cpu.h"
36#include "libavcodec/sbcdsp.h"
37
/*
 * ARMv6 analysis routines. They are only declared here; the definitions
 * live outside this file.
 */
void ff_sbc_analyze_4_armv6(const int16_t *in,
                            int32_t *out,
                            const int16_t *consts);
void ff_sbc_analyze_8_armv6(const int16_t *in,
                            int32_t *out,
                            const int16_t *consts);
40
41void ff_sbc_analyze_4_neon(const int16_t *in, int32_t *out, const int16_t *consts);
42void ff_sbc_analyze_8_neon(const int16_t *in, int32_t *out, const int16_t *consts);
43void ff_sbc_calc_scalefactors_neon(int32_t sb_sample_f[16][2][8],
44                                   uint32_t scale_factor[2][8],
45                                   int blocks, int channels, int subbands);
46int ff_sbc_calc_scalefactors_j_neon(int32_t sb_sample_f[16][2][8],
47                                    uint32_t scale_factor[2][8],
48                                    int blocks, int subbands);
49int ff_sbc_enc_process_input_4s_neon(int position, const uint8_t *pcm,
50                                     int16_t X[2][SBC_X_BUFFER_SIZE],
51                                     int nsamples, int nchannels);
52int ff_sbc_enc_process_input_8s_neon(int position, const uint8_t *pcm,
53                                     int16_t X[2][SBC_X_BUFFER_SIZE],
54                                     int nsamples, int nchannels);
55
56DECLARE_ALIGNED(SBC_ALIGN, int32_t, ff_sbcdsp_joint_bits_mask)[8] = {
57    8,   4,  2,  1, 128, 64, 32, 16
58};
59
/*
 * PERM_PAIR(i) expands to the two values 2*i and 2*i + 1. On big-endian
 * builds the order is swapped (2*i + 1 first). These look like the byte
 * offsets of 16-bit element i - TODO confirm against the users of the
 * tables.
 *
 * The argument is parenthesized so that an expression argument such as
 * "n + 1" is multiplied as a whole.
 */
#if HAVE_BIGENDIAN
#define PERM_PAIR(i) ((i) * 2) + 1, ((i) * 2) + 0
#else
#define PERM_PAIR(i) ((i) * 2) + 0, ((i) * 2) + 1
#endif

/*
 * PERM(a, b, c, d) expands to a brace-enclosed initializer of eight values:
 * the pairs for a, b, c and d, in that order.
 */
#define PERM(a, b, c, d) {              \
        PERM_PAIR(a), PERM_PAIR(b),     \
        PERM_PAIR(c), PERM_PAIR(d)      \
    }
75
76DECLARE_ALIGNED(SBC_ALIGN, uint8_t, ff_sbc_input_perm_4)[2][8] = {
77    PERM(7, 3, 6, 4),
78    PERM(0, 2, 1, 5)
79};
80
81DECLARE_ALIGNED(SBC_ALIGN, uint8_t, ff_sbc_input_perm_8)[4][8] = {
82    PERM(15, 7, 14,  8),
83    PERM(13, 9, 12, 10),
84    PERM(11, 3,  6,  0),
85    PERM( 5, 1,  4,  2)
86};
87
88av_cold void ff_sbcdsp_init_arm(SBCDSPContext *s)
89{
90    int cpu_flags = av_get_cpu_flags();
91
92    if (have_armv6(cpu_flags)) {
93        s->sbc_analyze_4 = ff_sbc_analyze_4_armv6;
94        s->sbc_analyze_8 = ff_sbc_analyze_8_armv6;
95    }
96
97    if (have_neon(cpu_flags)) {
98        s->sbc_analyze_4 = ff_sbc_analyze_4_neon;
99        s->sbc_analyze_8 = ff_sbc_analyze_8_neon;
100        s->sbc_calc_scalefactors = ff_sbc_calc_scalefactors_neon;
101        s->sbc_calc_scalefactors_j = ff_sbc_calc_scalefactors_j_neon;
102        if (s->increment != 1) {
103            s->sbc_enc_process_input_4s = ff_sbc_enc_process_input_4s_neon;
104            s->sbc_enc_process_input_8s = ff_sbc_enc_process_input_8s_neon;
105        }
106    }
107}
108