xref: /third_party/ffmpeg/libavutil/tx.c (revision cabdff1a)
1/*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19#include "cpu.h"
20#include "qsort.h"
21#include "bprint.h"
22
23#include "tx_priv.h"
24
/* Nonzero when transform type x is the float, double or int32 flavour of the
 * transform kind named by `type` (token-pasted, e.g. FFT, MDCT, RDFT).
 * Note that x is evaluated more than once. */
#define TYPE_IS(type, x)                 \
    ((x) == AV_TX_FLOAT_  ## type ||     \
     (x) == AV_TX_DOUBLE_ ## type ||     \
     (x) == AV_TX_INT32_  ## type)
29
30/* Calculates the modular multiplicative inverse */
31static av_always_inline int mulinv(int n, int m)
32{
33    n = n % m;
34    for (int x = 1; x < m; x++)
35        if (((n * x) % m) == 1)
36            return x;
37    av_assert0(0); /* Never reached */
38    return 0;
39}
40
41/* Guaranteed to work for any n, m where gcd(n, m) == 1 */
42int ff_tx_gen_compound_mapping(AVTXContext *s, int n, int m)
43{
44    int *in_map, *out_map;
45    const int inv = s->inv;
46    const int len = n*m;    /* Will not be equal to s->len for MDCTs */
47    const int mdct = TYPE_IS(MDCT, s->type);
48    int m_inv, n_inv;
49
50    /* Make sure the numbers are coprime */
51    if (av_gcd(n, m) != 1)
52        return AVERROR(EINVAL);
53
54    m_inv = mulinv(m, n);
55    n_inv = mulinv(n, m);
56
57    if (!(s->map = av_malloc(2*len*sizeof(*s->map))))
58        return AVERROR(ENOMEM);
59
60    in_map  = s->map;
61    out_map = s->map + len;
62
63    /* Ruritanian map for input, CRT map for output, can be swapped */
64    for (int j = 0; j < m; j++) {
65        for (int i = 0; i < n; i++) {
66            /* Shifted by 1 to simplify MDCTs */
67            in_map[j*n + i] = ((i*m + j*n) % len) << mdct;
68            out_map[(i*m*m_inv + j*n*n_inv) % len] = i*m + j;
69        }
70    }
71
72    /* Change transform direction by reversing all ACs */
73    if (inv) {
74        for (int i = 0; i < m; i++) {
75            int *in = &in_map[i*n + 1]; /* Skip the DC */
76            for (int j = 0; j < ((n - 1) >> 1); j++)
77                FFSWAP(int, in[j], in[n - j - 2]);
78        }
79    }
80
81    /* Our 15-point transform is also a compound one, so embed its input map */
82    if (n == 15) {
83        for (int k = 0; k < m; k++) {
84            int tmp[15];
85            memcpy(tmp, &in_map[k*15], 15*sizeof(*tmp));
86            for (int i = 0; i < 5; i++) {
87                for (int j = 0; j < 3; j++)
88                    in_map[k*15 + i*3 + j] = tmp[(i*3 + j*5) % 15];
89            }
90        }
91    }
92
93    return 0;
94}
95
/**
 * Recursively computes the split-radix permutation value for index i in a
 * transform of length len.
 *
 * The result may be negative or exceed len; the callers in this file negate
 * it and mask it with (len - 1) to obtain a table index.
 *
 * @param i   index to permute
 * @param len length of the (sub)transform at this recursion level
 * @param inv direction selector, XORed into the sign choice of the odd branch
 */
static inline int split_radix_permutation(int i, int len, int inv)
{
    const int half = len >> 1;
    int quarter, flip;

    /* Lengths of 3 or less: only the parity of i matters */
    if (half <= 1)
        return i & 1;

    /* Index falls in the lower half: recurse on a half-length transform */
    if ((i & half) == 0)
        return split_radix_permutation(i, half, inv) * 2;

    /* Index falls in the upper half: recurse on a quarter-length transform */
    quarter = half >> 1;
    flip    = !(i & quarter) ^ inv;

    return split_radix_permutation(i, quarter, inv) * 4 + 1 - 2*flip;
}
106
107int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup)
108{
109    int len = s->len;
110
111    if (!(s->map = av_malloc(len*sizeof(*s->map))))
112        return AVERROR(ENOMEM);
113
114    if (invert_lookup) {
115        for (int i = 0; i < s->len; i++)
116            s->map[i] = -split_radix_permutation(i, len, s->inv) & (len - 1);
117    } else {
118        for (int i = 0; i < s->len; i++)
119            s->map[-split_radix_permutation(i, len, s->inv) & (len - 1)] = i;
120    }
121
122    return 0;
123}
124
125int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s)
126{
127    int *src_map, out_map_idx = 0, len = s->len;
128
129    if (!s->sub || !s->sub->map)
130        return AVERROR(EINVAL);
131
132    if (!(s->map = av_mallocz(len*sizeof(*s->map))))
133        return AVERROR(ENOMEM);
134
135    src_map = s->sub->map;
136
137    /* The first coefficient is always already in-place */
138    for (int src = 1; src < s->len; src++) {
139        int dst = src_map[src];
140        int found = 0;
141
142        if (dst <= src)
143            continue;
144
145        /* This just checks if a closed loop has been encountered before,
146         * and if so, skips it, since to fully permute a loop we must only
147         * enter it once. */
148        do {
149            for (int j = 0; j < out_map_idx; j++) {
150                if (dst == s->map[j]) {
151                    found = 1;
152                    break;
153                }
154            }
155            dst = src_map[dst];
156        } while (dst != src && !found);
157
158        if (!found)
159            s->map[out_map_idx++] = src;
160    }
161
162    s->map[out_map_idx++] = 0;
163
164    return 0;
165}
166
/**
 * Recursively fills revtab with a split-radix lookup table. The recursion
 * splits each range into one half and two quarters until a range's halved
 * length is at most `basis`; such leaf ranges are then written out with
 * even and odd source indices kept in separate runs.
 *
 * @param revtab      table being filled
 * @param n           full transform length, used for the permutation and mask
 * @param inv         direction passed on to split_radix_permutation()
 * @param offset      first index covered by this call
 * @param is_dual     set for the two quarter-length children
 * @param dual_high   set for the second quarter-length child
 * @param len         length covered by this call (halved on entry)
 * @param basis       largest halved length handled as a leaf
 * @param dual_stride run length used to interleave the two quarter children;
 *                    0 disables interleaving
 * @param inv_lookup  if nonzero, positions index the table and permuted
 *                    indices are stored; otherwise the reverse
 */
static void parity_revtab_generator(int *revtab, int n, int inv, int offset,
                                    int is_dual, int dual_high, int len,
                                    int basis, int dual_stride, int inv_lookup)
{
    const int half = len >> 1;
    int dual, high, stride, even_pos, odd_pos;

    if (half > basis) {
        const int quarter = half >> 1;

        parity_revtab_generator(revtab, n, inv, offset,
                                0, 0, half, basis, dual_stride, inv_lookup);
        parity_revtab_generator(revtab, n, inv, offset + half,
                                1, 0, quarter, basis, dual_stride, inv_lookup);
        parity_revtab_generator(revtab, n, inv, offset + half + quarter,
                                1, 1, quarter, basis, dual_stride, inv_lookup);
        return;
    }

    /* Leaf: interleaving only applies when a stride was requested */
    dual   = is_dual && dual_stride;
    high   = dual & dual_high;
    stride = dual ? FFMIN(dual_stride, half) : 0;

    even_pos = offset + high*(stride - 2*half);
    odd_pos  = even_pos + half + (dual && !high)*half + high*half;

    for (int i = 0; i < half; i++) {
        const int k_even = -split_radix_permutation(offset + i*2 + 0, n, inv) & (n - 1);
        const int k_odd  = -split_radix_permutation(offset + i*2 + 1, n, inv) & (n - 1);

        if (inv_lookup) {
            revtab[even_pos] = k_even;
            revtab[odd_pos]  = k_odd;
        } else {
            revtab[k_even] = even_pos;
            revtab[k_odd]  = odd_pos;
        }
        even_pos++;
        odd_pos++;

        /* After each full run, skip over the run owned by the other child */
        if (stride && !((i + 1) % stride)) {
            even_pos += stride;
            odd_pos  += stride;
        }
    }
}
209
210int ff_tx_gen_split_radix_parity_revtab(AVTXContext *s, int invert_lookup,
211                                        int basis, int dual_stride)
212{
213    int len = s->len;
214    int inv = s->inv;
215
216    if (!(s->map = av_mallocz(len*sizeof(*s->map))))
217        return AVERROR(ENOMEM);
218
219    basis >>= 1;
220    if (len < basis)
221        return AVERROR(EINVAL);
222
223    av_assert0(!dual_stride || !(dual_stride & (dual_stride - 1)));
224    av_assert0(dual_stride <= basis);
225    parity_revtab_generator(s->map, len, inv, 0, 0, 0, len,
226                            basis, dual_stride, invert_lookup);
227
228    return 0;
229}
230
231static void reset_ctx(AVTXContext *s)
232{
233    if (!s)
234        return;
235
236    if (s->sub)
237        for (int i = 0; i < s->nb_sub; i++)
238            reset_ctx(&s->sub[i]);
239
240    if (s->cd_self->uninit)
241        s->cd_self->uninit(s);
242
243    av_freep(&s->sub);
244    av_freep(&s->map);
245    av_freep(&s->exp);
246    av_freep(&s->tmp);
247
248    memset(s, 0, sizeof(*s));
249}
250
251av_cold void av_tx_uninit(AVTXContext **ctx)
252{
253    if (!(*ctx))
254        return;
255
256    reset_ctx(*ctx);
257    av_freep(ctx);
258}
259
260static av_cold int ff_tx_null_init(AVTXContext *s, const FFTXCodelet *cd,
261                                   uint64_t flags, FFTXCodeletOptions *opts,
262                                   int len, int inv, const void *scale)
263{
264    /* Can only handle one sample+type to one sample+type transforms */
265    if (TYPE_IS(MDCT, s->type) || TYPE_IS(RDFT, s->type))
266        return AVERROR(EINVAL);
267    return 0;
268}
269
270/* Null transform when the length is 1 */
271static void ff_tx_null(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride)
272{
273    memcpy(_out, _in, stride);
274}
275
276static const FFTXCodelet ff_tx_null_def = {
277    .name       = NULL_IF_CONFIG_SMALL("null"),
278    .function   = ff_tx_null,
279    .type       = TX_TYPE_ANY,
280    .flags      = AV_TX_UNALIGNED | FF_TX_ALIGNED |
281                  FF_TX_OUT_OF_PLACE | AV_TX_INPLACE,
282    .factors[0] = TX_FACTOR_ANY,
283    .min_len    = 1,
284    .max_len    = 1,
285    .init       = ff_tx_null_init,
286    .cpu_flags  = FF_TX_CPU_FLAGS_ALL,
287    .prio       = FF_TX_PRIO_MAX,
288};
289
290static const FFTXCodelet * const ff_tx_null_list[] = {
291    &ff_tx_null_def,
292    NULL,
293};
294
295#if !CONFIG_SMALL
296static void print_flags(AVBPrint *bp, uint64_t f)
297{
298    int prev = 0;
299    const char *sep = ", ";
300    av_bprintf(bp, "flags: [");
301    if ((f & FF_TX_ALIGNED) && ++prev)
302        av_bprintf(bp, "aligned");
303    if ((f & AV_TX_UNALIGNED) && ++prev)
304        av_bprintf(bp, "%sunaligned", prev > 1 ? sep : "");
305    if ((f & AV_TX_INPLACE) && ++prev)
306        av_bprintf(bp, "%sinplace", prev > 1 ? sep : "");
307    if ((f & FF_TX_OUT_OF_PLACE) && ++prev)
308        av_bprintf(bp, "%sout_of_place", prev > 1 ? sep : "");
309    if ((f & FF_TX_FORWARD_ONLY) && ++prev)
310        av_bprintf(bp, "%sfwd_only", prev > 1 ? sep : "");
311    if ((f & FF_TX_INVERSE_ONLY) && ++prev)
312        av_bprintf(bp, "%sinv_only", prev > 1 ? sep : "");
313    if ((f & FF_TX_PRESHUFFLE) && ++prev)
314        av_bprintf(bp, "%spreshuf", prev > 1 ? sep : "");
315    if ((f & AV_TX_FULL_IMDCT) && ++prev)
316        av_bprintf(bp, "%simdct_full", prev > 1 ? sep : "");
317    av_bprintf(bp, "]");
318}
319
320static void print_type(AVBPrint *bp, enum AVTXType type)
321{
322    av_bprintf(bp, "%s",
323               type == TX_TYPE_ANY       ? "any"         :
324               type == AV_TX_FLOAT_FFT   ? "fft_float"   :
325               type == AV_TX_FLOAT_MDCT  ? "mdct_float"  :
326               type == AV_TX_FLOAT_RDFT  ? "rdft_float"  :
327               type == AV_TX_DOUBLE_FFT  ? "fft_double"  :
328               type == AV_TX_DOUBLE_MDCT ? "mdct_double" :
329               type == AV_TX_DOUBLE_RDFT ? "rdft_double" :
330               type == AV_TX_INT32_FFT   ? "fft_int32"   :
331               type == AV_TX_INT32_MDCT  ? "mdct_int32"  :
332               type == AV_TX_INT32_RDFT  ? "rdft_int32"  :
333               "unknown");
334}
335
336static void print_cd_info(const FFTXCodelet *cd, int prio, int print_prio)
337{
338    AVBPrint bp = { 0 };
339    av_bprint_init(&bp, 0, AV_BPRINT_SIZE_AUTOMATIC);
340
341    av_bprintf(&bp, "%s - type: ", cd->name);
342
343    print_type(&bp, cd->type);
344
345    av_bprintf(&bp, ", len: ");
346    if (cd->min_len != cd->max_len)
347        av_bprintf(&bp, "[%i, ", cd->min_len);
348
349    if (cd->max_len == TX_LEN_UNLIMITED)
350        av_bprintf(&bp, "∞");
351    else
352        av_bprintf(&bp, "%i", cd->max_len);
353
354    av_bprintf(&bp, "%s, factors: [", cd->min_len != cd->max_len ? "]" : "");
355    for (int i = 0; i < TX_MAX_SUB; i++) {
356        if (i && cd->factors[i])
357            av_bprintf(&bp, ", ");
358        if (cd->factors[i] == TX_FACTOR_ANY)
359            av_bprintf(&bp, "any");
360        else if (cd->factors[i])
361            av_bprintf(&bp, "%i", cd->factors[i]);
362        else
363            break;
364    }
365
366    av_bprintf(&bp, "], ");
367    print_flags(&bp, cd->flags);
368
369    if (print_prio)
370        av_bprintf(&bp, ", prio: %i", prio);
371
372    av_log(NULL, AV_LOG_VERBOSE, "%s\n", bp.str);
373}
374
375static void print_tx_structure(AVTXContext *s, int depth)
376{
377    const FFTXCodelet *cd = s->cd_self;
378
379    for (int i = 0; i <= depth; i++)
380        av_log(NULL, AV_LOG_VERBOSE, "    ");
381
382    print_cd_info(cd, cd->prio, 0);
383
384    for (int i = 0; i < s->nb_sub; i++)
385        print_tx_structure(&s->sub[i], depth + 1);
386}
387#endif /* CONFIG_SMALL */
388
389typedef struct TXCodeletMatch {
390    const FFTXCodelet *cd;
391    int prio;
392} TXCodeletMatch;
393
394static int cmp_matches(TXCodeletMatch *a, TXCodeletMatch *b)
395{
396    return FFDIFFSIGN(b->prio, a->prio);
397}
398
399/* We want all factors to completely cover the length */
400static inline int check_cd_factors(const FFTXCodelet *cd, int len)
401{
402    int all_flag = 0;
403
404    for (int i = 0; i < TX_MAX_SUB; i++) {
405        int factor = cd->factors[i];
406
407        /* Conditions satisfied */
408        if (len == 1)
409            return 1;
410
411        /* No more factors */
412        if (!factor) {
413            break;
414        } else if (factor == TX_FACTOR_ANY) {
415            all_flag = 1;
416            continue;
417        }
418
419        if (factor == 2) { /* Fast path */
420            int bits_2 = ff_ctz(len);
421            if (!bits_2)
422                return 0; /* Factor not supported */
423
424            len >>= bits_2;
425        } else {
426            int res = len % factor;
427            if (res)
428                return 0; /* Factor not supported */
429
430            while (!res) {
431                len /= factor;
432                res = len % factor;
433            }
434        }
435    }
436
437    return all_flag || (len == 1);
438}
439
440av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type,
441                             uint64_t flags, FFTXCodeletOptions *opts,
442                             int len, int inv, const void *scale)
443{
444    int ret = 0;
445    AVTXContext *sub = NULL;
446    TXCodeletMatch *cd_tmp, *cd_matches = NULL;
447    unsigned int cd_matches_size = 0;
448    int nb_cd_matches = 0;
449#if !CONFIG_SMALL
450    AVBPrint bp = { 0 };
451#endif
452
453    /* Array of all compiled codelet lists. Order is irrelevant. */
454    const FFTXCodelet * const * const codelet_list[] = {
455        ff_tx_codelet_list_float_c,
456        ff_tx_codelet_list_double_c,
457        ff_tx_codelet_list_int32_c,
458        ff_tx_null_list,
459#if HAVE_X86ASM
460        ff_tx_codelet_list_float_x86,
461#endif
462    };
463    int codelet_list_num = FF_ARRAY_ELEMS(codelet_list);
464
465    /* We still accept functions marked with SLOW, even if the CPU is
466     * marked with the same flag, but we give them lower priority. */
467    const int cpu_flags = av_get_cpu_flags();
468    const int slow_mask = AV_CPU_FLAG_SSE2SLOW | AV_CPU_FLAG_SSE3SLOW  |
469                          AV_CPU_FLAG_ATOM     | AV_CPU_FLAG_SSSE3SLOW |
470                          AV_CPU_FLAG_AVXSLOW  | AV_CPU_FLAG_SLOW_GATHER;
471
472    static const int slow_penalties[][2] = {
473        { AV_CPU_FLAG_SSE2SLOW,    1 + 64  },
474        { AV_CPU_FLAG_SSE3SLOW,    1 + 64  },
475        { AV_CPU_FLAG_SSSE3SLOW,   1 + 64  },
476        { AV_CPU_FLAG_ATOM,        1 + 128 },
477        { AV_CPU_FLAG_AVXSLOW,     1 + 128 },
478        { AV_CPU_FLAG_SLOW_GATHER, 1 + 32  },
479    };
480
481    /* Flags the transform wants */
482    uint64_t req_flags = flags;
483
484    /* Flags the codelet may require to be present */
485    uint64_t inv_req_mask = AV_TX_FULL_IMDCT | FF_TX_PRESHUFFLE;
486
487    /* Unaligned codelets are compatible with the aligned flag */
488    if (req_flags & FF_TX_ALIGNED)
489        req_flags |= AV_TX_UNALIGNED;
490
491    /* If either flag is set, both are okay, so don't check for an exact match */
492    if ((req_flags & AV_TX_INPLACE) && (req_flags & FF_TX_OUT_OF_PLACE))
493        req_flags &= ~(AV_TX_INPLACE | FF_TX_OUT_OF_PLACE);
494    if ((req_flags & FF_TX_ALIGNED) && (req_flags & AV_TX_UNALIGNED))
495        req_flags &= ~(FF_TX_ALIGNED | AV_TX_UNALIGNED);
496
497    /* Loop through all codelets in all codelet lists to find matches
498     * to the requirements */
499    while (codelet_list_num--) {
500        const FFTXCodelet * const * list = codelet_list[codelet_list_num];
501        const FFTXCodelet *cd = NULL;
502
503        while ((cd = *list++)) {
504            int max_factor = 0;
505
506            /* Check if the type matches */
507            if (cd->type != TX_TYPE_ANY && type != cd->type)
508                continue;
509
510            /* Check direction for non-orthogonal codelets */
511            if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
512                ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv))
513                continue;
514
515            /* Check if the requested flags match from both sides */
516            if (((req_flags    & cd->flags) != (req_flags)) ||
517                ((inv_req_mask & cd->flags) != (req_flags & inv_req_mask)))
518                continue;
519
520            /* Check if length is supported */
521            if ((len < cd->min_len) || (cd->max_len != -1 && (len > cd->max_len)))
522                continue;
523
524            /* Check if the CPU supports the required ISA */
525            if (cd->cpu_flags != FF_TX_CPU_FLAGS_ALL &&
526                !(cpu_flags & (cd->cpu_flags & ~slow_mask)))
527                continue;
528
529            /* Check for factors */
530            if (!check_cd_factors(cd, len))
531                continue;
532
533            /* Realloc array and append */
534            cd_tmp = av_fast_realloc(cd_matches, &cd_matches_size,
535                                     sizeof(*cd_tmp) * (nb_cd_matches + 1));
536            if (!cd_tmp) {
537                av_free(cd_matches);
538                return AVERROR(ENOMEM);
539            }
540
541            cd_matches                     = cd_tmp;
542            cd_matches[nb_cd_matches].cd   = cd;
543            cd_matches[nb_cd_matches].prio = cd->prio;
544
545            /* If the CPU has a SLOW flag, and the instruction is also flagged
546             * as being slow for such, reduce its priority */
547            for (int i = 0; i < FF_ARRAY_ELEMS(slow_penalties); i++) {
548                if ((cpu_flags & cd->cpu_flags) & slow_penalties[i][0])
549                    cd_matches[nb_cd_matches].prio -= slow_penalties[i][1];
550            }
551
552            /* Prioritize aligned-only codelets */
553            if ((cd->flags & FF_TX_ALIGNED) && !(cd->flags & AV_TX_UNALIGNED))
554                cd_matches[nb_cd_matches].prio += 64;
555
556            /* Codelets for specific lengths are generally faster */
557            if ((len == cd->min_len) && (len == cd->max_len))
558                cd_matches[nb_cd_matches].prio += 64;
559
560            /* Forward-only or inverse-only transforms are generally better */
561            if ((cd->flags & (FF_TX_FORWARD_ONLY | FF_TX_INVERSE_ONLY)))
562                cd_matches[nb_cd_matches].prio += 64;
563
564            /* Larger factors are generally better */
565            for (int i = 0; i < TX_MAX_SUB; i++)
566                max_factor = FFMAX(cd->factors[i], max_factor);
567            if (max_factor)
568                cd_matches[nb_cd_matches].prio += 16*max_factor;
569
570            nb_cd_matches++;
571        }
572    }
573
574#if !CONFIG_SMALL
575    /* Print debugging info */
576    av_bprint_init(&bp, 0, AV_BPRINT_SIZE_AUTOMATIC);
577    av_bprintf(&bp, "For transform of length %i, %s, ", len,
578               inv ? "inverse" : "forward");
579    print_type(&bp, type);
580    av_bprintf(&bp, ", ");
581    print_flags(&bp, flags);
582    av_bprintf(&bp, ", found %i matches%s", nb_cd_matches,
583               nb_cd_matches ? ":" : ".");
584#endif
585
586    /* No matches found */
587    if (!nb_cd_matches)
588        return AVERROR(ENOSYS);
589
590    /* Sort the list */
591    AV_QSORT(cd_matches, nb_cd_matches, TXCodeletMatch, cmp_matches);
592
593#if !CONFIG_SMALL
594    av_log(NULL, AV_LOG_VERBOSE, "%s\n", bp.str);
595
596    for (int i = 0; i < nb_cd_matches; i++) {
597        av_log(NULL, AV_LOG_VERBOSE, "    %i: ", i + 1);
598        print_cd_info(cd_matches[i].cd, cd_matches[i].prio, 1);
599    }
600#endif
601
602    if (!s->sub) {
603        s->sub = sub = av_mallocz(TX_MAX_SUB*sizeof(*sub));
604        if (!sub) {
605            ret = AVERROR(ENOMEM);
606            goto end;
607        }
608    }
609
610    /* Attempt to initialize each */
611    for (int i = 0; i < nb_cd_matches; i++) {
612        const FFTXCodelet *cd = cd_matches[i].cd;
613        AVTXContext *sctx = &s->sub[s->nb_sub];
614
615        sctx->len        = len;
616        sctx->inv        = inv;
617        sctx->type       = type;
618        sctx->flags      = flags;
619        sctx->cd_self    = cd;
620
621        s->fn[s->nb_sub] = cd->function;
622        s->cd[s->nb_sub] = cd;
623
624        ret = 0;
625        if (cd->init)
626            ret = cd->init(sctx, cd, flags, opts, len, inv, scale);
627
628        if (ret >= 0) {
629            s->nb_sub++;
630            goto end;
631        }
632
633        s->fn[s->nb_sub] = NULL;
634        s->cd[s->nb_sub] = NULL;
635
636        reset_ctx(sctx);
637        if (ret == AVERROR(ENOMEM))
638            break;
639    }
640
641    if (!s->nb_sub)
642        av_freep(&s->sub);
643
644end:
645    av_free(cd_matches);
646    return ret;
647}
648
649av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type,
650                       int inv, int len, const void *scale, uint64_t flags)
651{
652    int ret;
653    AVTXContext tmp = { 0 };
654    const double default_scale_d = 1.0;
655    const float  default_scale_f = 1.0f;
656
657    if (!len || type >= AV_TX_NB || !ctx || !tx)
658        return AVERROR(EINVAL);
659
660    if (!(flags & AV_TX_UNALIGNED))
661        flags |= FF_TX_ALIGNED;
662    if (!(flags & AV_TX_INPLACE))
663        flags |= FF_TX_OUT_OF_PLACE;
664
665    if (!scale && ((type == AV_TX_FLOAT_MDCT) || (type == AV_TX_INT32_MDCT)))
666        scale = &default_scale_f;
667    else if (!scale && (type == AV_TX_DOUBLE_MDCT))
668        scale = &default_scale_d;
669
670    ret = ff_tx_init_subtx(&tmp, type, flags, NULL, len, inv, scale);
671    if (ret < 0)
672        return ret;
673
674    *ctx = &tmp.sub[0];
675    *tx  = tmp.fn[0];
676
677#if !CONFIG_SMALL
678    av_log(NULL, AV_LOG_VERBOSE, "Transform tree:\n");
679    print_tx_structure(*ctx, 0);
680#endif
681
682    return ret;
683}
684