1/* 2 * This file is part of FFmpeg. 3 * 4 * FFmpeg is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU Lesser General Public 6 * License as published by the Free Software Foundation; either 7 * version 2.1 of the License, or (at your option) any later version. 8 * 9 * FFmpeg is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * Lesser General Public License for more details. 13 * 14 * You should have received a copy of the GNU Lesser General Public 15 * License along with FFmpeg; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 19#include "cpu.h" 20#include "qsort.h" 21#include "bprint.h" 22 23#include "tx_priv.h" 24 25#define TYPE_IS(type, x) \ 26 (((x) == AV_TX_FLOAT_ ## type) || \ 27 ((x) == AV_TX_DOUBLE_ ## type) || \ 28 ((x) == AV_TX_INT32_ ## type)) 29 30/* Calculates the modular multiplicative inverse */ 31static av_always_inline int mulinv(int n, int m) 32{ 33 n = n % m; 34 for (int x = 1; x < m; x++) 35 if (((n * x) % m) == 1) 36 return x; 37 av_assert0(0); /* Never reached */ 38 return 0; 39} 40 41/* Guaranteed to work for any n, m where gcd(n, m) == 1 */ 42int ff_tx_gen_compound_mapping(AVTXContext *s, int n, int m) 43{ 44 int *in_map, *out_map; 45 const int inv = s->inv; 46 const int len = n*m; /* Will not be equal to s->len for MDCTs */ 47 const int mdct = TYPE_IS(MDCT, s->type); 48 int m_inv, n_inv; 49 50 /* Make sure the numbers are coprime */ 51 if (av_gcd(n, m) != 1) 52 return AVERROR(EINVAL); 53 54 m_inv = mulinv(m, n); 55 n_inv = mulinv(n, m); 56 57 if (!(s->map = av_malloc(2*len*sizeof(*s->map)))) 58 return AVERROR(ENOMEM); 59 60 in_map = s->map; 61 out_map = s->map + len; 62 63 /* Ruritanian map for input, CRT map for output, can be swapped */ 64 for (int j = 0; j < m; j++) { 65 for (int i = 0; i < n; i++) { 66 /* Shifted by 1 to simplify MDCTs */ 67 in_map[j*n + i] = ((i*m + j*n) % len) << mdct; 68 out_map[(i*m*m_inv + j*n*n_inv) % len] = i*m + j; 69 } 70 } 71 72 /* Change transform direction by reversing all ACs */ 73 if (inv) { 74 for (int i = 0; i < m; i++) { 75 int *in = &in_map[i*n + 1]; /* Skip the DC */ 76 for (int j = 0; j < ((n - 1) >> 1); j++) 77 FFSWAP(int, in[j], in[n - j - 2]); 78 } 79 } 80 81 /* Our 15-point transform is also a compound one, so embed its input map */ 82 if (n == 15) { 83 for (int k = 0; k < m; k++) { 84 int tmp[15]; 85 memcpy(tmp, &in_map[k*15], 15*sizeof(*tmp)); 86 for (int i = 0; i < 5; i++) { 87 for (int j = 0; j < 3; j++) 88 in_map[k*15 + i*3 + j] = tmp[(i*3 + j*5) % 15]; 89 } 90 } 91 } 92 93 return 0; 94} 95 96static inline int split_radix_permutation(int i, int len, int inv) 97{ 98 len >>= 1; 99 if (len <= 1) 100 return i & 1; 101 if (!(i & len)) 102 return split_radix_permutation(i, len, inv) * 2; 103 len >>= 1; 104 return split_radix_permutation(i, len, inv) * 4 + 1 - 2*(!(i & len) ^ inv); 105} 106 107int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup) 108{ 109 int len = s->len; 110 111 if (!(s->map = av_malloc(len*sizeof(*s->map)))) 112 return AVERROR(ENOMEM); 113 114 if (invert_lookup) { 115 for (int i = 0; i < s->len; i++) 116 s->map[i] = -split_radix_permutation(i, len, s->inv) & (len - 1); 117 } else { 118 for (int i = 0; i < s->len; i++) 119 s->map[-split_radix_permutation(i, len, s->inv) & (len - 1)] = i; 120 } 121 122 return 0; 123} 124 125int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s) 126{ 127 int *src_map, out_map_idx = 0, len = s->len; 128 129 if (!s->sub || !s->sub->map) 130 return AVERROR(EINVAL); 131 132 if (!(s->map = av_mallocz(len*sizeof(*s->map)))) 133 return AVERROR(ENOMEM); 134 135 src_map = s->sub->map; 136 137 /* The first coefficient is always already in-place */ 138 for (int src = 1; src < s->len; src++) { 139 int dst = src_map[src]; 140 int found = 0; 141 142 if (dst <= src) 143 continue; 144 145 /* This just checks if a closed loop has been encountered before, 146 * and if so, skips it, since to fully permute a loop we must only 147 * enter it once. */ 148 do { 149 for (int j = 0; j < out_map_idx; j++) { 150 if (dst == s->map[j]) { 151 found = 1; 152 break; 153 } 154 } 155 dst = src_map[dst]; 156 } while (dst != src && !found); 157 158 if (!found) 159 s->map[out_map_idx++] = src; 160 } 161 162 s->map[out_map_idx++] = 0; 163 164 return 0; 165} 166 167static void parity_revtab_generator(int *revtab, int n, int inv, int offset, 168 int is_dual, int dual_high, int len, 169 int basis, int dual_stride, int inv_lookup) 170{ 171 len >>= 1; 172 173 if (len <= basis) { 174 int k1, k2, stride, even_idx, odd_idx; 175 176 is_dual = is_dual && dual_stride; 177 dual_high = is_dual & dual_high; 178 stride = is_dual ? FFMIN(dual_stride, len) : 0; 179 180 even_idx = offset + dual_high*(stride - 2*len); 181 odd_idx = even_idx + len + (is_dual && !dual_high)*len + dual_high*len; 182 183 for (int i = 0; i < len; i++) { 184 k1 = -split_radix_permutation(offset + i*2 + 0, n, inv) & (n - 1); 185 k2 = -split_radix_permutation(offset + i*2 + 1, n, inv) & (n - 1); 186 if (inv_lookup) { 187 revtab[even_idx++] = k1; 188 revtab[odd_idx++] = k2; 189 } else { 190 revtab[k1] = even_idx++; 191 revtab[k2] = odd_idx++; 192 } 193 if (stride && !((i + 1) % stride)) { 194 even_idx += stride; 195 odd_idx += stride; 196 } 197 } 198 199 return; 200 } 201 202 parity_revtab_generator(revtab, n, inv, offset, 203 0, 0, len >> 0, basis, dual_stride, inv_lookup); 204 parity_revtab_generator(revtab, n, inv, offset + (len >> 0), 205 1, 0, len >> 1, basis, dual_stride, inv_lookup); 206 parity_revtab_generator(revtab, n, inv, offset + (len >> 0) + (len >> 1), 207 1, 1, len >> 1, basis, dual_stride, inv_lookup); 208} 209 210int ff_tx_gen_split_radix_parity_revtab(AVTXContext *s, int invert_lookup, 211 int basis, int dual_stride) 212{ 213 int len = s->len; 214 int inv = s->inv; 215 216 if (!(s->map = av_mallocz(len*sizeof(*s->map)))) 217 return AVERROR(ENOMEM); 218 219 basis >>= 1; 220 if (len < basis) 221 return AVERROR(EINVAL); 222 223 av_assert0(!dual_stride || !(dual_stride & (dual_stride - 1))); 224 av_assert0(dual_stride <= basis); 225 parity_revtab_generator(s->map, len, inv, 0, 0, 0, len, 226 basis, dual_stride, invert_lookup); 227 228 return 0; 229} 230 231static void reset_ctx(AVTXContext *s) 232{ 233 if (!s) 234 return; 235 236 if (s->sub) 237 for (int i = 0; i < s->nb_sub; i++) 238 reset_ctx(&s->sub[i]); 239 240 if (s->cd_self->uninit) 241 s->cd_self->uninit(s); 242 243 av_freep(&s->sub); 244 av_freep(&s->map); 245 av_freep(&s->exp); 246 av_freep(&s->tmp); 247 248 memset(s, 0, sizeof(*s)); 249} 250 251av_cold void av_tx_uninit(AVTXContext **ctx) 252{ 253 if (!(*ctx)) 254 return; 255 256 reset_ctx(*ctx); 257 av_freep(ctx); 258} 259 260static av_cold int ff_tx_null_init(AVTXContext *s, const FFTXCodelet *cd, 261 uint64_t flags, FFTXCodeletOptions *opts, 262 int len, int inv, const void *scale) 263{ 264 /* Can only handle one sample+type to one sample+type transforms */ 265 if (TYPE_IS(MDCT, s->type) || TYPE_IS(RDFT, s->type)) 266 return AVERROR(EINVAL); 267 return 0; 268} 269 270/* Null transform when the length is 1 */ 271static void ff_tx_null(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride) 272{ 273 memcpy(_out, _in, stride); 274} 275 276static const FFTXCodelet ff_tx_null_def = { 277 .name = NULL_IF_CONFIG_SMALL("null"), 278 .function = ff_tx_null, 279 .type = TX_TYPE_ANY, 280 .flags = AV_TX_UNALIGNED | FF_TX_ALIGNED | 281 FF_TX_OUT_OF_PLACE | AV_TX_INPLACE, 282 .factors[0] = TX_FACTOR_ANY, 283 .min_len = 1, 284 .max_len = 1, 285 .init = ff_tx_null_init, 286 .cpu_flags = FF_TX_CPU_FLAGS_ALL, 287 .prio = FF_TX_PRIO_MAX, 288}; 289 290static const FFTXCodelet * const ff_tx_null_list[] = { 291 &ff_tx_null_def, 292 NULL, 293}; 294 295#if !CONFIG_SMALL 296static void print_flags(AVBPrint *bp, uint64_t f) 297{ 298 int prev = 0; 299 const char *sep = ", "; 300 av_bprintf(bp, "flags: ["); 301 if ((f & FF_TX_ALIGNED) && ++prev) 302 av_bprintf(bp, "aligned"); 303 if ((f & AV_TX_UNALIGNED) && ++prev) 304 av_bprintf(bp, "%sunaligned", prev > 1 ? sep : ""); 305 if ((f & AV_TX_INPLACE) && ++prev) 306 av_bprintf(bp, "%sinplace", prev > 1 ? sep : ""); 307 if ((f & FF_TX_OUT_OF_PLACE) && ++prev) 308 av_bprintf(bp, "%sout_of_place", prev > 1 ? sep : ""); 309 if ((f & FF_TX_FORWARD_ONLY) && ++prev) 310 av_bprintf(bp, "%sfwd_only", prev > 1 ? sep : ""); 311 if ((f & FF_TX_INVERSE_ONLY) && ++prev) 312 av_bprintf(bp, "%sinv_only", prev > 1 ? sep : ""); 313 if ((f & FF_TX_PRESHUFFLE) && ++prev) 314 av_bprintf(bp, "%spreshuf", prev > 1 ? sep : ""); 315 if ((f & AV_TX_FULL_IMDCT) && ++prev) 316 av_bprintf(bp, "%simdct_full", prev > 1 ? sep : ""); 317 av_bprintf(bp, "]"); 318} 319 320static void print_type(AVBPrint *bp, enum AVTXType type) 321{ 322 av_bprintf(bp, "%s", 323 type == TX_TYPE_ANY ? "any" : 324 type == AV_TX_FLOAT_FFT ? "fft_float" : 325 type == AV_TX_FLOAT_MDCT ? "mdct_float" : 326 type == AV_TX_FLOAT_RDFT ? "rdft_float" : 327 type == AV_TX_DOUBLE_FFT ? "fft_double" : 328 type == AV_TX_DOUBLE_MDCT ? "mdct_double" : 329 type == AV_TX_DOUBLE_RDFT ? "rdft_double" : 330 type == AV_TX_INT32_FFT ? "fft_int32" : 331 type == AV_TX_INT32_MDCT ? "mdct_int32" : 332 type == AV_TX_INT32_RDFT ? "rdft_int32" : 333 "unknown"); 334} 335 336static void print_cd_info(const FFTXCodelet *cd, int prio, int print_prio) 337{ 338 AVBPrint bp = { 0 }; 339 av_bprint_init(&bp, 0, AV_BPRINT_SIZE_AUTOMATIC); 340 341 av_bprintf(&bp, "%s - type: ", cd->name); 342 343 print_type(&bp, cd->type); 344 345 av_bprintf(&bp, ", len: "); 346 if (cd->min_len != cd->max_len) 347 av_bprintf(&bp, "[%i, ", cd->min_len); 348 349 if (cd->max_len == TX_LEN_UNLIMITED) 350 av_bprintf(&bp, "∞"); 351 else 352 av_bprintf(&bp, "%i", cd->max_len); 353 354 av_bprintf(&bp, "%s, factors: [", cd->min_len != cd->max_len ? "]" : ""); 355 for (int i = 0; i < TX_MAX_SUB; i++) { 356 if (i && cd->factors[i]) 357 av_bprintf(&bp, ", "); 358 if (cd->factors[i] == TX_FACTOR_ANY) 359 av_bprintf(&bp, "any"); 360 else if (cd->factors[i]) 361 av_bprintf(&bp, "%i", cd->factors[i]); 362 else 363 break; 364 } 365 366 av_bprintf(&bp, "], "); 367 print_flags(&bp, cd->flags); 368 369 if (print_prio) 370 av_bprintf(&bp, ", prio: %i", prio); 371 372 av_log(NULL, AV_LOG_VERBOSE, "%s\n", bp.str); 373} 374 375static void print_tx_structure(AVTXContext *s, int depth) 376{ 377 const FFTXCodelet *cd = s->cd_self; 378 379 for (int i = 0; i <= depth; i++) 380 av_log(NULL, AV_LOG_VERBOSE, " "); 381 382 print_cd_info(cd, cd->prio, 0); 383 384 for (int i = 0; i < s->nb_sub; i++) 385 print_tx_structure(&s->sub[i], depth + 1); 386} 387#endif /* CONFIG_SMALL */ 388 389typedef struct TXCodeletMatch { 390 const FFTXCodelet *cd; 391 int prio; 392} TXCodeletMatch; 393 394static int cmp_matches(TXCodeletMatch *a, TXCodeletMatch *b) 395{ 396 return FFDIFFSIGN(b->prio, a->prio); 397} 398 399/* We want all factors to completely cover the length */ 400static inline int check_cd_factors(const FFTXCodelet *cd, int len) 401{ 402 int all_flag = 0; 403 404 for (int i = 0; i < TX_MAX_SUB; i++) { 405 int factor = cd->factors[i]; 406 407 /* Conditions satisfied */ 408 if (len == 1) 409 return 1; 410 411 /* No more factors */ 412 if (!factor) { 413 break; 414 } else if (factor == TX_FACTOR_ANY) { 415 all_flag = 1; 416 continue; 417 } 418 419 if (factor == 2) { /* Fast path */ 420 int bits_2 = ff_ctz(len); 421 if (!bits_2) 422 return 0; /* Factor not supported */ 423 424 len >>= bits_2; 425 } else { 426 int res = len % factor; 427 if (res) 428 return 0; /* Factor not supported */ 429 430 while (!res) { 431 len /= factor; 432 res = len % factor; 433 } 434 } 435 } 436 437 return all_flag || (len == 1); 438} 439 440av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type, 441 uint64_t flags, FFTXCodeletOptions *opts, 442 int len, int inv, const void *scale) 443{ 444 int ret = 0; 445 AVTXContext *sub = NULL; 446 TXCodeletMatch *cd_tmp, *cd_matches = NULL; 447 unsigned int cd_matches_size = 0; 448 int nb_cd_matches = 0; 449#if !CONFIG_SMALL 450 AVBPrint bp = { 0 }; 451#endif 452 453 /* Array of all compiled codelet lists. Order is irrelevant. */ 454 const FFTXCodelet * const * const codelet_list[] = { 455 ff_tx_codelet_list_float_c, 456 ff_tx_codelet_list_double_c, 457 ff_tx_codelet_list_int32_c, 458 ff_tx_null_list, 459#if HAVE_X86ASM 460 ff_tx_codelet_list_float_x86, 461#endif 462 }; 463 int codelet_list_num = FF_ARRAY_ELEMS(codelet_list); 464 465 /* We still accept functions marked with SLOW, even if the CPU is 466 * marked with the same flag, but we give them lower priority. */ 467 const int cpu_flags = av_get_cpu_flags(); 468 const int slow_mask = AV_CPU_FLAG_SSE2SLOW | AV_CPU_FLAG_SSE3SLOW | 469 AV_CPU_FLAG_ATOM | AV_CPU_FLAG_SSSE3SLOW | 470 AV_CPU_FLAG_AVXSLOW | AV_CPU_FLAG_SLOW_GATHER; 471 472 static const int slow_penalties[][2] = { 473 { AV_CPU_FLAG_SSE2SLOW, 1 + 64 }, 474 { AV_CPU_FLAG_SSE3SLOW, 1 + 64 }, 475 { AV_CPU_FLAG_SSSE3SLOW, 1 + 64 }, 476 { AV_CPU_FLAG_ATOM, 1 + 128 }, 477 { AV_CPU_FLAG_AVXSLOW, 1 + 128 }, 478 { AV_CPU_FLAG_SLOW_GATHER, 1 + 32 }, 479 }; 480 481 /* Flags the transform wants */ 482 uint64_t req_flags = flags; 483 484 /* Flags the codelet may require to be present */ 485 uint64_t inv_req_mask = AV_TX_FULL_IMDCT | FF_TX_PRESHUFFLE; 486 487 /* Unaligned codelets are compatible with the aligned flag */ 488 if (req_flags & FF_TX_ALIGNED) 489 req_flags |= AV_TX_UNALIGNED; 490 491 /* If either flag is set, both are okay, so don't check for an exact match */ 492 if ((req_flags & AV_TX_INPLACE) && (req_flags & FF_TX_OUT_OF_PLACE)) 493 req_flags &= ~(AV_TX_INPLACE | FF_TX_OUT_OF_PLACE); 494 if ((req_flags & FF_TX_ALIGNED) && (req_flags & AV_TX_UNALIGNED)) 495 req_flags &= ~(FF_TX_ALIGNED | AV_TX_UNALIGNED); 496 497 /* Loop through all codelets in all codelet lists to find matches 498 * to the requirements */ 499 while (codelet_list_num--) { 500 const FFTXCodelet * const * list = codelet_list[codelet_list_num]; 501 const FFTXCodelet *cd = NULL; 502 503 while ((cd = *list++)) { 504 int max_factor = 0; 505 506 /* Check if the type matches */ 507 if (cd->type != TX_TYPE_ANY && type != cd->type) 508 continue; 509 510 /* Check direction for non-orthogonal codelets */ 511 if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) || 512 ((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv)) 513 continue; 514 515 /* Check if the requested flags match from both sides */ 516 if (((req_flags & cd->flags) != (req_flags)) || 517 ((inv_req_mask & cd->flags) != (req_flags & inv_req_mask))) 518 continue; 519 520 /* Check if length is supported */ 521 if ((len < cd->min_len) || (cd->max_len != -1 && (len > cd->max_len))) 522 continue; 523 524 /* Check if the CPU supports the required ISA */ 525 if (cd->cpu_flags != FF_TX_CPU_FLAGS_ALL && 526 !(cpu_flags & (cd->cpu_flags & ~slow_mask))) 527 continue; 528 529 /* Check for factors */ 530 if (!check_cd_factors(cd, len)) 531 continue; 532 533 /* Realloc array and append */ 534 cd_tmp = av_fast_realloc(cd_matches, &cd_matches_size, 535 sizeof(*cd_tmp) * (nb_cd_matches + 1)); 536 if (!cd_tmp) { 537 av_free(cd_matches); 538 return AVERROR(ENOMEM); 539 } 540 541 cd_matches = cd_tmp; 542 cd_matches[nb_cd_matches].cd = cd; 543 cd_matches[nb_cd_matches].prio = cd->prio; 544 545 /* If the CPU has a SLOW flag, and the instruction is also flagged 546 * as being slow for such, reduce its priority */ 547 for (int i = 0; i < FF_ARRAY_ELEMS(slow_penalties); i++) { 548 if ((cpu_flags & cd->cpu_flags) & slow_penalties[i][0]) 549 cd_matches[nb_cd_matches].prio -= slow_penalties[i][1]; 550 } 551 552 /* Prioritize aligned-only codelets */ 553 if ((cd->flags & FF_TX_ALIGNED) && !(cd->flags & AV_TX_UNALIGNED)) 554 cd_matches[nb_cd_matches].prio += 64; 555 556 /* Codelets for specific lengths are generally faster */ 557 if ((len == cd->min_len) && (len == cd->max_len)) 558 cd_matches[nb_cd_matches].prio += 64; 559 560 /* Forward-only or inverse-only transforms are generally better */ 561 if ((cd->flags & (FF_TX_FORWARD_ONLY | FF_TX_INVERSE_ONLY))) 562 cd_matches[nb_cd_matches].prio += 64; 563 564 /* Larger factors are generally better */ 565 for (int i = 0; i < TX_MAX_SUB; i++) 566 max_factor = FFMAX(cd->factors[i], max_factor); 567 if (max_factor) 568 cd_matches[nb_cd_matches].prio += 16*max_factor; 569 570 nb_cd_matches++; 571 } 572 } 573 574#if !CONFIG_SMALL 575 /* Print debugging info */ 576 av_bprint_init(&bp, 0, AV_BPRINT_SIZE_AUTOMATIC); 577 av_bprintf(&bp, "For transform of length %i, %s, ", len, 578 inv ? "inverse" : "forward"); 579 print_type(&bp, type); 580 av_bprintf(&bp, ", "); 581 print_flags(&bp, flags); 582 av_bprintf(&bp, ", found %i matches%s", nb_cd_matches, 583 nb_cd_matches ? ":" : "."); 584#endif 585 586 /* No matches found */ 587 if (!nb_cd_matches) 588 return AVERROR(ENOSYS); 589 590 /* Sort the list */ 591 AV_QSORT(cd_matches, nb_cd_matches, TXCodeletMatch, cmp_matches); 592 593#if !CONFIG_SMALL 594 av_log(NULL, AV_LOG_VERBOSE, "%s\n", bp.str); 595 596 for (int i = 0; i < nb_cd_matches; i++) { 597 av_log(NULL, AV_LOG_VERBOSE, " %i: ", i + 1); 598 print_cd_info(cd_matches[i].cd, cd_matches[i].prio, 1); 599 } 600#endif 601 602 if (!s->sub) { 603 s->sub = sub = av_mallocz(TX_MAX_SUB*sizeof(*sub)); 604 if (!sub) { 605 ret = AVERROR(ENOMEM); 606 goto end; 607 } 608 } 609 610 /* Attempt to initialize each */ 611 for (int i = 0; i < nb_cd_matches; i++) { 612 const FFTXCodelet *cd = cd_matches[i].cd; 613 AVTXContext *sctx = &s->sub[s->nb_sub]; 614 615 sctx->len = len; 616 sctx->inv = inv; 617 sctx->type = type; 618 sctx->flags = flags; 619 sctx->cd_self = cd; 620 621 s->fn[s->nb_sub] = cd->function; 622 s->cd[s->nb_sub] = cd; 623 624 ret = 0; 625 if (cd->init) 626 ret = cd->init(sctx, cd, flags, opts, len, inv, scale); 627 628 if (ret >= 0) { 629 s->nb_sub++; 630 goto end; 631 } 632 633 s->fn[s->nb_sub] = NULL; 634 s->cd[s->nb_sub] = NULL; 635 636 reset_ctx(sctx); 637 if (ret == AVERROR(ENOMEM)) 638 break; 639 } 640 641 if (!s->nb_sub) 642 av_freep(&s->sub); 643 644end: 645 av_free(cd_matches); 646 return ret; 647} 648 649av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type, 650 int inv, int len, const void *scale, uint64_t flags) 651{ 652 int ret; 653 AVTXContext tmp = { 0 }; 654 const double default_scale_d = 1.0; 655 const float default_scale_f = 1.0f; 656 657 if (!len || type >= AV_TX_NB || !ctx || !tx) 658 return AVERROR(EINVAL); 659 660 if (!(flags & AV_TX_UNALIGNED)) 661 flags |= FF_TX_ALIGNED; 662 if (!(flags & AV_TX_INPLACE)) 663 flags |= FF_TX_OUT_OF_PLACE; 664 665 if (!scale && ((type == AV_TX_FLOAT_MDCT) || (type == AV_TX_INT32_MDCT))) 666 scale = &default_scale_f; 667 else if (!scale && (type == AV_TX_DOUBLE_MDCT)) 668 scale = &default_scale_d; 669 670 ret = ff_tx_init_subtx(&tmp, type, flags, NULL, len, inv, scale); 671 if (ret < 0) 672 return ret; 673 674 *ctx = &tmp.sub[0]; 675 *tx = tmp.fn[0]; 676 677#if !CONFIG_SMALL 678 av_log(NULL, AV_LOG_VERBOSE, "Transform tree:\n"); 679 print_tx_structure(*ctx, 0); 680#endif 681 682 return ret; 683} 684