; back port from GOGO-no coda 2.24b by Takehiro TOMINAGA

; GOGO-no-coda
;	Copyright (C) 1999 shigeo
;	special thanks to URURI

;-----------------------------------------------------------------------
; Syntax : NASM, 32-bit x86 with x87 FPU instructions.
; Macros : proc/endproc, arg, local, alloc, pushd/popd, sp(), r0..r6,
;          segment_data/segment_code, externdef and end all come from
;          nasm.h (not shown here).
;
; Register roles shared by both procedures in this file:
;	r0 = fi			running pointer into fz[]
;	r1 = gi			second running pointer into fz[]
;	r2 = k3*fsize		byte offset of element k3
;	r3 = k1*fsize		byte offset of element k1 (r3*2 = k2*fsize,
;				r3*4 = k4*fsize); numerically equal to k4
;	r4 = kx*fsize		byte offset of element kx
;	r5 = i*fsize		inner "for" counter, in bytes
;	r6 = fn			end pointer, fz + n floats
; fsize is the size of one float (4 bytes).
;-----------------------------------------------------------------------

%include "nasm.h"

	externdef costab_fft
	externdef sintab_fft

	segment_data
	align 32
D_1_41421	dd	1.41421356
D_1_0		dd	1.0
D_0_5		dd	0.5
D_0_25		dd	0.25
D_0_0005	dd	0.0005
D_0_0		dd	0.0

	segment_code

;-----------------------------------------------------------------------
;void fht(float *fz, int n);
;
; In-place transform of the n floats at fz, plain x87 version.
;
; In:    fz = pointer to the data, read and written in place
;        n  = number of floats; used both as the end bound (fn = fz + n)
;             and as the limit of the outer pass loop (k4 < n)
; Out:   nothing is returned in a register by this code; fz[] is updated
; Stack: the locals below hold the butterfly temporaries (f0..f3,
;        g0..g3), the rotation values (s1, c1, s2, c2), the per-pass
;        table values (t_s, t_c) and the table index k.
;
; Each outer pass (.do) consists of
;   .do2 : butterflies that need no rotation (fi[] directly, gi[k2] and
;          gi[k3] scaled by D_1_41421)
;   .for : for i = 1 .. kx-1, advance (c1, s1) by (t_c, t_s), derive
;          (c2, s2), then run the rotated butterflies in .do3
; and then multiplies k1, k3 and kx by 4 and adds 2 to k.
;-----------------------------------------------------------------------
proc	fht_FPU

%$fz	arg	4
%$n	arg	4

%$k	local	4

%$f0	local	4
%$f1	local	4
%$f2	local	4
%$f3	local	4

%$g0	local	4
%$g1	local	4
%$g2	local	4
%$g3	local	4

%$s1	local	4
%$c1	local	4
%$s2	local	4
%$c2	local	4

%$t_s	local	4
%$t_c	local	4
	alloc

	pushd	ebp, ebx, esi, edi

fht_FPU_1st_part:

fht_FPU_2nd_part:

fht_FPU_3rd_part:

.do_init:
	mov	r3, 16			;k1*fsize = 4*fsize = k4
	mov	r4, 8			;kx*fsize, kx = k1/2
	mov	r2, 48			;k3*fsize
	mov	dword [sp(%$k)], 2	;k = 2
	mov	r0, [sp(%$fz)]		;fi
	; r6 is the bound tested by "cmp r0, r6" in .do2 and .do3 and was
	; previously never initialised. Set it once here; nothing below
	; writes r6 again.
	; NOTE(review): assumes sp() does not depend on r6 - confirm in nasm.h.
	mov	r6, [sp(%$n)]
	lea	r6, [r0+r6*4]		;fn = fz + n
	lea	r1, [r0+8]		;gi = fi + kx

.do:
.do2:
	;f
	fld	dword [r0]
	fsub	dword [r0+r3]

	fld	dword [r0]
	fadd	dword [r0+r3]

	fld	dword [r0+r3*2]
	fsub	dword [r0+r2]

	fld	dword [r0+r3*2]
	fadd	dword [r0+r2]		;f2 f3 f0 f1

	fld	st2			;f0 f2 f3 f0 f1
	fadd	st0, st1
	fstp	dword [r0]		;fi[0]

	fld	st3			;f1 f2 f3 f0 f1
	fadd	st0, st2
	fstp	dword [r0+r3]		;fi[k1]

	fsubr	st0, st2		;f0-f2 f3 f0 f1
	fstp	dword [r0+r3*2]		;fi[k2]

	fsubr	st0, st2		;f1-f3 f0 f1
	fstp	dword [r0+r2]		;fi[k3]
	fcompp				;drop the two remaining values (f0 f1)

	;g
	fld	dword [r1]
	fsub	dword [r1+r3]

	fld	dword [r1]
	fadd	dword [r1+r3]

	fld	dword [D_1_41421]
	fmul	dword [r1+r2]

	fld	dword [D_1_41421]
	fmul	dword [r1+r3*2]		;g2 g3 g0 g1

	fld	st2			;g0 g2 g3 g0 g1
	fadd	st0, st1
	fstp	dword [r1]		;gi[0]

	fld	st3			;g1 g2 g3 g0 g1
	fadd	st0, st2
	fstp	dword [r1+r3]		;gi[k1]

	fsubr	st0, st2		;g0-g2 g3 g0 g1
	fstp	dword [r1+r3*2]		;gi[k2]

	fsubr	st0, st2		;g1-g3 g0 g1
	fstp	dword [r1+r2]		;gi[k3]
	fcompp				;drop the two remaining values (g0 g1)

	lea	r0, [r0+r3*4]		;fi += k4
	lea	r1, [r1+r3*4]		;gi += k4
	cmp	r0, r6
	jb	.do2			;while (fi < fn)


	; per-pass rotation step: t_c/t_s from the tables at index k,
	; and the running pair (c1, s1) reset to (1.0, 0.0)
	mov	r0, [sp(%$k)]
	fld	dword [costab_fft +r0*4]
	fstp	dword [sp(%$t_c)]
	fld	dword [sintab_fft +r0*4]
	fstp	dword [sp(%$t_s)]
	fld	dword [D_1_0]
	fstp	dword [sp(%$c1)]
	fld	dword [D_0_0]
	fstp	dword [sp(%$s1)]

.for_init:
	mov	r5, 4			;i = 1*fsize

.for:
	; new c1 = c1*t_c - s1*t_s, new s1 = c1*t_s + s1*t_c
	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$t_c)]
	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$t_s)]
	fsubp	st1, st0		;c1

	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$t_s)]
	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$t_c)]
	faddp	st1, st0		;s1 c1

	; c2 = c1*c1 - s1*s1, s2 = 2*c1*s1
	fld	st1
	fmul	st0, st0		;c1c1 s1 c1
	fld	st1
	fmul	st0, st0		;s1s1 c1c1 s1 c1
	fsubp	st1, st0		;c2 s1 c1
	fstp	dword [sp(%$c2)]	;s1 c1

	fld	st1			;c1 s1 c1
	fmul	st0, st1		;c1s1 s1 c1
	fadd	st0, st0		;s2 s1 c1
	fstp	dword [sp(%$s2)]	;s1 c1

	fstp	dword [sp(%$s1)]	;c1
	fstp	dword [sp(%$c1)]	;

	mov	r0, [sp(%$fz)]
	add	r0, r5			;r0 = fi = fz + i
	mov	r1, [sp(%$fz)]
	add	r1, r3
	sub	r1, r5			;r1 = gi = fz + k1 - i

.do3:
	fld	dword [sp(%$s2)]
	fmul	dword [r0+r3]
	fld	dword [sp(%$c2)]
	fmul	dword [r1+r3]
	fsubp	st1, st0		;b = s2*fi[k1] - c2*gi[k1]

	fld	dword [sp(%$c2)]
	fmul	dword [r0+r3]
	fld	dword [sp(%$s2)]
	fmul	dword [r1+r3]
	faddp	st1, st0		;a = c2*fi[k1] + s2*gi[k1] b

	fld	dword [r0]
	fsub	st0, st1		;f1 a b
	fstp	dword [sp(%$f1)]	;a b

	fadd	dword [r0]		;f0 b
	fstp	dword [sp(%$f0)]	;b

	fld	dword [r1]
	fsub	st0, st1		;g1 b
	fstp	dword [sp(%$g1)]	;b

	fadd	dword [r1]		;g0
	fstp	dword [sp(%$g0)]	;


	fld	dword [sp(%$s2)]
	fmul	dword [r0+r2]
	fld	dword [sp(%$c2)]
	fmul	dword [r1+r2]
	fsubp	st1, st0		;b = s2*fi[k3] - c2*gi[k3]

	fld	dword [sp(%$c2)]
	fmul	dword [r0+r2]
	fld	dword [sp(%$s2)]
	fmul	dword [r1+r2]
	faddp	st1, st0		;a = c2*fi[k3] + s2*gi[k3] b

	fld	dword [r0+r3*2]
	fsub	st0, st1		;f3 a b
	fstp	dword [sp(%$f3)]	;a b

	fadd	dword [r0+r3*2]		;f2 b
	fstp	dword [sp(%$f2)]	;b

	fld	dword [r1+r3*2]
	fsub	st0, st1		;g3 b
	fstp	dword [sp(%$g3)]	;b

	fadd	dword [r1+r3*2]		;g2
	fstp	dword [sp(%$g2)]	;


	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$f2)]
	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$g3)]
	fsubp	st1, st0		;b = s1*f2 - c1*g3

	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$f2)]
	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$g3)]
	faddp	st1, st0		;a = c1*f2 + s1*g3 b

	fld	dword [sp(%$f0)]
	fsub	st0, st1		;fi[k2] a b
	fstp	dword [r0+r3*2]

	fadd	dword [sp(%$f0)]	;fi[0] b
	fstp	dword [r0]

	fld	dword [sp(%$g1)]
	fsub	st0, st1		;gi[k3] b
	fstp	dword [r1+r2]

	fadd	dword [sp(%$g1)]	;gi[k1]
	fstp	dword [r1+r3]


	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$g2)]
	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$f3)]
	fsubp	st1, st0		;b = c1*g2 - s1*f3

	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$g2)]
	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$f3)]
	faddp	st1, st0		;a = s1*g2 + c1*f3 b

	fld	dword [sp(%$g0)]
	fsub	st0, st1		;gi[k2] a b
	fstp	dword [r1+r3*2]

	fadd	dword [sp(%$g0)]	;gi[0] b
	fstp	dword [r1]

	fld	dword [sp(%$f1)]
	fsub	st0, st1		;fi[k3] b
	fstp	dword [r0+r2]

	fadd	dword [sp(%$f1)]	;fi[k1]
	fstp	dword [r0+r3]


	lea	r0, [r0+r3*4]		;fi += k4
	lea	r1, [r1+r3*4]		;gi += k4
	cmp	r0, r6
	jb near	.do3			;while (fi < fn)

	add	r5, 4			;i++
	cmp	r5, r4
	jb near	.for			;while (i < kx)

	cmp	r3, [sp(%$n)]		;r3 is numerically k4 (see .do_init)
	jae	.exit			;done when k4 >= n

	add	dword [sp(%$k)], 2	;k += 2;
	lea	r3, [r3*4]		;k1 *= 4
	lea	r2, [r2*4]		;k3 *= 4
	lea	r4, [r4*4]		;kx *= 4
	mov	r0, [sp(%$fz)]		;fi
	lea	r1, [r0+r4]		;gi = fi + kx
	jmp	.do

.exit:
	popd	ebp, ebx, esi, edi
endproc

;*************************************************************

;-----------------------------------------------------------------------
;void fht_FPU_FXCH(float *fz, int n);
;
; Same interface, locals, loop structure and formulas as fht_FPU above.
; The difference is the x87 scheduling: each group of four results is
; built up on the FPU stack using fxch and then written out with four
; consecutive fstp instructions, instead of one store per result.
;
; In:    fz = pointer to the data, read and written in place
;        n  = number of floats; end bound (fn = fz + n) and limit of
;             the outer pass loop (k4 < n)
; Out:   nothing is returned in a register by this code; fz[] is updated
;-----------------------------------------------------------------------
proc	fht_FPU_FXCH

%$fz	arg	4
%$n	arg	4

%$k	local	4

%$f0	local	4
%$f1	local	4
%$f2	local	4
%$f3	local	4

%$g0	local	4
%$g1	local	4
%$g2	local	4
%$g3	local	4

%$s1	local	4
%$c1	local	4
%$s2	local	4
%$c2	local	4

%$t_s	local	4
%$t_c	local	4
	alloc

	pushd	ebp, ebx, esi, edi

fht_FPU_FXCH_1st_part:

fht_FPU_FXCH_2nd_part:

fht_FPU_FXCH_3rd_part:

.do_init:
	mov	r3, 16			;k1*fsize = 4*fsize = k4
	mov	r4, 8			;kx*fsize, kx = k1/2
	mov	r2, 48			;k3*fsize
	mov	dword [sp(%$k)], 2	;k = 2
	mov	r0, [sp(%$fz)]		;fi
	; r6 is the bound tested by "cmp r0, r6" in .do2 and .do3 and was
	; previously never initialised. Set it once here; nothing below
	; writes r6 again.
	; NOTE(review): assumes sp() does not depend on r6 - confirm in nasm.h.
	mov	r6, [sp(%$n)]
	lea	r6, [r0+r6*4]		;fn = fz + n
	lea	r1, [r0+8]		;gi = fi + kx

.do:
.do2:
	;f
	fld	dword [r0]
	fsub	dword [r0+r3]
	fld	dword [r0]
	fadd	dword [r0+r3]

	fld	dword [r0+r3*2]
	fsub	dword [r0+r2]
	fld	dword [r0+r3*2]
	fadd	dword [r0+r2]		;f2 f3 f0 f1

	fld	st3			;f1 f2 f3 f0 f1
	fld	st3			;f0 f1 f2 f3 f0 f1
	fxch	st5
	fadd	st0, st3		;f1+f3
	fxch	st4
	fadd	st0, st2		;f0+f2
	fxch	st3
	fsubp	st1, st0		;f1-f3
	fxch	st1
	fsubp	st4, st0		;f0-f2
	fxch	st2			;f1+f3 f0+f2 f1-f3 f0-f2

	fstp	dword [r0+r3]		;fi[k1]
	fstp	dword [r0]		;fi[0]
	fstp	dword [r0+r2]		;fi[k3]
	fstp	dword [r0+r3*2]		;fi[k2]

	;g
	fld	dword [r1]
	fsub	dword [r1+r3]
	fld	dword [r1]
	fadd	dword [r1+r3]

	fld	dword [D_1_41421]
	fmul	dword [r1+r2]
	fld	dword [D_1_41421]
	fmul	dword [r1+r3*2]		;g2 g3 g0 g1

	fld	st3			;g1 g2 g3 g0 g1
	fld	st3			;g0 g1 g2 g3 g0 g1
	fxch	st5
	fadd	st0, st3		;g1+g3
	fxch	st4
	fadd	st0, st2		;g0+g2
	fxch	st3
	fsubp	st1, st0		;g1-g3
	fxch	st1
	fsubp	st4, st0		;g0-g2
	fxch	st2			;g1+g3 g0+g2 g1-g3 g0-g2

	fstp	dword [r1+r3]		;gi[k1]
	fstp	dword [r1]		;gi[0]
	fstp	dword [r1+r2]		;gi[k3]
	fstp	dword [r1+r3*2]		;gi[k2]

	lea	r0, [r0+r3*4]		;fi += k4
	lea	r1, [r1+r3*4]		;gi += k4
	cmp	r0, r6
	jb	.do2			;while (fi < fn)


	; per-pass rotation step: t_c/t_s from the tables at index k,
	; and the running pair (c1, s1) reset to (1.0, 0.0)
	mov	r0, [sp(%$k)]
	fld	dword [costab_fft +r0*4]
	fld	dword [sintab_fft +r0*4]
	fld	dword [D_1_0]
	fld	dword [D_0_0]		;0.0 1.0 sin cos
	fxch	st3
	fstp	dword [sp(%$t_c)]
	fxch	st1
	fstp	dword [sp(%$t_s)]
	fstp	dword [sp(%$c1)]
	fstp	dword [sp(%$s1)]

.for_init:
	mov	r5, 4			;i = 1*fsize

.for:
	; new c1 = c1*t_c - s1*t_s, new s1 = c1*t_s + s1*t_c
	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$t_c)]
	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$t_s)]

	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$t_s)]
	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$t_c)]
	fxch	st2
	fsubp	st3, st0		;c1
	faddp	st1, st0		;s1 c1

	fld	st1
	fxch	st2
	fmul	st0, st0		;c1c1 s1 c1
	fld	st1
	fxch	st2
	fmul	st0, st0		;s1s1 c1c1 s1 c1

	fxch	st3
	fst	dword [sp(%$c1)]	;c1
	fxch	st2
	fst	dword [sp(%$s1)]	;s1 c1c1 c1 s1s1

	; c2 = c1*c1 - s1*s1, s2 = 2*c1*s1
	fmulp	st2, st0
	fsubrp	st2, st0
	fadd	st0, st0		;s2 c2
	fxch	st1
	fstp	dword [sp(%$c2)]
	fstp	dword [sp(%$s2)]

	mov	r0, [sp(%$fz)]
	mov	r1, [sp(%$fz)]
	add	r0, r5			;r0 = fi = fz + i
	add	r1, r3
	sub	r1, r5			;r1 = gi = fz + k1 - i

.do3:
	fld	dword [sp(%$s2)]
	fmul	dword [r0+r3]
	fld	dword [sp(%$c2)]
	fmul	dword [r1+r3]

	fld	dword [sp(%$c2)]
	fmul	dword [r0+r3]
	fld	dword [sp(%$s2)]
	fmul	dword [r1+r3]
	fxch	st2
	fsubp	st3, st0		;b = s2*fi[k1] - c2*gi[k1]
	faddp	st1, st0		;a = c2*fi[k1] + s2*gi[k1] b

	fld	dword [r1]
	fsub	st0, st2		;g1 a b
	fxch	st2
	fadd	dword [r1]		;g0 a g1

	fld	dword [r0]
	fsub	st0, st2		;f1 g0 a g1
	fxch	st2
	fadd	dword [r0]		;f0 g0 f1 g1

	fxch	st3			;g1 g0 f1 f0
	fstp	dword [sp(%$g1)]
	fstp	dword [sp(%$g0)]
	fstp	dword [sp(%$f1)]
	fstp	dword [sp(%$f0)]


	fld	dword [sp(%$s2)]
	fmul	dword [r0+r2]
	fld	dword [sp(%$c2)]
	fmul	dword [r1+r2]

	fld	dword [sp(%$c2)]
	fmul	dword [r0+r2]
	fld	dword [sp(%$s2)]
	fmul	dword [r1+r2]
	fxch	st2
	fsubp	st3, st0		;b = s2*fi[k3] - c2*gi[k3]
	faddp	st1, st0		;a = c2*fi[k3] + s2*gi[k3] b


	fld	dword [r1+r3*2]
	fsub	st0, st2		;g3 a b
	fxch	st2
	fadd	dword [r1+r3*2]		;g2 a g3

	fld	dword [r0+r3*2]
	fsub	st0, st2		;f3 g2 a g3
	fxch	st2
	fadd	dword [r0+r3*2]		;f2 g2 f3 g3

	fxch	st3			;g3 g2 f3 f2
	fstp	dword [sp(%$g3)]
	fstp	dword [sp(%$g2)]
	fstp	dword [sp(%$f3)]
	fstp	dword [sp(%$f2)]


	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$f2)]
	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$g3)]

	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$f2)]
	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$g3)]
	fxch	st2
	fsubp	st3, st0		;b = s1*f2 - c1*g3
	faddp	st1, st0		;a = c1*f2 + s1*g3 b

	fld	dword [sp(%$g1)]
	fsub	st0, st2		;gi[k3] a b
	fxch	st2
	fadd	dword [sp(%$g1)]	;gi[k1] a gi[k3]

	fld	dword [sp(%$f0)]
	fsub	st0, st2		;fi[k2] gi[k1] a gi[k3]
	fxch	st2
	fadd	dword [sp(%$f0)]	;fi[0] gi[k1] fi[k2] gi[k3]

	fxch	st3			;gi[k3] gi[k1] fi[k2] fi[0]
	fstp	dword [r1+r2]
	fstp	dword [r1+r3]
	fstp	dword [r0+r3*2]
	fstp	dword [r0]


	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$g2)]
	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$f3)]

	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$g2)]
	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$f3)]
	fxch	st2
	fsubp	st3, st0		;b = c1*g2 - s1*f3
	faddp	st1, st0		;a = s1*g2 + c1*f3 b

	fld	dword [sp(%$f1)]
	fsub	st0, st2		;fi[k3] a b
	fxch	st2
	fadd	dword [sp(%$f1)]	;fi[k1] a fi[k3]

	fld	dword [sp(%$g0)]
	fsub	st0, st2		;gi[k2] fi[k1] a fi[k3]
	fxch	st2
	fadd	dword [sp(%$g0)]	;gi[0] fi[k1] gi[k2] fi[k3]

	fxch	st3			;fi[k3] fi[k1] gi[k2] gi[0]
	fstp	dword [r0+r2]
	fstp	dword [r0+r3]
	fstp	dword [r1+r3*2]
	fstp	dword [r1]


	lea	r0, [r0+r3*4]		;fi += k4
	lea	r1, [r1+r3*4]		;gi += k4
	cmp	r0, r6
	jb near	.do3			;while (fi < fn)

	add	r5, 4			;i++
	cmp	r5, r4
	jb near	.for			;while (i < kx)

	cmp	r3, [sp(%$n)]		;r3 is numerically k4 (see .do_init)
	jae	.exit			;done when k4 >= n

	add	dword [sp(%$k)], 2	;k += 2;
	lea	r3, [r3*4]		;k1 *= 4
	lea	r2, [r2*4]		;k3 *= 4
	lea	r4, [r4*4]		;kx *= 4
	mov	r0, [sp(%$fz)]		;fi
	lea	r1, [r0+r4]		;gi = fi + kx
	jmp	.do

.exit:
	popd	ebp, ebx, esi, edi
endproc

	end