; new count bit routine
;       part of this code is origined from
;       new GOGO-no-coda (1999, 2000)
;       Copyright (C) 1999 shigeo
;       modified by Keiichi SAKAI
;
; Syntax : NASM (Intel operand order), 32-bit x86, MMX.
; ABI    : arguments are read from the stack relative to esp (see the
;          function header below); result is returned in eax.
; Macros : globaldef, externdef, segment_data, segment_code,
;          PIC_OFFSETTABLE, PIC_BASE(), PIC_EBP_REL(), get_pc.bp, pmov and
;          end are not defined in this file; presumably they come from
;          nasm.h -- verify there.

%include "nasm.h"

        globaldef       choose_table_MMX
        globaldef       MMX_masking     ; NOTE(review): no definition of MMX_masking is visible in this file -- confirm it is still needed

        externdef       largetbl
        externdef       t1l
        externdef       table23
        externdef       table56

        segment_data
        align   16
; Packed-word constants used by the MMX code below.
D14_14_14_14    dd      0x000E000E, 0x000E000E  ; four words of 14 (threshold for pcmpgtw)
D15_15_15_15    dd      0xfff0fff0, 0xfff0fff0  ; four words of 0xfff0 (saturating add => min(ix,15)-16 as signed word)
mul_add         dd      0x00010010, 0x00010010  ; pmaddwd weights: 16*x + y
mul_add23       dd      0x00010003, 0x00010003  ; pmaddwd weights:  3*x + y
mul_add56       dd      0x00010004, 0x00010004  ; pmaddwd weights:  4*x + y

; 8-byte entries indexed by 16*x + y (see from3): the low dword packs two
; 16-bit values, the high dword holds a third value.
tableDEF:
        dd      0x00010003,0x01,0x00050005,0x05,0x00070006,0x07,0x00090008,0x08,0x000a0008, 0x09
        dd      0x000a0009,0x0a,0x000b000a,0x0a,0x000b000a,0x0b,0x000c000a,0x0a,0x000c000b, 0x0b
        dd      0x000c000b,0x0c,0x000d000c,0x0c,0x000d000c,0x0d,0x000d000c,0x0d,0x000e000d, 0x0e
        dd      0x000b000e,0x0e,0x00040005,0x04,0x00060005,0x06,0x00080007,0x08,0x00090008, 0x09
        dd      0x000a0009,0x0a,0x000b0009,0x0a,0x000b000a,0x0b,0x000b000a,0x0b,0x000c000a, 0x0b
        dd      0x000c000b,0x0b,0x000c000b,0x0c,0x000d000c,0x0c,0x000e000c,0x0d,0x000d000c, 0x0e
        dd      0x000e000d,0x0e,0x000b000d,0x0e,0x00070006,0x07,0x00080007,0x08,0x00090007, 0x09
        dd      0x000a0008,0x0a,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a, 0x0c
        dd      0x000d000a,0x0b,0x000c000b,0x0c,0x000d000b,0x0c,0x000d000c,0x0d,0x000d000c, 0x0d
        dd      0x000e000d,0x0e,0x000e000d,0x0f,0x000c000d,0x0f,0x00090007,0x08,0x00090008, 0x09
        dd      0x000a0008,0x0a,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a, 0x0c
        dd      0x000c000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000c,0x0d,0x000e000c, 0x0d
        dd      0x000e000c,0x0d,0x000f000d,0x0e,0x000f000d,0x0f,0x000d000d,0x0f,0x000a0008, 0x09
        dd      0x000a0008,0x09,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a, 0x0c
        dd      0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0c,0x000e000b,0x0d,0x000e000c, 0x0d
        dd      0x000e000c,0x0e,0x000f000c,0x0e,0x000f000d,0x0f,0x000f000d,0x0f,0x000c000d, 0x10
        dd      0x000a0009,0x0a,0x000a0009,0x0a,0x000b0009,0x0b,0x000b000a,0x0c,0x000c000a, 0x0c
        dd      0x000d000a,0x0c,0x000d000b,0x0d,0x000e000b,0x0d,0x000d000b,0x0d,0x000e000b, 0x0d
        dd      0x000e000c,0x0e,0x000f000c,0x0d,0x000f000d,0x0f,0x000f000d,0x0f,0x0010000d, 0x10
        dd      0x000d000e,0x10,0x000b000a,0x0a,0x000b0009,0x0b,0x000b000a,0x0c,0x000c000a, 0x0c
        dd      0x000d000a,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000e000b, 0x0d
        dd      0x000e000c,0x0e,0x000e000c,0x0e,0x000e000c,0x0e,0x000f000d,0x0f,0x000f000d, 0x0f
        dd      0x0010000e,0x10,0x000d000e,0x10,0x000b000a,0x0b,0x000b000a,0x0b,0x000c000a, 0x0c
        dd      0x000c000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0e,0x000e000c, 0x0e
        dd      0x000e000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0f,0x000f000c,0x0f,0x000f000d, 0x0f
        dd      0x0011000d,0x10,0x0011000d,0x12,0x000d000e,0x12,0x000b000a,0x0a,0x000c000a, 0x0a
        dd      0x000c000a,0x0b,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000b, 0x0d
        dd      0x000e000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0e,0x000f000d, 0x0f
        dd      0x0010000d,0x0f,0x0010000e,0x10,0x0010000e,0x11,0x000d000e,0x11,0x000c000a, 0x0b
        dd      0x000c000a,0x0b,0x000c000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000b, 0x0d
        dd      0x000e000c,0x0d,0x000f000c,0x0f,0x000f000c,0x0e,0x000f000d,0x0f,0x000f000d, 0x0f
        dd      0x0010000d,0x10,0x000f000d,0x10,0x0010000e,0x10,0x000f000e,0x12,0x000e000e, 0x11
        dd      0x000c000b,0x0b,0x000d000b,0x0c,0x000c000b,0x0c,0x000d000b,0x0d,0x000e000c, 0x0d
        dd      0x000e000c,0x0e,0x000e000c,0x0e,0x000e000c,0x0f,0x000f000c,0x0e,0x0010000d, 0x0f
        dd      0x0010000d,0x10,0x0010000d,0x0f,0x0011000d,0x10,0x0011000e,0x11,0x0010000f, 0x12
        dd      0x000d000e,0x13,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b, 0x0d
        dd      0x000e000c,0x0e,0x000e000c,0x0e,0x000f000c,0x0e,0x0010000c,0x0e,0x0010000d, 0x0f
        dd      0x0010000d,0x0f,0x0010000d,0x0f,0x0010000d,0x10,0x0010000e,0x11,0x000f000e, 0x11
        dd      0x0010000e,0x11,0x000e000f,0x12,0x000d000c,0x0c,0x000e000c,0x0d,0x000e000b, 0x0d
        dd      0x000e000c,0x0e,0x000e000c,0x0e,0x000f000c,0x0f,0x000f000d,0x0e,0x000f000d, 0x0f
        dd      0x000f000d,0x10,0x0011000d,0x10,0x0010000d,0x11,0x0010000d,0x11,0x0010000e, 0x11
        dd      0x0010000e,0x12,0x0012000f,0x12,0x000e000f,0x12,0x000f000c,0x0d,0x000e000c, 0x0d
        dd      0x000e000c,0x0e,0x000e000c,0x0f,0x000f000c,0x0f,0x000f000d,0x0f,0x0010000d, 0x10
        dd      0x0010000d,0x10,0x0010000d,0x10,0x0012000e,0x10,0x0011000e,0x10,0x0011000e, 0x11
        dd      0x0011000e,0x12,0x0013000e,0x11,0x0011000f,0x12,0x000e000f,0x12,0x000e000d, 0x0e
        dd      0x000f000d,0x0e,0x000d000d,0x0e,0x000e000d,0x0f,0x0010000d,0x0f,0x0010000d, 0x0f
        dd      0x000f000d,0x11,0x0010000d,0x10,0x0010000e,0x10,0x0011000e,0x13,0x0012000e, 0x11
        dd      0x0011000e,0x11,0x0013000f,0x11,0x0011000f,0x13,0x0010000e,0x12,0x000e000f, 0x12
        dd      0x000b000d,0x0d,0x000b000d,0x0e,0x000b000d,0x0f,0x000c000d,0x10,0x000c000d, 0x10
        dd      0x000d000d,0x10,0x000d000d,0x11,0x000d000e,0x10,0x000e000e,0x11,0x000e000e, 0x11
        dd      0x000e000e,0x12,0x000e000e,0x12,0x000e000f,0x15,0x000e000f,0x14,0x000e000f, 0x15
        dd      0x000c000f,0x12

; Same 8-byte entry layout as tableDEF.  Used with base tableABC for the
; max 6..7 case and with base tableABC+9*8 for the max 4..5 case.
tableABC:
        dd      0x00020004,0x1,0x00040004,0x4,0x00060006,0x7,0x00080008,0x9,0x00090009,0xa,0x000a000a,0xa
        dd      0x0009000a,0xa,0x000a000a,0xb,0x00000000,0x0,0x00020003,0x1,0x00040004,0x4,0x00070006,0x7
        dd      0x00090007,0x9,0x00090009,0x9,0x000a000a,0xa,0x00000000,0x0,0x00040004,0x4,0x00050005,0x6
        dd      0x00060006,0x8,0x00080007,0x9,0x000a0009,0xa,0x000a0009,0xb,0x0009000a,0xa,0x000a000a,0xa
        dd      0x00000000,0x0,0x00040004,0x4,0x00040005,0x6,0x00060006,0x8,0x000a0007,0x9,0x000a0008,0x9
        dd      0x000a000a,0xa,0x00000000,0x0,0x00060006,0x7,0x00070006,0x8,0x00080007,0x9,0x00090008,0xa
        dd      0x000a0009,0xb,0x000b000a,0xc,0x000a0009,0xb,0x000a000a,0xb,0x00000000,0x0,0x00070005,0x7
        dd      0x00060006,0x7,0x00080007,0x9,0x000a0008,0xa,0x000a0009,0xa,0x000b000a,0xb,0x00000000,0x0
        dd      0x00080007,0x8,0x00080007,0x9,0x00090008,0xa,0x000b0008,0xb,0x000a0009,0xc,0x000c000a,0xc
        dd      0x000a000a,0xb,0x000b000a,0xc,0x00000000,0x0,0x00090007,0x8,0x000a0007,0x9,0x000a0008,0xa
        dd      0x000b0009,0xb,0x000b0009,0xb,0x000c000a,0xb,0x00000000,0x0,0x00090008,0x9,0x000a0008,0xa
        dd      0x000a0009,0xb,0x000b0009,0xc,0x000b000a,0xc,0x000c000a,0xc,0x000b000a,0xc,0x000c000b,0xc
        dd      0x00000000,0x0,0x00090008,0x8,0x00090008,0x9,0x000a0009,0xa,0x000b0009,0xb,0x000c000a,0xb
        dd      0x000c000b,0xc,0x00000000,0x0,0x00090009,0xa,0x000a0009,0xb,0x000b000a,0xc,0x000c000a,0xc
        dd      0x000c000a,0xd,0x000d000b,0xd,0x000c000a,0xc,0x000d000b,0xd,0x00000000,0x0,0x000a0009,0x9
        dd      0x000a0009,0xa,0x000b000a,0xb,0x000b000a,0xc,0x000d000b,0xc,0x000d000b,0xc,0x00000000,0x0
        dd      0x00090009,0x9,0x00090009,0xa,0x00090009,0xb,0x000a000a,0xc,0x000b000a,0xc,0x000c000b,0xc
        dd      0x000c000b,0xd,0x000c000c,0xd,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0
        dd      0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x0009000a,0xa,0x0009000a,0xa
        dd      0x000a000a,0xb,0x000b000b,0xc,0x000c000b,0xc,0x000c000b,0xd,0x000c000b,0xd,0x000c000c,0xd
        dd      0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0
        dd      0x0,0x00000000, 0x0,0x00000000

; 8-byte entries indexed by bsr(max-15); used as the pmaddwd multiplier for
; the "greater than 14" counters in the escape path.
linbits32:
        dd      0x00040004,0x10001,0x00040004,0x20002,0x00040004,0x30003,0x00040004,0x40004
        dd      0x00050005,0x60006,0x00060006,0x60006,0x00070007,0x80008,0x00080008,0x80008
        dd      0x00090009,0xa000a,0x000b000b,0xa000a,0x000b000b,0xd000d,0x000d000d,0xd000d
        dd      0x000d000d,0xd000d


; 16-bit entries indexed by bsr(max-15): two candidate return values, one
; in the low byte and one in the high byte (see .chooseE_s1).
choose_table_H:
        dw      0x1810, 0x1811, 0x1812, 0x1813, 0x1914, 0x1a14, 0x1b15, 0x1c15
        dw      0x1d16, 0x1e16, 0x1e17, 0x1f17, 0x1f17

; Dispatch table for max = 0..15, stored as offsets from choose_table_MMX
; so that the target can be formed position-independently.
choose_jump_table_L:
        dd      table_MMX.L_case_0    - choose_table_MMX
        dd      table_MMX.L_case_1    - choose_table_MMX
        dd      table_MMX.L_case_2    - choose_table_MMX
        dd      table_MMX.L_case_3    - choose_table_MMX
        dd      table_MMX.L_case_45   - choose_table_MMX
        dd      table_MMX.L_case_45   - choose_table_MMX
        dd      table_MMX.L_case_67   - choose_table_MMX
        dd      table_MMX.L_case_67   - choose_table_MMX
        dd      table_MMX.L_case_8_15 - choose_table_MMX
        dd      table_MMX.L_case_8_15 - choose_table_MMX
        dd      table_MMX.L_case_8_15 - choose_table_MMX
        dd      table_MMX.L_case_8_15 - choose_table_MMX
        dd      table_MMX.L_case_8_15 - choose_table_MMX
        dd      table_MMX.L_case_8_15 - choose_table_MMX
        dd      table_MMX.L_case_8_15 - choose_table_MMX
        dd      table_MMX.L_case_8_15 - choose_table_MMX

        segment_code
;
; use MMX
;

PIC_OFFSETTABLE

        align   16
;-----------------------------------------------------------------------
; int choose_table(int *ix, int *end, int *s)
;
; In (stack, offsets valid after the initial "push ebp"):
;       [esp+8]  = ix   (start of the int array)
;       [esp+12] = end  (one past the last int)
;       [esp+16] = s    (pointer to the running sum to update)
; Out:  eax = selected table number, or -1 when max > 8191+15.
;
; Phase 1 computes the word-wise unsigned maximum of the input
; (psubusw + paddw == max), then dispatches on that maximum:
;       max == 0         -> return eax (== 0), *s untouched
;       max == 1         -> *s += sum of t1l bytes, return 1
;       max == 2 / 3     -> from2 with table23 / table56
;       max 4..15        -> from3 with tableABC / tableDEF
;       16..8191+15      -> .with_ESC (largetbl + linbits32)
;       above that       -> .with_ESC1: *s = max, return -1
;
; ebp is used as the PIC base and is saved/restored here; ebx, esi and
; edi are pushed/popped on the paths that use them.  Every path executes
; emms before returning.
;
; NOTE(review): the loops advance 8 or 16 bytes at a time and terminate
; only when the negative offset reaches exactly 0, so end-ix is assumed
; to be a non-zero multiple of 8 bytes -- confirm against the callers.
; NOTE(review): the max is taken on 16-bit lanes and then read as a
; dword, so inputs are assumed to fit in 16 bits -- confirm.
;-----------------------------------------------------------------------
choose_table_MMX:
        push    ebp
        call    get_pc.bp
        add     ebp, PIC_BASE()

        mov     ecx,[esp+8]     ;ecx = begin
        mov     edx,[esp+12]    ;edx = end
        sub     ecx,edx         ;ecx = begin-end (should be negative)
        test    ecx,8           ; odd number of 8-byte pairs? (flags survive the MMX ops below)
        pxor    mm0,mm0         ;mm0=[0:0]
        movq    mm1,[edx+ecx]   ; mm1 = first pair
        jz      .lp

        add     ecx,8           ; first pair already in mm1; skip it
        jz      .exit

        align   4
.lp:
        movq    mm4,[edx+ecx]
        movq    mm5,[edx+ecx+8]
        add     ecx,16          ; sets ZF for the jnz below (MMX ops leave flags alone)
        psubusw mm4,mm0         ; strictly this ought to be a dword operation,
        psubusw mm5,mm1         ; but there is no such instruction :-p
        paddw   mm0,mm4         ; however, the values handled here are at most 8191+15, so it is not a problem
        paddw   mm1,mm5         ; mm0/mm1 = running word-wise max
        jnz     .lp
.exit:
        psubusw mm1,mm0         ; this, too, ought to be a dword operation
        paddw   mm0,mm1         ; mm0 = max(mm0, mm1)

        movq    mm4,mm0
        punpckhdq       mm4,mm4
        psubusw mm4,mm0         ; this, too, ought to be a dword operation
        paddw   mm0,mm4         ; low dword of mm0 = max(low dword, high dword)
        movd    eax,mm0         ; eax = max

        cmp     eax,15
        ja      .with_ESC
        lea     ecx,[PIC_EBP_REL(choose_table_MMX)]
        add     ecx,[PIC_EBP_REL(choose_jump_table_L+eax*4)]
        jmp     ecx             ; dispatch on max (0..15)

; max > 8191+15: store max into *s and return -1.
.with_ESC1:
        emms
        mov     ecx, [esp+16]   ; *s
        mov     [ecx], eax
        or      eax,-1
        pop     ebp
        ret

; 15 < max <= 8191+15
.with_ESC:
        cmp     eax, 8191+15
        ja      .with_ESC1

        sub     eax,15
        push    ebx
        push    esi
        bsr     eax, eax        ; eax = index into linbits32 / choose_table_H
%assign _P 4*2                  ; bytes pushed since entry (ebx, esi)
        movq    mm5, [PIC_EBP_REL(D15_15_15_15)]
        movq    mm6, [PIC_EBP_REL(D14_14_14_14)]
        movq    mm3, [PIC_EBP_REL(mul_add)]

        mov     ecx, [esp+_P+8]         ; = ix
;       mov     edx, [esp+_P+12]        ; = end  (edx still holds end from entry)
        sub     ecx, edx

        xor     esi, esi        ; sum = 0
        test    ecx, 8
        pxor    mm7, mm7        ; linbits_sum: count of values exceeding 14
        jz      .H_dual_lp1

        ; odd number of pairs: handle the first pair alone
        movq    mm0, [edx+ecx]
        add     ecx,8           ; ZF consumed by "jz .H_dual_exit" below
        packssdw        mm0,mm7
        movq    mm2, mm0
        paddusw mm0, mm5        ; mm0 = min(ix, 15)+0xfff0
        pcmpgtw mm2, mm6        ; greater than 14?
        psubw   mm7, mm2        ; when greater than 14, linbits_sum++;
        pmaddwd mm0, mm3        ; {0, 0, y, x}*{1, 16, 1, 16}
        movd    ebx, mm0
        ; +(16*16+16)*4 cancels the -16 bias that 0xfff0 put on x and y
        mov     esi, [PIC_EBP_REL(largetbl+ebx*4+(16*16+16)*4)]

        jz      .H_dual_exit

        align   4
.H_dual_lp1:
        movq    mm0, [edx+ecx]
        movq    mm1, [edx+ecx+8]
        packssdw        mm0,mm1
        movq    mm2, mm0
        paddusw mm0, mm5        ; mm0 = min(ix, 15)+0xfff0
        pcmpgtw mm2, mm6        ; greater than 14?
        pmaddwd mm0, mm3        ; {y, x, y, x}*{1, 16, 1, 16}
        movd    ebx, mm0
        punpckhdq       mm0,mm0
        add     esi, [PIC_EBP_REL(largetbl+ebx*4+(16*16+16)*4)]
        movd    ebx, mm0
        add     esi, [PIC_EBP_REL(largetbl+ebx*4+(16*16+16)*4)]
        add     ecx, 16         ; ZF consumed by the jnz below
        psubw   mm7, mm2        ; when greater than 14, linbits_sum++;
        jnz     .H_dual_lp1

.H_dual_exit:
        pmov    mm1,mm7         ; pmov: presumably a nasm.h macro for a quadword move -- verify
        punpckhdq       mm7,mm7
        paddd   mm7,mm1         ; low dword = low counters + high counters
        punpckldq       mm7,mm7 ; duplicate it into both dwords

        pmaddwd mm7, [PIC_EBP_REL(linbits32+eax*8)]     ; linbits
        mov     ax, [PIC_EBP_REL(choose_table_H+eax*2)] ; al / ah = the two candidate tables

        movd    ecx, mm7
        punpckhdq       mm7,mm7
        movd    edx,mm7
        emms
        shl     edx, 16
        add     ecx, edx        ; pack both linbits totals into one dword

        add     ecx, esi        ; add the packed largetbl totals

        pop     esi
        pop     ebx

        mov     edx, ecx
        and     ecx, 0xffff     ; ecx = sum2
        shr     edx, 16         ; edx = sum

        cmp     edx, ecx
        jle     .chooseE_s1
        mov     edx, ecx        ; sum2 is smaller: take it ...
        shr     eax, 8          ; ... and the table number held in ah
.chooseE_s1:
        mov     ecx, [esp+16]   ; *s
        and     eax, 0xff
        add     [ecx], edx
        pop     ebp
        ret

; max == 0: eax is already 0; *s is left unchanged.
table_MMX.L_case_0:
        emms
        pop     ebp
        ret

; max == 1: *s += sum over pairs of (byte) t1l[2*ix[i] + ix[i+1]]; return 1.
table_MMX.L_case_1:
        emms
        mov     eax, [esp+16]   ; *s
        mov     ecx, [esp+8]    ; *ix
        sub     ecx, edx        ; edx still holds end from entry
        push    ebx
.lp:
        mov     ebx, [edx+ecx]
        add     ebx, ebx
        add     ebx, [edx+ecx+4]
        movzx   ebx, byte [PIC_EBP_REL(ebx+t1l)]
        add     [eax], ebx
        add     ecx, 8
        jnz     .lp
        pop     ebx
        mov     eax, 1
        pop     ebp
        ret

; max 4..5: base table number 7, table base tableABC+9*8.
table_MMX.L_case_45:
        push    dword 7
        lea     ecx, [PIC_EBP_REL(tableABC+9*8)]
        jmp     from3

; max 6..7: base table number 10, table base tableABC.
table_MMX.L_case_67:
        push    dword 10
        lea     ecx, [PIC_EBP_REL(tableABC)]
        jmp     from3

; max 8..15: base table number 13, table base tableDEF.
table_MMX.L_case_8_15:
        push    dword 13
        lea     ecx, [PIC_EBP_REL(tableDEF)]

; Three-candidate chooser.
; In:   ecx = table base (8-byte entries), edx = end,
;       [esp] = base table number pushed by the case stub above.
; Accumulates table[16*x + y] for every pair, then returns
; base + (0, 1 or 2) for the smallest of the three totals and adds that
; total to *s.
from3:
        mov     eax,[esp+12]    ;eax = ix (begin pointer)
;       mov     edx,[esp+16]    ;edx = end (still held in edx from entry)

        push    ebx
        sub     eax, edx        ; eax = begin-end (negative byte offset)

        movq    mm5,[PIC_EBP_REL(mul_add)]
        pxor    mm2,mm2         ;mm2 = sum

        test    eax, 8
        jz      .choose3_lp1
; odd length
        movq    mm0,[edx+eax]   ;mm0 = ix[0] | ix[1]
        add     eax,8           ; ZF consumed by "jz .choose3_exit" below
        packssdw        mm0,mm2

        pmaddwd mm0,mm5
        movd    ebx,mm0

        movq    mm2,    [ecx+ebx*8]

        jz      .choose3_exit

        align   4
.choose3_lp1:
        movq    mm0,[edx+eax]
        movq    mm1,[edx+eax+8]
        add     eax,16          ; ZF consumed by the jnz below
        packssdw        mm0,mm1 ;mm0 = ix[0]|ix[1]|ix[2]|ix[3]
        pmaddwd mm0,mm5
        movd    ebx,mm0
        punpckhdq       mm0,mm0
        paddd   mm2,    [ecx+ebx*8]
        movd    ebx,mm0
        paddd   mm2,    [ecx+ebx*8]
        jnz     .choose3_lp1
.choose3_exit:
;       xor     eax,eax         ; not needed: eax is 0 here (loop counter ran out)
        movd    ebx, mm2
        punpckhdq       mm2,mm2
        mov     ecx, ebx
        and     ecx, 0xffff     ; ecx = sum2
        shr     ebx, 16         ; ebx = sum1
        movd    edx, mm2        ; edx = sum

        cmp     edx, ebx
        jle     .choose3_s1
        mov     edx, ebx
        inc     eax             ; candidate 1
.choose3_s1:
        emms
        pop     ebx
        cmp     edx, ecx
        jle     .choose3_s2
        mov     edx, ecx
        mov     eax, 2          ; candidate 2
.choose3_s2:
        pop     ecx             ; base table number pushed by the case stub
        add     eax, ecx
        mov     ecx, [esp+16]   ; *s
        add     [ecx], edx
        pop     ebp
        ret

; max == 2: base table number 2, table23, index 3*x + y.
table_MMX.L_case_2:
        push    dword 2
        lea     ecx,[PIC_EBP_REL(table23)]
        pmov    mm5,[PIC_EBP_REL(mul_add23)]
        jmp     from2
; max == 3: base table number 5, table56, index 4*x + y.
table_MMX.L_case_3:
        push    dword 5
        lea     ecx,[PIC_EBP_REL(table56)]
        pmov    mm5,[PIC_EBP_REL(mul_add56)]

; Two-candidate chooser.
; In:   ecx = table base (4-byte entries), mm5 = pmaddwd index weights,
;       edx = end, [esp] = base table number pushed by the case stub.
; Accumulates two packed 16-bit totals in edi, then returns base or
; base+1 for the smaller one and adds that total to *s.
from2:
        mov     eax,[esp+12]    ;eax = ix (begin pointer)
;       mov     edx,[esp+16]    ;edx = end (still held in edx from entry)
        push    ebx
        push    edi

        sub     eax, edx        ; eax = begin-end (negative byte offset)
        xor     edi, edi        ; edi = packed totals
        test    eax, 8
        jz      .choose2_lp1
; odd length
        movq    mm0,[edx+eax]   ;mm0 = ix[0] | ix[1]
        pxor    mm2,mm2         ;mm2 = 0 (upper half for the pack)
        packssdw        mm0,mm2

        pmaddwd mm0,mm5
        movd    ebx,mm0

        mov     edi,    [ecx+ebx*4]

        add     eax,8
        jz      .choose2_exit

        align   4
.choose2_lp1:
        movq    mm0,[edx+eax]
        movq    mm1,[edx+eax+8]
        packssdw        mm0,mm1 ;mm0 = ix[0]|ix[1]|ix[2]|ix[3]
        pmaddwd mm0,mm5
        movd    ebx,mm0
        punpckhdq       mm0,mm0
        add     edi,    [ecx+ebx*4]
        movd    ebx,    mm0
        add     edi,    [ecx+ebx*4]
        add     eax,16          ; carry is set when the negative offset wraps to 0
        jnc     .choose2_lp1
.choose2_exit:
        mov     ecx,    edi
        pop     edi
        pop     ebx
        pop     eax     ; table num.
        emms

        mov     edx, ecx
        and     ecx, 0xffff     ; ecx = sum2
        shr     edx, 16         ; edx = sum1

        cmp     edx, ecx
        jle     .choose2_s1
        mov     edx, ecx
        inc     eax
.choose2_s1:
        mov     ecx, [esp+16]   ; *s
        add     [ecx], edx
        pop     ebp
        ret

        end