; new count bit routine
;	part of this code is derived from
;	new GOGO-no-coda (1999, 2000)
;	Copyright (C) 1999 shigeo
;	modified by Keiichi SAKAI

; Syntax: NASM (Intel operand order), 32-bit x86 with MMX.
; nasm.h supplies the macros used below (globaldef/externdef, segment_data,
; segment_code, PIC_OFFSETTABLE, PIC_BASE, PIC_EBP_REL, pmov).
%include "nasm.h"

; Symbols exported by this file.
	globaldef	choose_table_MMX
; NOTE(review): MMX_masking is exported here but no definition is visible
; in this file -- confirm it is defined elsewhere or drop the export.
	globaldef	MMX_masking

; Lookup tables defined outside this file.
	externdef	largetbl	; dword entries, indexed by x*16+y in the ESC path
	externdef	t1l		; byte entries, indexed by x*2+y (max == 1 case)
	externdef	table23		; dword entries, indexed by x*3+y (max == 2 case)
	externdef	table56		; dword entries, indexed by x*4+y (max == 3 case)

; 64-bit MMX constants used by choose_table_MMX.
	segment_data
	align	16
; Four words of 14: signed compare threshold (pcmpgtw) for counting values > 14.
D14_14_14_14	dd	0x000E000E, 0x000E000E
; Four words of 0xfff0: added with unsigned saturation (paddusw), giving
; min(v, 15) + 0xfff0 per word.
D15_15_15_15	dd	0xfff0fff0, 0xfff0fff0
; pmaddwd multipliers, word pairs {lo, hi}: each packed pair (x, y) becomes
; one dword index x*lo + y*hi.
mul_add		dd	0x00010010, 0x00010010	; x*16 + y
mul_add23	dd	0x00010003, 0x00010003	; x*3  + y
mul_add56	dd	0x00010004, 0x00010004	; x*4  + y
; tableDEF: 256 entries of 8 bytes, indexed by x*16+y (see mul_add).
; Used by table_MMX.L_case_8_15 with base table number 13.
; Entry layout as consumed by from3:
;   dword 0, bits  0..15 : bit count for base table + 2
;   dword 0, bits 16..31 : bit count for base table + 1
;   dword 1              : bit count for base table + 0
tableDEF:
	dd	0x00010003,0x01,0x00050005,0x05,0x00070006,0x07,0x00090008,0x08,0x000a0008, 0x09
	dd	0x000a0009,0x0a,0x000b000a,0x0a,0x000b000a,0x0b,0x000c000a,0x0a,0x000c000b, 0x0b
	dd	0x000c000b,0x0c,0x000d000c,0x0c,0x000d000c,0x0d,0x000d000c,0x0d,0x000e000d, 0x0e
	dd	0x000b000e,0x0e,0x00040005,0x04,0x00060005,0x06,0x00080007,0x08,0x00090008, 0x09
	dd	0x000a0009,0x0a,0x000b0009,0x0a,0x000b000a,0x0b,0x000b000a,0x0b,0x000c000a, 0x0b
	dd	0x000c000b,0x0b,0x000c000b,0x0c,0x000d000c,0x0c,0x000e000c,0x0d,0x000d000c, 0x0e
	dd	0x000e000d,0x0e,0x000b000d,0x0e,0x00070006,0x07,0x00080007,0x08,0x00090007, 0x09
	dd	0x000a0008,0x0a,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a, 0x0c
	dd	0x000d000a,0x0b,0x000c000b,0x0c,0x000d000b,0x0c,0x000d000c,0x0d,0x000d000c, 0x0d
	dd	0x000e000d,0x0e,0x000e000d,0x0f,0x000c000d,0x0f,0x00090007,0x08,0x00090008, 0x09
	dd	0x000a0008,0x0a,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a, 0x0c
	dd	0x000c000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000c,0x0d,0x000e000c, 0x0d
	dd	0x000e000c,0x0d,0x000f000d,0x0e,0x000f000d,0x0f,0x000d000d,0x0f,0x000a0008, 0x09
	dd	0x000a0008,0x09,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a, 0x0c
	dd	0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0c,0x000e000b,0x0d,0x000e000c, 0x0d
	dd	0x000e000c,0x0e,0x000f000c,0x0e,0x000f000d,0x0f,0x000f000d,0x0f,0x000c000d, 0x10
	dd	0x000a0009,0x0a,0x000a0009,0x0a,0x000b0009,0x0b,0x000b000a,0x0c,0x000c000a, 0x0c
	dd	0x000d000a,0x0c,0x000d000b,0x0d,0x000e000b,0x0d,0x000d000b,0x0d,0x000e000b, 0x0d
	dd	0x000e000c,0x0e,0x000f000c,0x0d,0x000f000d,0x0f,0x000f000d,0x0f,0x0010000d, 0x10
	dd	0x000d000e,0x10,0x000b000a,0x0a,0x000b0009,0x0b,0x000b000a,0x0c,0x000c000a, 0x0c
	dd	0x000d000a,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000e000b, 0x0d
	dd	0x000e000c,0x0e,0x000e000c,0x0e,0x000e000c,0x0e,0x000f000d,0x0f,0x000f000d, 0x0f
	dd	0x0010000e,0x10,0x000d000e,0x10,0x000b000a,0x0b,0x000b000a,0x0b,0x000c000a, 0x0c
	dd	0x000c000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0e,0x000e000c, 0x0e
	dd	0x000e000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0f,0x000f000c,0x0f,0x000f000d, 0x0f
	dd	0x0011000d,0x10,0x0011000d,0x12,0x000d000e,0x12,0x000b000a,0x0a,0x000c000a, 0x0a
	dd	0x000c000a,0x0b,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000b, 0x0d
	dd	0x000e000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0e,0x000f000d, 0x0f
	dd	0x0010000d,0x0f,0x0010000e,0x10,0x0010000e,0x11,0x000d000e,0x11,0x000c000a, 0x0b
	dd	0x000c000a,0x0b,0x000c000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000b, 0x0d
	dd	0x000e000c,0x0d,0x000f000c,0x0f,0x000f000c,0x0e,0x000f000d,0x0f,0x000f000d, 0x0f
	dd	0x0010000d,0x10,0x000f000d,0x10,0x0010000e,0x10,0x000f000e,0x12,0x000e000e, 0x11
	dd	0x000c000b,0x0b,0x000d000b,0x0c,0x000c000b,0x0c,0x000d000b,0x0d,0x000e000c, 0x0d
	dd	0x000e000c,0x0e,0x000e000c,0x0e,0x000e000c,0x0f,0x000f000c,0x0e,0x0010000d, 0x0f
	dd	0x0010000d,0x10,0x0010000d,0x0f,0x0011000d,0x10,0x0011000e,0x11,0x0010000f, 0x12
	dd	0x000d000e,0x13,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b, 0x0d
	dd	0x000e000c,0x0e,0x000e000c,0x0e,0x000f000c,0x0e,0x0010000c,0x0e,0x0010000d, 0x0f
	dd	0x0010000d,0x0f,0x0010000d,0x0f,0x0010000d,0x10,0x0010000e,0x11,0x000f000e, 0x11
	dd	0x0010000e,0x11,0x000e000f,0x12,0x000d000c,0x0c,0x000e000c,0x0d,0x000e000b, 0x0d
	dd	0x000e000c,0x0e,0x000e000c,0x0e,0x000f000c,0x0f,0x000f000d,0x0e,0x000f000d, 0x0f
	dd	0x000f000d,0x10,0x0011000d,0x10,0x0010000d,0x11,0x0010000d,0x11,0x0010000e, 0x11
	dd	0x0010000e,0x12,0x0012000f,0x12,0x000e000f,0x12,0x000f000c,0x0d,0x000e000c, 0x0d
	dd	0x000e000c,0x0e,0x000e000c,0x0f,0x000f000c,0x0f,0x000f000d,0x0f,0x0010000d, 0x10
	dd	0x0010000d,0x10,0x0010000d,0x10,0x0012000e,0x10,0x0011000e,0x10,0x0011000e, 0x11
	dd	0x0011000e,0x12,0x0013000e,0x11,0x0011000f,0x12,0x000e000f,0x12,0x000e000d, 0x0e
	dd	0x000f000d,0x0e,0x000d000d,0x0e,0x000e000d,0x0f,0x0010000d,0x0f,0x0010000d, 0x0f
	dd	0x000f000d,0x11,0x0010000d,0x10,0x0010000e,0x10,0x0011000e,0x13,0x0012000e, 0x11
	dd	0x0011000e,0x11,0x0013000f,0x11,0x0011000f,0x13,0x0010000e,0x12,0x000e000f, 0x12
	dd	0x000b000d,0x0d,0x000b000d,0x0e,0x000b000d,0x0f,0x000c000d,0x10,0x000c000d, 0x10
	dd	0x000d000d,0x10,0x000d000d,0x11,0x000d000e,0x10,0x000e000e,0x11,0x000e000e, 0x11
	dd	0x000e000e,0x12,0x000e000e,0x12,0x000e000f,0x15,0x000e000f,0x14,0x000e000f, 0x15
	dd	0x000c000f,0x12

; tableABC: 128 entries of 8 bytes, indexed by x*16+y (see mul_add).
; Shared by two dispatch cases:
;   table_MMX.L_case_67 uses it from offset 0      with base table number 10
;   table_MMX.L_case_45 uses it from entry 9 (+9*8) with base table number 7
; Entry layout as consumed by from3:
;   dword 0, bits  0..15 : bit count for base table + 2
;   dword 0, bits 16..31 : bit count for base table + 1
;   dword 1              : bit count for base table + 0
tableABC:
	dd	0x00020004,0x1,0x00040004,0x4,0x00060006,0x7,0x00080008,0x9,0x00090009,0xa,0x000a000a,0xa
	dd	0x0009000a,0xa,0x000a000a,0xb,0x00000000,0x0,0x00020003,0x1,0x00040004,0x4,0x00070006,0x7
	dd	0x00090007,0x9,0x00090009,0x9,0x000a000a,0xa,0x00000000,0x0,0x00040004,0x4,0x00050005,0x6
	dd	0x00060006,0x8,0x00080007,0x9,0x000a0009,0xa,0x000a0009,0xb,0x0009000a,0xa,0x000a000a,0xa
	dd	0x00000000,0x0,0x00040004,0x4,0x00040005,0x6,0x00060006,0x8,0x000a0007,0x9,0x000a0008,0x9
	dd	0x000a000a,0xa,0x00000000,0x0,0x00060006,0x7,0x00070006,0x8,0x00080007,0x9,0x00090008,0xa
	dd	0x000a0009,0xb,0x000b000a,0xc,0x000a0009,0xb,0x000a000a,0xb,0x00000000,0x0,0x00070005,0x7
	dd	0x00060006,0x7,0x00080007,0x9,0x000a0008,0xa,0x000a0009,0xa,0x000b000a,0xb,0x00000000,0x0
	dd	0x00080007,0x8,0x00080007,0x9,0x00090008,0xa,0x000b0008,0xb,0x000a0009,0xc,0x000c000a,0xc
	dd	0x000a000a,0xb,0x000b000a,0xc,0x00000000,0x0,0x00090007,0x8,0x000a0007,0x9,0x000a0008,0xa
	dd	0x000b0009,0xb,0x000b0009,0xb,0x000c000a,0xb,0x00000000,0x0,0x00090008,0x9,0x000a0008,0xa
	dd	0x000a0009,0xb,0x000b0009,0xc,0x000b000a,0xc,0x000c000a,0xc,0x000b000a,0xc,0x000c000b,0xc
	dd	0x00000000,0x0,0x00090008,0x8,0x00090008,0x9,0x000a0009,0xa,0x000b0009,0xb,0x000c000a,0xb
	dd	0x000c000b,0xc,0x00000000,0x0,0x00090009,0xa,0x000a0009,0xb,0x000b000a,0xc,0x000c000a,0xc
	dd	0x000c000a,0xd,0x000d000b,0xd,0x000c000a,0xc,0x000d000b,0xd,0x00000000,0x0,0x000a0009,0x9
	dd	0x000a0009,0xa,0x000b000a,0xb,0x000b000a,0xc,0x000d000b,0xc,0x000d000b,0xc,0x00000000,0x0
	dd	0x00090009,0x9,0x00090009,0xa,0x00090009,0xb,0x000a000a,0xc,0x000b000a,0xc,0x000c000b,0xc
	dd	0x000c000b,0xd,0x000c000c,0xd,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0
	dd	0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x0009000a,0xa,0x0009000a,0xa
	dd	0x000a000a,0xb,0x000b000b,0xc,0x000c000b,0xc,0x000c000b,0xd,0x000c000b,0xd,0x000c000c,0xd
	dd	0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0
	dd	0x0,0x00000000, 0x0,0x00000000

; linbits32: 13 entries of 8 bytes, indexed by bsr(max - 15) in the ESC path.
; Each entry is four 16-bit multipliers applied with pmaddwd to the count of
; values greater than 14:
;   dword 0 (two equal words) : multiplier whose product lands in bits 0..15
;                               of the packed sum (table from the high byte
;                               of choose_table_H)
;   dword 1 (two equal words) : multiplier whose product is shifted into
;                               bits 16..31 (table from the low byte of
;                               choose_table_H)
linbits32:
	dd	0x00040004,0x10001,0x00040004,0x20002,0x00040004,0x30003,0x00040004,0x40004
	dd	0x00050005,0x60006,0x00060006,0x60006,0x00070007,0x80008,0x00080008,0x80008
	dd	0x00090009,0xa000a,0x000b000b,0xa000a,0x000b000b,0xd000d,0x000d000d,0xd000d
	dd	0x000d000d,0xd000d


; choose_table_H: 13 words, indexed by bsr(max - 15) in the ESC path.
; Each word holds the two candidate table numbers for that index:
;   low byte  : returned when the sum in bits 16..31 is not larger
;   high byte : returned when the sum in bits  0..15 is smaller
choose_table_H:
	dw	0x1810, 0x1811, 0x1812, 0x1813, 0x1914, 0x1a14, 0x1b15, 0x1c15
	dw	0x1d16, 0x1e16, 0x1e17, 0x1f17, 0x1f17

; choose_jump_table_L: 16 dword entries, indexed by the maximum value (0..15)
; found in the input range.  Each entry is the offset of a handler relative to
; choose_table_MMX, so the dispatcher can rebuild the target address from the
; address of choose_table_MMX (works for position-independent builds too).
choose_jump_table_L:
	dd	table_MMX.L_case_0    - choose_table_MMX	; max == 0
	dd	table_MMX.L_case_1    - choose_table_MMX	; max == 1
	dd	table_MMX.L_case_2    - choose_table_MMX	; max == 2
	dd	table_MMX.L_case_3    - choose_table_MMX	; max == 3
	dd	table_MMX.L_case_45   - choose_table_MMX	; max == 4
	dd	table_MMX.L_case_45   - choose_table_MMX	; max == 5
	dd	table_MMX.L_case_67   - choose_table_MMX	; max == 6
	dd	table_MMX.L_case_67   - choose_table_MMX	; max == 7
	dd	table_MMX.L_case_8_15 - choose_table_MMX	; max == 8
	dd	table_MMX.L_case_8_15 - choose_table_MMX	; max == 9
	dd	table_MMX.L_case_8_15 - choose_table_MMX	; max == 10
	dd	table_MMX.L_case_8_15 - choose_table_MMX	; max == 11
	dd	table_MMX.L_case_8_15 - choose_table_MMX	; max == 12
	dd	table_MMX.L_case_8_15 - choose_table_MMX	; max == 13
	dd	table_MMX.L_case_8_15 - choose_table_MMX	; max == 14
	dd	table_MMX.L_case_8_15 - choose_table_MMX	; max == 15

; ---- code --------------------------------------------------------------
	segment_code
;
; use MMX
;

PIC_OFFSETTABLE

	align	16
;-----------------------------------------------------------------------
; int choose_table_MMX(int *ix, int *end, int *s)
;
; Arguments are read from the stack (after the saved ebp):
;   [esp+8]  = ix   start of the dword values to code
;   [esp+12] = end  one past the last value
;   [esp+16] = s    pointer to the running bit count
; Values are consumed in pairs (8 bytes).  The loops are not guarded against
; an empty range, so ix < end is assumed -- TODO confirm with callers.
;
; Result (eax):
;   max == 0          : 0, *s untouched
;   1 <= max <= 15    : table number chosen by the table_MMX.L_case_* handler
;                       reached through choose_jump_table_L; *s += bits
;   15 < max <= 8206  : table number taken from choose_table_H; *s += bits
;   max > 8191+15     : -1, and the maximum is stored to *s
;
; Preserves ebx, esi, ebp (saved/restored here).  Uses eax, ecx, edx, flags
; and mm0-mm7; emms is executed on every path handled in this block.
; edx keeps 'end' from the prologue until the handlers are done with it.
;-----------------------------------------------------------------------
choose_table_MMX:
	push	ebp
	call	get_pc.bp
	add	ebp, PIC_BASE()

	mov	ecx,[esp+8]	;ecx = begin
	mov	edx,[esp+12]	;edx = end
	sub	ecx,edx		;ecx = begin-end (negative byte offset)
	test	ecx,8		;odd number of pairs?
	pxor	mm0,mm0		;mm0=[0:0]
	movq	mm1,[edx+ecx]	;mm1 = first pair (MMX ops leave flags alone)
	jz	.lp

	add	ecx,8		;odd: first pair is already in mm1
	jz	.exit

	align	4
.lp:
	; word-wise running maximum of two pairs per iteration:
	; max(a, b) = a + saturating_sub(b, a)
	movq	mm4,[edx+ecx]
	movq	mm5,[edx+ecx+8]
	add	ecx,16		;sets ZF for the jnz below
	psubusw	mm4,mm0	; strictly this should be a dword operation, but
	psubusw	mm5,mm1	; there is no such instruction :-p
	paddw	mm0,mm4 ; the values handled here are at most 8191+15, so words are fine
	paddw	mm1,mm5
	jnz	.lp
.exit:
	psubusw	mm1,mm0	; this one should strictly be dword as well
	paddw	mm0,mm1		;mm0 = max(mm0, mm1)

	movq	mm4,mm0
	punpckhdq	mm4,mm4
	psubusw	mm4,mm0	; this one should strictly be dword as well
	paddw	mm0,mm4		;low dword = max(low dword, high dword)
	movd	eax,mm0		;eax = maximum value

	cmp	eax,15
	ja	.with_ESC
	; max <= 15: jump to choose_table_MMX + choose_jump_table_L[max]
	lea	ecx,[PIC_EBP_REL(choose_table_MMX)]
	add	ecx,[PIC_EBP_REL(choose_jump_table_L+eax*4)]
	jmp 	ecx

.with_ESC1:
	; maximum exceeds 8191+15: store it to *s and return -1
	emms
	mov	ecx, [esp+16]	; *s
	mov	[ecx], eax
	or	eax,-1
	pop	ebp
	ret

.with_ESC:
	cmp	eax, 8191+15
	ja	.with_ESC1

	sub	eax,15
	push	ebx
	push	esi
	bsr	eax, eax	; eax = index into linbits32 / choose_table_H
%assign _P 4*2			; bytes pushed since the prologue (ebx, esi)
	movq    mm5, [PIC_EBP_REL(D15_15_15_15)]
	movq	mm6, [PIC_EBP_REL(D14_14_14_14)]
	movq	mm3, [PIC_EBP_REL(mul_add)]

	mov	ecx, [esp+_P+8]		; = ix
;	mov	edx, [esp+_P+12]	; = end (still in edx)
	sub	ecx, edx

	xor	esi, esi	; sum = 0
	test    ecx, 8
	pxor	mm7, mm7	; linbits_sum: number of values greater than 14
	jz	.H_dual_lp1

	; odd number of pairs: handle the first pair on its own
	movq	mm0, [edx+ecx]
	add	ecx,8		; sets ZF for the jz below (nothing after it touches flags)
	packssdw	mm0,mm7
	movq	mm2, mm0
	paddusw	mm0, mm5	; mm0 = min(ix, 15)+0xfff0
	pcmpgtw	mm2, mm6	; greater than 14?
	psubw	mm7, mm2	; if greater than 14, linbits_sum++;
	pmaddwd	mm0, mm3	; {0, 0, y, x}*{1, 16, 1, 16}
	movd	ebx, mm0
	; the +(16*16+16)*4 displacement cancels the 0xfff0 (-16) bias per word
	mov	esi, [PIC_EBP_REL(largetbl+ebx*4+(16*16+16)*4)]

	jz	.H_dual_exit

	align   4
.H_dual_lp1:
	movq	mm0, [edx+ecx]
	movq	mm1, [edx+ecx+8]
	packssdw	mm0,mm1
	movq	mm2, mm0
	paddusw	mm0, mm5	; mm0 = min(ix, 15)+0xfff0
	pcmpgtw	mm2, mm6	; greater than 14?
	pmaddwd	mm0, mm3	; {y, x, y, x}*{1, 16, 1, 16}
	movd	ebx, mm0
	punpckhdq	mm0,mm0
	add	esi, [PIC_EBP_REL(largetbl+ebx*4+(16*16+16)*4)]
	movd	ebx, mm0
	add	esi, [PIC_EBP_REL(largetbl+ebx*4+(16*16+16)*4)]
	add	ecx, 16		; sets ZF for the jnz below
	psubw	mm7, mm2	; if greater than 14, linbits_sum++;
	jnz	.H_dual_lp1

.H_dual_exit:
	; fold the four word counters into two and duplicate them in both dwords
	pmov	mm1,mm7
	punpckhdq	mm7,mm7
	paddd	mm7,mm1
	punpckldq	mm7,mm7

	pmaddwd	mm7, [PIC_EBP_REL(linbits32+eax*8)]	; linbits
	; load both candidate table numbers; movzx avoids a partial write to ax
	; (the upper bits of eax are already zero because eax = bsr result)
	movzx	eax, word [PIC_EBP_REL(choose_table_H+eax*2)]

	movd	ecx, mm7
	punpckhdq	mm7,mm7
	movd	edx,mm7
	emms
	shl	edx, 16
	add	ecx, edx	; ecx = packed linbits cost (two 16-bit halves)

	add	ecx, esi	; plus the packed sums from largetbl

	pop	esi
	pop	ebx

	mov	edx, ecx
	and	ecx, 0xffff	; ecx = sum2
	shr	edx, 16	; edx = sum

	cmp	edx, ecx
	jle	.chooseE_s1
	mov	edx, ecx	; sum2 is smaller: take it ...
	shr	eax, 8		; ... and the table number from the high byte
.chooseE_s1:
	mov	ecx, [esp+16] ; *s
	and	eax, 0xff	; eax = chosen table number
	add	[ecx], edx
	pop	ebp
	ret

; Handler for max == 0, reached from choose_table_MMX via choose_jump_table_L.
; In:  eax = 0 (the maximum), saved ebp on top of the stack.
; Out: eax = 0; *s is not modified.
table_MMX.L_case_0:
	emms
	pop	ebp
	ret

; Handler for max == 1, reached from choose_table_MMX via choose_jump_table_L.
; In:  edx = end (left there by choose_table_MMX), ebp = PIC base,
;      saved ebp on top of the stack, arguments above it.
; Out: eax = 1; *s += sum of t1l[x*2 + y] over all pairs (x, y).
; Preserves ebx; uses ecx and flags.
table_MMX.L_case_1:
	emms
	mov	eax, [esp+16] ; eax = s
	mov	ecx, [esp+8] ; ecx = ix
	sub	ecx, edx	; ecx = ix - end (negative byte offset)
	push	ebx
.lp:
	mov	ebx, [edx+ecx]		; x
	add	ebx, ebx		; x*2
	add	ebx, [edx+ecx+4]	; x*2 + y
	movzx	ebx, byte [PIC_EBP_REL(ebx+t1l)]
	add	[eax], ebx		; *s += t1l[x*2 + y]
	add	ecx, 8
	jnz	.lp
	pop	ebx
	mov	eax, 1
	pop	ebp
	ret

; Handlers for max in 4..15, reached from choose_table_MMX via
; choose_jump_table_L.  Each entry pushes its base table number, points ecx
; at its 8-byte-per-entry table and joins the common three-way chooser from3.
;
; In:  edx = end (left there by choose_table_MMX), ebp = PIC base,
;      saved ebp on top of the stack, arguments above it.
; Out: eax = base + 0, 1 or 2 (the candidate with the smallest bit count;
;      ties keep the lower candidate), *s += that bit count.
; Preserves ebx; uses ecx, edx, flags, mm0, mm1, mm2, mm5; ends with emms.
table_MMX.L_case_45:
	push	dword 7
	lea	ecx, [PIC_EBP_REL(tableABC+9*8)]
	jmp	from3

table_MMX.L_case_67:
	push	dword 10
	lea	ecx, [PIC_EBP_REL(tableABC)]
	jmp	from3

table_MMX.L_case_8_15:
	push	dword 13
	lea	ecx, [PIC_EBP_REL(tableDEF)]
from3:
	mov	eax,[esp+12]	;eax = ix (begin pointer; +4 for the pushed table number)
;	mov	edx,[esp+16]	;edx = end (still in edx)

	push	ebx
	sub	eax, edx	;eax = ix - end (negative byte offset)

	movq	mm5,[PIC_EBP_REL(mul_add)]
	pxor	mm2,mm2	;mm2 = sum

	test	eax, 8
	jz	.choose3_lp1
; odd length
	movq	mm0,[edx+eax]	;mm0 = ix[0] | ix[1]
	add	eax,8		;sets ZF for the jz below (nothing after it touches flags)
	packssdw	mm0,mm2

	pmaddwd	mm0,mm5		;x*16 + y
	movd	ebx,mm0

	movq	mm2,  [ecx+ebx*8]

	jz	.choose3_exit

	align	4
.choose3_lp1:
	movq	mm0,[edx+eax]
	movq	mm1,[edx+eax+8]
	add	eax,16		;sets ZF for the jnz below
	packssdw	mm0,mm1 ;mm0 = ix[0]|ix[1]|ix[2]|ix[3]
	pmaddwd	mm0,mm5
	movd	ebx,mm0
	punpckhdq	mm0,mm0
	paddd	mm2, [ecx+ebx*8]
	movd	ebx,mm0
	paddd	mm2, [ecx+ebx*8]
	jnz	.choose3_lp1
.choose3_exit:
;	xor	eax,eax		; not needed: eax is already 0 when the loop ends
	movd	ebx, mm2
	punpckhdq	mm2,mm2
	mov	ecx, ebx
	and	ecx, 0xffff	; ecx = sum2
	shr	ebx, 16	; ebx = sum1
	movd	edx, mm2	; edx = sum

	cmp	edx, ebx
	jle	.choose3_s1
	mov	edx, ebx
	inc	eax		; candidate +1
.choose3_s1:
	emms
	pop	ebx
	cmp	edx, ecx
	jle	.choose3_s2
	mov	edx, ecx
	mov	eax, 2		; candidate +2
.choose3_s2:
	pop	ecx		; base table number pushed by the entry point
	add	eax, ecx
	mov	ecx, [esp+16] ; *s
	add	[ecx], edx
	pop	ebp
	ret

; Handlers for max == 2 and max == 3, reached from choose_table_MMX via
; choose_jump_table_L.  Each entry pushes its base table number, points ecx
; at its dword-per-entry table, loads the matching pmaddwd multipliers into
; mm5 and joins the common two-way chooser from2.
;
; Table entries are read as: bits 16..31 = bit count for the base table,
; bits 0..15 = bit count for base + 1.
;
; In:  edx = end (left there by choose_table_MMX), ebp = PIC base,
;      saved ebp on top of the stack, arguments above it.
; Out: eax = base or base + 1 (ties keep the base table), *s += its bit count.
; Preserves ebx, edi; uses ecx, edx, flags, mm0, mm1, mm2, mm5; ends with emms.
table_MMX.L_case_2:
	push	dword 2
	lea	ecx,[PIC_EBP_REL(table23)]
	pmov	mm5,[PIC_EBP_REL(mul_add23)]	; index = x*3 + y
	jmp	from2
table_MMX.L_case_3:
	push	dword 5
	lea	ecx,[PIC_EBP_REL(table56)]
	pmov	mm5,[PIC_EBP_REL(mul_add56)]	; index = x*4 + y
from2:
	mov	eax,[esp+12]	;eax = ix (begin pointer; +4 for the pushed table number)
;	mov	edx,[esp+16]	;edx = end (still in edx)
	push	ebx
	push	edi

	sub	eax, edx	;eax = ix - end (negative byte offset)
	xor	edi, edi	;edi = packed sum
	test	eax, 8
	jz	.choose2_lp1
; odd length
	movq	mm0,[edx+eax]	;mm0 = ix[0] | ix[1]
	pxor	mm2,mm2		;zero upper half for the pack
	packssdw	mm0,mm2

	pmaddwd	mm0,mm5
	movd	ebx,mm0

	mov	edi,  [ecx+ebx*4]

	add	eax,8
	jz	.choose2_exit

	align	4
.choose2_lp1:
	movq	mm0,[edx+eax]
	movq	mm1,[edx+eax+8]
	packssdw	mm0,mm1 ;mm0 = ix[0]|ix[1]|ix[2]|ix[3]
	pmaddwd	mm0,mm5
	movd	ebx,mm0
	punpckhdq	mm0,mm0
	add	edi, [ecx+ebx*4]
	movd	ebx, mm0
	add	edi, [ecx+ebx*4]
	add	eax,16
	jnc	.choose2_lp1	; negative offset + 16 carries only when it wraps to 0
.choose2_exit:
	mov	ecx, edi
	pop	edi
	pop	ebx
	pop	eax ; table num.
	emms

	mov	edx, ecx
	and	ecx, 0xffff	; ecx = sum2
	shr	edx, 16	; edx = sum1

	cmp	edx, ecx
	jle	.choose2_s1
	mov	edx, ecx
	inc	eax		; base + 1 is cheaper
.choose2_s1:
	mov	ecx, [esp+16] ; *s
	add	[ecx], edx
	pop	ebp
	ret

	end