/**************************************************************************
 *
 * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
24
25#ifndef _RTASM_X86SSE_H_
26#define _RTASM_X86SSE_H_
27
28#include "pipe/p_compiler.h"
29#include "pipe/p_config.h"
30
31#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
32
/* It is up to the caller to ensure that the instructions issued are
 * suitable for the host CPU.  This module makes no checks for
 * MMX/SSE/SSE2 support on the CPU.
 */
/**
 * Compact description of a register or register+displacement operand.
 *
 * The four bit-fields add up to 32 bits (2 + 4 + 2 + 24).
 */
struct x86_reg {
   unsigned file:2;		/* presumably an enum x86_reg_file value */
   unsigned idx:4;		/* presumably an enum x86_reg_name value */
   unsigned mod:2;		/* mod_REG if this is just a register */
   /* Explicitly signed: whether a plain 'int' bit-field is signed is
    * implementation-defined, and negative displacements must survive.
    * Only +/- 23 bits of offset - should be enough...
    */
   signed int disp:24;
};
43
/* CPU feature flags.  Each value is a distinct single bit, so flags can be
 * OR'ed together into one mask (presumably the 'caps' member of
 * struct x86_function - confirm against the implementation).
 */
#define X86_MMX    0x01
#define X86_MMX2   0x02
#define X86_SSE    0x04
#define X86_SSE2   0x08
#define X86_SSE3   0x10
#define X86_SSE4_1 0x20
50
/**
 * State of one function being assembled.
 *
 * NOTE(review): the member descriptions below are inferred from the member
 * names and from the prototypes in this header - confirm against the
 * implementation.
 */
struct x86_function {
   unsigned caps;		/* returned by x86_target_caps() */
   unsigned size;		/* presumably the size of the code buffer */
   unsigned char *store;	/* presumably the start of the code buffer */
   unsigned char *csr;		/* presumably the current write position */

   unsigned stack_offset:16;
   unsigned need_emms:8;
   /* Explicitly signed: whether a plain 'int' bit-field is signed is
    * implementation-defined.
    */
   signed int x87_stack:8;

   unsigned char error_overflow[4];
};
63
/* Register files a struct x86_reg can refer to.  The four values fit the
 * 2-bit 'file' bit-field of struct x86_reg.
 */
enum x86_reg_file {
   file_REG32 = 0,
   file_MMX   = 1,
   file_XMM   = 2,
   file_x87   = 3
};
70
/* Values for mod field of modr/m byte.
 *
 * The numbering is spelled out so that it cannot drift if entries are
 * ever reordered.
 */
enum x86_reg_mod {
   mod_INDIRECT = 0,
   mod_DISP8    = 1,
   mod_DISP32   = 2,
   mod_REG      = 3
};
79
/* General-purpose register names.  The sixteen values fit the 4-bit 'idx'
 * bit-field of struct x86_reg; presumably they equal the hardware register
 * numbers - confirm against the encoder.
 */
enum x86_reg_name {
   reg_AX  = 0,
   reg_CX  = 1,
   reg_DX  = 2,
   reg_BX  = 3,
   reg_SP  = 4,
   reg_BP  = 5,
   reg_SI  = 6,
   reg_DI  = 7,
   reg_R8  = 8,
   reg_R9  = 9,
   reg_R10 = 10,
   reg_R11 = 11,
   reg_R12 = 12,
   reg_R13 = 13,
   reg_R14 = 14,
   reg_R15 = 15
};
98
99
/* Condition codes accepted by x86_jcc(), x86_jcc_forward() and
 * x86_cmovcc().  Presumably the values are used directly as the condition
 * field of the emitted opcode - confirm against the encoder - so the
 * numbering is spelled out explicitly.
 */
enum x86_cc {
   cc_O   = 0,			/* overflow */
   cc_NO  = 1,			/* not overflow */
   cc_NAE = 2,			/* not above or equal / carry */
   cc_AE  = 3,			/* above or equal / not carry */
   cc_E   = 4,			/* equal / zero */
   cc_NE  = 5			/* not equal / not zero */
};
108
/* Comparison predicates accepted by sse_cmpps().  Presumably the value is
 * emitted as the instruction's immediate byte - confirm against the
 * encoder - so the numbering is spelled out explicitly.
 */
enum sse_cc {
   cc_Equal            = 0,
   cc_LessThan         = 1,
   cc_LessThanEqual    = 2,
   cc_Unordered        = 3,
   cc_NotEqual         = 4,
   cc_NotLessThan      = 5,
   cc_NotLessThanEqual = 6,
   cc_Ordered          = 7
};
119
/* Zero / not-zero spellings of the equal / not-equal condition codes. */
#define cc_Z  cc_E
#define cc_NZ cc_NE
122
123
/** generic pointer to function; this is the type x86_get_func() returns */
typedef void (*x86_func)(void);
126
127
/* Begin/end/retrieve function creation:
 */
130
/* Target architecture / calling convention reported by x86_target(). */
enum x86_target
{
   X86_32           = 0,
   X86_64_STD_ABI   = 1,
   X86_64_WIN64_ABI = 2
};
137
138/* make this read a member of x86_function if target != host is desired */
139static inline enum x86_target x86_target( struct x86_function* p )
140{
141#ifdef PIPE_ARCH_X86
142   return X86_32;
143#elif (defined(PIPE_OS_CYGWIN) || defined(PIPE_OS_WINDOWS)) && defined(PIPE_ARCH_X86_64)
144   return X86_64_WIN64_ABI;
145#elif defined(PIPE_ARCH_X86_64)
146   return X86_64_STD_ABI;
147#endif
148}
149
150static inline unsigned x86_target_caps( struct x86_function* p )
151{
152   return p->caps;
153}
154
155void x86_init_func( struct x86_function *p );
156void x86_init_func_size( struct x86_function *p, unsigned code_size );
157void x86_release_func( struct x86_function *p );
158x86_func x86_get_func( struct x86_function *p );
159
/* Debugging:
 */
void x86_print_reg( struct x86_reg reg );
163
164
/* Create and manipulate registers and regmem values:
 */
struct x86_reg x86_make_reg( enum x86_reg_file file,
                             enum x86_reg_name idx );

struct x86_reg x86_make_disp( struct x86_reg reg,
                              int disp );

struct x86_reg x86_deref( struct x86_reg reg );

struct x86_reg x86_get_base_reg( struct x86_reg reg );
176
177
/* Labels, jumps and fixup:
 *
 * NOTE(review): presumably the int returned by the *_forward() functions is
 * the value later handed to x86_fixup_fwd_jump() - confirm against the
 * implementation.
 */
int x86_get_label( struct x86_function *p );

void x64_rexw(struct x86_function *p);

void x86_jcc( struct x86_function *p,
              enum x86_cc cc,
              int label );

int x86_jcc_forward( struct x86_function *p,
                     enum x86_cc cc );

int x86_jmp_forward( struct x86_function *p);

int x86_call_forward( struct x86_function *p);

void x86_fixup_fwd_jump( struct x86_function *p,
                         int fixup );

void x86_jmp( struct x86_function *p, int label );

/* void x86_call( struct x86_function *p, void (*label)() ); */
void x86_call( struct x86_function *p, struct x86_reg reg);
202
/* Integer operations taking an immediate operand. */
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm );
void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm );
void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm );
void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm );
void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm );
void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm );
void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm );
210
211
/* Macro for sse_shufps() and sse2_pshufd():
 *
 * SHUF packs four 2-bit selectors into one byte-sized immediate, with _x in
 * bits 1:0 up to _w in bits 7:6.  SHUF_NOOP is the selection that maps
 * every slot to itself, and GET_SHUF extracts selector 'idx' (0..3) from a
 * packed value.
 */
#define SHUF(_x,_y,_z,_w)       (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6))
#define SHUF_NOOP               SHUF(0,1,2,3)
#define GET_SHUF(swz, idx)      (((swz) >> ((idx)*2)) & 0x3)
217
/* MMX instructions. */
void mmx_emms( struct x86_function *p );
void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
223
/* SSE2 moves. */
void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
231
/* SSE2 conversions. */
void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvtsd2ss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvtpd2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
237
/* SSE2 pack, shuffle and reciprocal instructions.
 *
 * sse2_movd() is not repeated here: it is already declared with the other
 * SSE2 move prototypes earlier in this header.
 */
void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
                  unsigned char shuf );
void sse2_pshuflw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
                  unsigned char shuf );
void sse2_pshufhw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
                  unsigned char shuf );
void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
250
/* SSE2 unpack-low instructions. */
void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
255
/* SSE2 shifts by an immediate count. */
void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );

void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );

void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
266
/* SSE2 bitwise OR and signed dword compare.
 *
 * sse2_pshuflw(), sse2_pshufhw() and sse2_pshufd() are not repeated here:
 * they are already declared earlier in this header.
 */
void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src );

void sse2_pcmpgtd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
274
/* SSE prefetch hints and non-temporal store. */
void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr);
void sse_prefetch0( struct x86_function *p, struct x86_reg ptr);
void sse_prefetch1( struct x86_function *p, struct x86_reg ptr);

void sse_movntps( struct x86_function *p, struct x86_reg dst, struct x86_reg src);
280
/* SSE instructions. */
void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src,
                enum sse_cc cc );
void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
                 unsigned char shuf );
void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src );
void sse_movmskps( struct x86_function *p, struct x86_reg dst, struct x86_reg src);
312
/* General-purpose integer instructions. */
void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_cmovcc( struct x86_function *p, struct x86_reg dst, struct x86_reg src, enum x86_cc cc );
void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_dec( struct x86_function *p, struct x86_reg reg );
void x86_inc( struct x86_function *p, struct x86_reg reg );
void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x64_mov64( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mov8( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mov16( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mov_imm(struct x86_function *p, struct x86_reg dst, int imm );
void x86_mov8_imm(struct x86_function *p, struct x86_reg dst, uint8_t imm );
void x86_mov16_imm(struct x86_function *p, struct x86_reg dst, uint16_t imm );
void x86_mul( struct x86_function *p, struct x86_reg src );
void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_pop( struct x86_function *p, struct x86_reg reg );
void x86_push( struct x86_function *p, struct x86_reg reg );
void x86_push_imm32( struct x86_function *p, int imm );
void x86_ret( struct x86_function *p );
void x86_retw( struct x86_function *p, unsigned short imm );
void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_sahf( struct x86_function *p );
void x86_div( struct x86_function *p, struct x86_reg src );
void x86_bswap( struct x86_function *p, struct x86_reg src );
void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm  );
346
/* NOTE(review): going by the names, these save and restore the registers a
 * cdecl caller is responsible for around a call - confirm against the
 * implementation.
 */
void x86_cdecl_caller_push_regs( struct x86_function *p );
void x86_cdecl_caller_pop_regs( struct x86_function *p );
349
/* x87 floating-point instructions. */
void x87_assert_stack_empty( struct x86_function *p );

void x87_f2xm1( struct x86_function *p );
void x87_fabs( struct x86_function *p );
void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_faddp( struct x86_function *p, struct x86_reg dst );
void x87_fchs( struct x86_function *p );
void x87_fclex( struct x86_function *p );
void x87_fcmovb( struct x86_function *p, struct x86_reg src );
void x87_fcmovbe( struct x86_function *p, struct x86_reg src );
void x87_fcmove( struct x86_function *p, struct x86_reg src );
void x87_fcmovnb( struct x86_function *p, struct x86_reg src );
void x87_fcmovnbe( struct x86_function *p, struct x86_reg src );
void x87_fcmovne( struct x86_function *p, struct x86_reg src );
void x87_fcom( struct x86_function *p, struct x86_reg dst );
void x87_fcomi( struct x86_function *p, struct x86_reg dst );
void x87_fcomip( struct x86_function *p, struct x86_reg dst );
void x87_fcomp( struct x86_function *p, struct x86_reg dst );
void x87_fcos( struct x86_function *p );
void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fdivp( struct x86_function *p, struct x86_reg dst );
void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fdivrp( struct x86_function *p, struct x86_reg dst );
void x87_fild( struct x86_function *p, struct x86_reg arg );
void x87_fist( struct x86_function *p, struct x86_reg dst );
void x87_fistp( struct x86_function *p, struct x86_reg dst );
void x87_fld( struct x86_function *p, struct x86_reg arg );
void x87_fld1( struct x86_function *p );
void x87_fldcw( struct x86_function *p, struct x86_reg arg );
void x87_fldl2e( struct x86_function *p );
void x87_fldln2( struct x86_function *p );
void x87_fldz( struct x86_function *p );
void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fmulp( struct x86_function *p, struct x86_reg dst );
void x87_fnclex( struct x86_function *p );
void x87_fprndint( struct x86_function *p );
void x87_fpop( struct x86_function *p );
void x87_fscale( struct x86_function *p );
void x87_fsin( struct x86_function *p );
void x87_fsincos( struct x86_function *p );
void x87_fsqrt( struct x86_function *p );
void x87_fst( struct x86_function *p, struct x86_reg dst );
void x87_fstp( struct x86_function *p, struct x86_reg dst );
void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fsubp( struct x86_function *p, struct x86_reg dst );
void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fsubrp( struct x86_function *p, struct x86_reg dst );
void x87_ftst( struct x86_function *p );
void x87_fxch( struct x86_function *p, struct x86_reg dst );
void x87_fxtract( struct x86_function *p );
void x87_fyl2x( struct x86_function *p );
void x87_fyl2xp1( struct x86_function *p );
void x87_fwait( struct x86_function *p );
void x87_fnstcw( struct x86_function *p, struct x86_reg dst );
void x87_fnstsw( struct x86_function *p, struct x86_reg dst );
void x87_fucompp( struct x86_function *p );
void x87_fucomp( struct x86_function *p, struct x86_reg arg );
void x87_fucom( struct x86_function *p, struct x86_reg arg );
408
409
410
/* Retrieve a reference to one of the function arguments, taking into
 * account any push/pop activity.  Note - doesn't track explicit
 * manipulation of ESP by other instructions.
 *
 * NOTE(review): whether 'arg' counts from 0 or from 1 is not visible in
 * this header - confirm against the implementation.
 */
struct x86_reg x86_fn_arg( struct x86_function *p, unsigned arg );
416
417#endif
418#endif
419