1/************************************************************************** 2 * 3 * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included 13 * in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 * 23 **************************************************************************/ 24 25#ifndef _RTASM_X86SSE_H_ 26#define _RTASM_X86SSE_H_ 27 28#include "pipe/p_compiler.h" 29#include "pipe/p_config.h" 30 31#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 32 33/* It is up to the caller to ensure that instructions issued are 34 * suitable for the host cpu. There are no checks made in this module 35 * for mmx/sse/sse2 support on the cpu. 36 */ 37struct x86_reg { 38 unsigned file:2; 39 unsigned idx:4; 40 unsigned mod:2; /* mod_REG if this is just a register */ 41 int disp:24; /* only +/- 23bits of offset - should be enough... */ 42}; 43 44#define X86_MMX 1 45#define X86_MMX2 2 46#define X86_SSE 4 47#define X86_SSE2 8 48#define X86_SSE3 0x10 49#define X86_SSE4_1 0x20 50 51struct x86_function { 52 unsigned caps; 53 unsigned size; 54 unsigned char *store; 55 unsigned char *csr; 56 57 unsigned stack_offset:16; 58 unsigned need_emms:8; 59 int x87_stack:8; 60 61 unsigned char error_overflow[4]; 62}; 63 64enum x86_reg_file { 65 file_REG32, 66 file_MMX, 67 file_XMM, 68 file_x87 69}; 70 71/* Values for mod field of modr/m byte 72 */ 73enum x86_reg_mod { 74 mod_INDIRECT, 75 mod_DISP8, 76 mod_DISP32, 77 mod_REG 78}; 79 80enum x86_reg_name { 81 reg_AX, 82 reg_CX, 83 reg_DX, 84 reg_BX, 85 reg_SP, 86 reg_BP, 87 reg_SI, 88 reg_DI, 89 reg_R8, 90 reg_R9, 91 reg_R10, 92 reg_R11, 93 reg_R12, 94 reg_R13, 95 reg_R14, 96 reg_R15 97}; 98 99 100enum x86_cc { 101 cc_O, /* overflow */ 102 cc_NO, /* not overflow */ 103 cc_NAE, /* not above or equal / carry */ 104 cc_AE, /* above or equal / not carry */ 105 cc_E, /* equal / zero */ 106 cc_NE /* not equal / not zero */ 107}; 108 109enum sse_cc { 110 cc_Equal, 111 cc_LessThan, 112 cc_LessThanEqual, 113 cc_Unordered, 114 cc_NotEqual, 115 cc_NotLessThan, 116 cc_NotLessThanEqual, 117 cc_Ordered 118}; 119 120#define cc_Z cc_E 121#define cc_NZ cc_NE 122 123 124/** generic pointer to function */ 125typedef void (*x86_func)(void); 126 127 128/* Begin/end/retrieve function creation: 129 */ 130 131enum x86_target 132{ 133 X86_32, 134 X86_64_STD_ABI, 135 X86_64_WIN64_ABI 136}; 137 138/* make this read a member of x86_function if target != host is desired */ 139static inline enum x86_target x86_target( struct x86_function* p ) 140{ 141#ifdef PIPE_ARCH_X86 142 return X86_32; 143#elif (defined(PIPE_OS_CYGWIN) || defined(PIPE_OS_WINDOWS)) && defined(PIPE_ARCH_X86_64) 144 return X86_64_WIN64_ABI; 145#elif defined(PIPE_ARCH_X86_64) 146 return X86_64_STD_ABI; 147#endif 148} 149 150static inline unsigned x86_target_caps( struct x86_function* p ) 151{ 152 return p->caps; 153} 154 155void x86_init_func( struct x86_function *p ); 156void x86_init_func_size( struct x86_function *p, unsigned code_size ); 157void x86_release_func( struct x86_function *p ); 158x86_func x86_get_func( struct x86_function *p ); 159 160/* Debugging: 161 */ 162void x86_print_reg( struct x86_reg reg ); 163 164 165/* Create and manipulate registers and regmem values: 166 */ 167struct x86_reg x86_make_reg( enum x86_reg_file file, 168 enum x86_reg_name idx ); 169 170struct x86_reg x86_make_disp( struct x86_reg reg, 171 int disp ); 172 173struct x86_reg x86_deref( struct x86_reg reg ); 174 175struct x86_reg x86_get_base_reg( struct x86_reg reg ); 176 177 178/* Labels, jumps and fixup: 179 */ 180int x86_get_label( struct x86_function *p ); 181 182void x64_rexw(struct x86_function *p); 183 184void x86_jcc( struct x86_function *p, 185 enum x86_cc cc, 186 int label ); 187 188int x86_jcc_forward( struct x86_function *p, 189 enum x86_cc cc ); 190 191int x86_jmp_forward( struct x86_function *p); 192 193int x86_call_forward( struct x86_function *p); 194 195void x86_fixup_fwd_jump( struct x86_function *p, 196 int fixup ); 197 198void x86_jmp( struct x86_function *p, int label ); 199 200/* void x86_call( struct x86_function *p, void (*label)() ); */ 201void x86_call( struct x86_function *p, struct x86_reg reg); 202 203void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ); 204void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm ); 205void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm ); 206void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm ); 207void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm ); 208void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm ); 209void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm ); 210 211 212/* Macro for sse_shufps() and sse2_pshufd(): 213 */ 214#define SHUF(_x,_y,_z,_w) (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6)) 215#define SHUF_NOOP RSW(0,1,2,3) 216#define GET_SHUF(swz, idx) (((swz) >> ((idx)*2)) & 0x3) 217 218void mmx_emms( struct x86_function *p ); 219void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 220void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 221void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 222void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 223 224void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 225void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 226void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 227void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 228void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 229void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 230void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 231 232void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 233void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 234void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 235void sse2_cvtsd2ss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 236void sse2_cvtpd2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 237 238void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 239void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 240void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 241void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 242void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, 243 unsigned char shuf ); 244void sse2_pshuflw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, 245 unsigned char shuf ); 246void sse2_pshufhw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, 247 unsigned char shuf ); 248void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 249void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 250 251void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 252void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 253void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 254void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 255 256void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 257void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 258void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 259 260void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 261void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 262void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 263 264void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 265void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ); 266 267void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 268 269void sse2_pshuflw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm ); 270void sse2_pshufhw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm ); 271void sse2_pshufd( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm ); 272 273void sse2_pcmpgtd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 274 275void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr); 276void sse_prefetch0( struct x86_function *p, struct x86_reg ptr); 277void sse_prefetch1( struct x86_function *p, struct x86_reg ptr); 278 279void sse_movntps( struct x86_function *p, struct x86_reg dst, struct x86_reg src); 280 281void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 282void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 283void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 284void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 285void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 286void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 287void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, 288 enum sse_cc cc ); 289void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 290void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 291void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 292void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 293void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 294void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 295void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 296void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 297void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 298void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 299void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 300void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 301void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 302void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 303void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 304void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 305void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 306void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, 307 unsigned char shuf ); 308void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 309void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 310void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src ); 311void sse_movmskps( struct x86_function *p, struct x86_reg dst, struct x86_reg src); 312 313void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 314void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 315void x86_cmovcc( struct x86_function *p, struct x86_reg dst, struct x86_reg src, enum x86_cc cc ); 316void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 317void x86_dec( struct x86_function *p, struct x86_reg reg ); 318void x86_inc( struct x86_function *p, struct x86_reg reg ); 319void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 320void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 321void x64_mov64( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 322void x86_mov8( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 323void x86_mov16( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 324void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 325void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 326void x86_mov_imm(struct x86_function *p, struct x86_reg dst, int imm ); 327void x86_mov8_imm(struct x86_function *p, struct x86_reg dst, uint8_t imm ); 328void x86_mov16_imm(struct x86_function *p, struct x86_reg dst, uint16_t imm ); 329void x86_mul( struct x86_function *p, struct x86_reg src ); 330void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 331void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 332void x86_pop( struct x86_function *p, struct x86_reg reg ); 333void x86_push( struct x86_function *p, struct x86_reg reg ); 334void x86_push_imm32( struct x86_function *p, int imm ); 335void x86_ret( struct x86_function *p ); 336void x86_retw( struct x86_function *p, unsigned short imm ); 337void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 338void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 339void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 340void x86_sahf( struct x86_function *p ); 341void x86_div( struct x86_function *p, struct x86_reg src ); 342void x86_bswap( struct x86_function *p, struct x86_reg src ); 343void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ); 344void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ); 345void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ); 346 347void x86_cdecl_caller_push_regs( struct x86_function *p ); 348void x86_cdecl_caller_pop_regs( struct x86_function *p ); 349 350void x87_assert_stack_empty( struct x86_function *p ); 351 352void x87_f2xm1( struct x86_function *p ); 353void x87_fabs( struct x86_function *p ); 354void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); 355void x87_faddp( struct x86_function *p, struct x86_reg dst ); 356void x87_fchs( struct x86_function *p ); 357void x87_fclex( struct x86_function *p ); 358void x87_fcmovb( struct x86_function *p, struct x86_reg src ); 359void x87_fcmovbe( struct x86_function *p, struct x86_reg src ); 360void x87_fcmove( struct x86_function *p, struct x86_reg src ); 361void x87_fcmovnb( struct x86_function *p, struct x86_reg src ); 362void x87_fcmovnbe( struct x86_function *p, struct x86_reg src ); 363void x87_fcmovne( struct x86_function *p, struct x86_reg src ); 364void x87_fcom( struct x86_function *p, struct x86_reg dst ); 365void x87_fcomi( struct x86_function *p, struct x86_reg dst ); 366void x87_fcomip( struct x86_function *p, struct x86_reg dst ); 367void x87_fcomp( struct x86_function *p, struct x86_reg dst ); 368void x87_fcos( struct x86_function *p ); 369void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); 370void x87_fdivp( struct x86_function *p, struct x86_reg dst ); 371void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); 372void x87_fdivrp( struct x86_function *p, struct x86_reg dst ); 373void x87_fild( struct x86_function *p, struct x86_reg arg ); 374void x87_fist( struct x86_function *p, struct x86_reg dst ); 375void x87_fistp( struct x86_function *p, struct x86_reg dst ); 376void x87_fld( struct x86_function *p, struct x86_reg arg ); 377void x87_fld1( struct x86_function *p ); 378void x87_fldcw( struct x86_function *p, struct x86_reg arg ); 379void x87_fldl2e( struct x86_function *p ); 380void x87_fldln2( struct x86_function *p ); 381void x87_fldz( struct x86_function *p ); 382void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); 383void x87_fmulp( struct x86_function *p, struct x86_reg dst ); 384void x87_fnclex( struct x86_function *p ); 385void x87_fprndint( struct x86_function *p ); 386void x87_fpop( struct x86_function *p ); 387void x87_fscale( struct x86_function *p ); 388void x87_fsin( struct x86_function *p ); 389void x87_fsincos( struct x86_function *p ); 390void x87_fsqrt( struct x86_function *p ); 391void x87_fst( struct x86_function *p, struct x86_reg dst ); 392void x87_fstp( struct x86_function *p, struct x86_reg dst ); 393void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); 394void x87_fsubp( struct x86_function *p, struct x86_reg dst ); 395void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); 396void x87_fsubrp( struct x86_function *p, struct x86_reg dst ); 397void x87_ftst( struct x86_function *p ); 398void x87_fxch( struct x86_function *p, struct x86_reg dst ); 399void x87_fxtract( struct x86_function *p ); 400void x87_fyl2x( struct x86_function *p ); 401void x87_fyl2xp1( struct x86_function *p ); 402void x87_fwait( struct x86_function *p ); 403void x87_fnstcw( struct x86_function *p, struct x86_reg dst ); 404void x87_fnstsw( struct x86_function *p, struct x86_reg dst ); 405void x87_fucompp( struct x86_function *p ); 406void x87_fucomp( struct x86_function *p, struct x86_reg arg ); 407void x87_fucom( struct x86_function *p, struct x86_reg arg ); 408 409 410 411/* Retrieve a reference to one of the function arguments, taking into 412 * account any push/pop activity. Note - doesn't track explicit 413 * manipulation of ESP by other instructions. 414 */ 415struct x86_reg x86_fn_arg( struct x86_function *p, unsigned arg ); 416 417#endif 418#endif 419